]>
Commit | Line | Data |
---|---|---|
e90b1cf9 | 1 | /* |
e0edde6f | 2 | * Copyright (c) 2007-2012 Nicira, Inc. |
e90b1cf9 JG |
3 | * Distributed under the terms of the GNU GPL version 2. |
4 | * | |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
7 | */ | |
8 | ||
dfffaef1 JP |
9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
10 | ||
e90b1cf9 JG |
11 | #include <linux/version.h> |
12 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) | |
13 | ||
14 | #include <linux/if.h> | |
15 | #include <linux/in.h> | |
16 | #include <linux/ip.h> | |
17 | #include <linux/list.h> | |
18 | #include <linux/net.h> | |
2a4999f3 | 19 | #include <net/net_namespace.h> |
e90b1cf9 JG |
20 | |
21 | #include <net/icmp.h> | |
22 | #include <net/inet_frag.h> | |
23 | #include <net/ip.h> | |
24 | #include <net/protocol.h> | |
cd8055cc | 25 | #include <net/udp.h> |
e90b1cf9 | 26 | |
2a4999f3 | 27 | #include "datapath.h" |
e90b1cf9 JG |
28 | #include "tunnel.h" |
29 | #include "vport.h" | |
30 | #include "vport-generic.h" | |
31 | ||
32 | #define CAPWAP_SRC_PORT 58881 | |
33 | #define CAPWAP_DST_PORT 58882 | |
34 | ||
35 | #define CAPWAP_FRAG_TIMEOUT (30 * HZ) | |
36 | #define CAPWAP_FRAG_MAX_MEM (256 * 1024) | |
6455100f | 37 | #define CAPWAP_FRAG_PRUNE_MEM (192 * 1024) |
e90b1cf9 JG |
38 | #define CAPWAP_FRAG_SECRET_INTERVAL (10 * 60 * HZ) |
39 | ||
40 | /* | |
41 | * The CAPWAP header is a mess, with all kinds of odd size bit fields that | |
42 | * cross byte boundaries, which are difficult to represent correctly in | |
43 | * various byte orderings. Luckily we only care about a few permutations, so | |
44 | * statically create them and we can do very fast parsing by checking all 12 | |
45 | * fields in one go. | |
46 | */ | |
40a75177 VG |
47 | #define CAPWAP_PREAMBLE_MASK __cpu_to_be32(0xFF000000) |
48 | #define CAPWAP_HLEN_SHIFT 17 | |
49 | #define CAPWAP_HLEN_MASK __cpu_to_be32(0x00F80000) | |
50 | #define CAPWAP_RID_MASK __cpu_to_be32(0x0007C000) | |
51 | #define CAPWAP_WBID_MASK __cpu_to_be32(0x00003E00) | |
52 | #define CAPWAP_F_MASK __cpu_to_be32(0x000001FF) | |
53 | ||
54 | #define CAPWAP_F_FRAG __cpu_to_be32(0x00000080) | |
55 | #define CAPWAP_F_LASTFRAG __cpu_to_be32(0x00000040) | |
56 | #define CAPWAP_F_WSI __cpu_to_be32(0x00000020) | |
57 | #define CAPWAP_F_RMAC __cpu_to_be32(0x00000010) | |
58 | ||
59 | #define CAPWAP_RMAC_LEN 4 | |
60 | ||
61 | /* Standard CAPWAP looks for a WBID value of 2. | |
62 | * When we insert WSI field, use WBID value of 30, which has been | |
63 | * proposed for all "experimental" usage - users with no reserved WBID value | |
64 | * of their own. | |
65 | */ | |
66 | #define CAPWAP_WBID_30 __cpu_to_be32(0x00003C00) | |
67 | #define CAPWAP_WBID_2 __cpu_to_be32(0x00000200) | |
68 | ||
69 | #define FRAG_HDR (CAPWAP_F_FRAG) | |
70 | #define FRAG_LAST_HDR (FRAG_HDR | CAPWAP_F_LASTFRAG) | |
71 | ||
72 | /* Keyed packet, WBID 30, and length long enough to include WSI key */ | |
73 | #define CAPWAP_KEYED (CAPWAP_WBID_30 | CAPWAP_F_WSI | htonl(20 << CAPWAP_HLEN_SHIFT)) | |
74 | /* A backward-compatible packet, WBID 2 and length of 2 words (no WSI fields) */ | |
75 | #define CAPWAP_NO_WSI (CAPWAP_WBID_2 | htonl(8 << CAPWAP_HLEN_SHIFT)) | |
76 | ||
77 | /* Mask for all parts of header that must be 0. */ | |
78 | #define CAPWAP_ZERO_MASK (CAPWAP_PREAMBLE_MASK | \ | |
79 | (CAPWAP_F_MASK ^ (CAPWAP_F_WSI | CAPWAP_F_FRAG | CAPWAP_F_LASTFRAG | CAPWAP_F_RMAC))) | |
e90b1cf9 JG |
80 | |
/*
 * On-the-wire CAPWAP header (immediately follows the UDP header).
 * 'begin' packs the preamble, HLEN, RID, WBID and flag bits; see the
 * CAPWAP_*_MASK macros above for the layout.
 */
struct capwaphdr {
	__be32 begin;
	__be16 frag_id;
	/* low 3 bits of frag_off are reserved */
	__be16 frag_off;
};
87 | ||
40a75177 VG |
/*
 * We use the WSI field to hold additional tunnel data.
 * The first eight bits store the size of the wsi data in bytes.
 */
struct capwaphdr_wsi {
	u8 wsi_len;		/* WSI data size in bytes, excluding this byte */
	u8 flags;		/* CAPWAP_WSI_F_* flag bits */
	__be16 reserved_padding;
};
97 | ||
/* Optional 64-bit tunnel key carried as the first element of WSI data. */
struct capwaphdr_wsi_key {
	__be64 key;
};
101 | ||
102 | /* Flag indicating a 64bit key is stored in WSI data field */ | |
103 | #define CAPWAP_WSI_F_KEY64 0x80 | |
104 | ||
6455100f | 105 | static struct capwaphdr *capwap_hdr(const struct sk_buff *skb) |
e90b1cf9 JG |
106 | { |
107 | return (struct capwaphdr *)(udp_hdr(skb) + 1); | |
108 | } | |
109 | ||
110 | /* | |
111 | * The fragment offset is actually the high 13 bits of the last 16 bit field, | |
112 | * so we would normally need to right shift 3 places. However, it stores the | |
113 | * offset in 8 byte chunks, which would involve a 3 place left shift. So we | |
114 | * just mask off the last 3 bits and be done with it. | |
115 | */ | |
116 | #define FRAG_OFF_MASK (~0x7U) | |
117 | ||
40a75177 VG |
118 | /* |
119 | * The minimum header length. The header may be longer if the optional | |
120 | * WSI field is used. | |
121 | */ | |
122 | #define CAPWAP_MIN_HLEN (sizeof(struct udphdr) + sizeof(struct capwaphdr)) | |
e90b1cf9 JG |
123 | |
/* Reassembly key: fragments match on IP source, destination and frag id. */
struct frag_match {
	__be32 saddr;
	__be32 daddr;
	__be16 id;
};

/* Per-flow reassembly queue, embedded in the generic inet_frag machinery. */
struct frag_queue {
	struct inet_frag_queue ifq;
	struct frag_match match;
};

/* Per-fragment reassembly state, stored in the skb control buffer. */
struct frag_skb_cb {
	u16 offset;
};
#define FRAG_CB(skb) ((struct frag_skb_cb *)(skb)->cb)
139 | ||
static struct sk_buff *fragment(struct sk_buff *, const struct vport *,
				struct dst_entry *dst, unsigned int hlen);
static struct sk_buff *defrag(struct sk_buff *, bool frag_last);

static void capwap_frag_init(struct inet_frag_queue *, void *match);
static unsigned int capwap_frag_hash(struct inet_frag_queue *);
static int capwap_frag_match(struct inet_frag_queue *, void *match);
static void capwap_frag_expire(unsigned long ifq);

/* Global (not per-namespace) hooks shared by all CAPWAP reassembly queues. */
static struct inet_frags frag_state = {
	.constructor = capwap_frag_init,
	.qsize = sizeof(struct frag_queue),
	.hashfn = capwap_frag_hash,
	.match = capwap_frag_match,
	.frag_expire = capwap_frag_expire,
	.secret_interval = CAPWAP_FRAG_SECRET_INTERVAL,
};
e90b1cf9 | 157 | |
c19e6535 | 158 | static int capwap_hdr_len(const struct tnl_mutable_config *mutable) |
e90b1cf9 | 159 | { |
40a75177 VG |
160 | int size = CAPWAP_MIN_HLEN; |
161 | ||
c19e6535 BP |
162 | /* CAPWAP has no checksums. */ |
163 | if (mutable->flags & TNL_F_CSUM) | |
e90b1cf9 JG |
164 | return -EINVAL; |
165 | ||
6455100f | 166 | /* if keys are specified, then add WSI field */ |
40a75177 VG |
167 | if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) { |
168 | size += sizeof(struct capwaphdr_wsi) + | |
169 | sizeof(struct capwaphdr_wsi_key); | |
170 | } | |
e90b1cf9 | 171 | |
40a75177 | 172 | return size; |
e90b1cf9 JG |
173 | } |
174 | ||
842cf6f4 JG |
/*
 * Fill in the static part of the UDP + CAPWAP header at 'header'.  The
 * UDP length and any per-packet key are written later by
 * capwap_update_header().
 */
static void capwap_build_header(const struct vport *vport,
				const struct tnl_mutable_config *mutable,
				void *header)
{
	struct udphdr *udph = header;
	struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);

	udph->source = htons(CAPWAP_SRC_PORT);
	udph->dest = htons(CAPWAP_DST_PORT);
	udph->check = 0;	/* CAPWAP has no checksums. */

	cwh->frag_id = 0;
	cwh->frag_off = 0;

	if (mutable->out_key || (mutable->flags & TNL_F_OUT_KEY_ACTION)) {
		struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);

		cwh->begin = CAPWAP_KEYED;

		/* -1 for wsi_len byte, not included in length as per spec */
		wsi->wsi_len = sizeof(struct capwaphdr_wsi) - 1
			+ sizeof(struct capwaphdr_wsi_key);
		wsi->flags = CAPWAP_WSI_F_KEY64;
		wsi->reserved_padding = 0;

		/* A fixed key can be written now; per-flow keys
		 * (TNL_F_OUT_KEY_ACTION) are set in capwap_update_header(). */
		if (mutable->out_key) {
			struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);
			opt->key = mutable->out_key;
		}
	} else {
		/* make packet readable by old capwap code */
		cwh->begin = CAPWAP_NO_WSI;
	}
}
209 | ||
/*
 * Per-packet header fixup: write the per-flow key (when keying is set by
 * flow actions), set the UDP length, and software-fragment the packet if
 * it exceeds the path MTU.  Returns the (possibly replaced) skb, or NULL
 * if fragmentation failed.
 */
static struct sk_buff *capwap_update_header(const struct vport *vport,
					    const struct tnl_mutable_config *mutable,
					    struct dst_entry *dst,
					    struct sk_buff *skb)
{
	struct udphdr *udph = udp_hdr(skb);

	if (mutable->flags & TNL_F_OUT_KEY_ACTION) {
		/* first field in WSI is key */
		struct capwaphdr *cwh = (struct capwaphdr *)(udph + 1);
		struct capwaphdr_wsi *wsi = (struct capwaphdr_wsi *)(cwh + 1);
		struct capwaphdr_wsi_key *opt = (struct capwaphdr_wsi_key *)(wsi + 1);

		opt->key = OVS_CB(skb)->tun_id;
	}

	udph->len = htons(skb->len - skb_transport_offset(skb));

	/* CAPWAP does its own fragmentation rather than relying on IP. */
	if (unlikely(skb->len - skb_network_offset(skb) > dst_mtu(dst))) {
		unsigned int hlen = skb_transport_offset(skb) + capwap_hdr_len(mutable);
		skb = fragment(skb, vport, dst, hlen);
	}

	return skb;
}
235 | ||
/*
 * Parse the optional WSI field of a received CAPWAP header and extract a
 * 64-bit tunnel key into '*key' if one is present.  Returns 0 on success
 * (including "no key present"), -EINVAL on a malformed header.
 */
static int process_capwap_wsi(struct sk_buff *skb, __be64 *key)
{
	struct capwaphdr *cwh = capwap_hdr(skb);
	struct capwaphdr_wsi *wsi;
	int hdr_len;
	int rmac_len = 0;
	int wsi_len;

	/* Only our "experimental" WBID 30 packets carry OVS WSI data. */
	if (((cwh->begin & CAPWAP_WBID_MASK) != CAPWAP_WBID_30))
		return 0;

	/* An RMAC field, if present, sits between the header and the WSI. */
	if (cwh->begin & CAPWAP_F_RMAC)
		rmac_len = CAPWAP_RMAC_LEN;

	hdr_len = ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT;

	/* The advertised header must at least cover a WSI header. */
	if (unlikely(sizeof(struct capwaphdr) + rmac_len + sizeof(struct capwaphdr_wsi) > hdr_len))
		return -EINVAL;

	/* read wsi header to find out how big it really is */
	wsi = (struct capwaphdr_wsi *)((u8 *)(cwh + 1) + rmac_len);
	/* +1 for length byte not included in wsi_len */
	wsi_len = 1 + wsi->wsi_len;

	/* The WSI data must exactly fill the remainder of the header. */
	if (unlikely(sizeof(struct capwaphdr) + rmac_len + wsi_len != hdr_len))
		return -EINVAL;

	wsi_len -= sizeof(struct capwaphdr_wsi);

	if (wsi->flags & CAPWAP_WSI_F_KEY64) {
		struct capwaphdr_wsi_key *opt;

		if (unlikely(wsi_len < sizeof(struct capwaphdr_wsi_key)))
			return -EINVAL;

		opt = (struct capwaphdr_wsi_key *)(wsi + 1);
		*key = opt->key;
	}

	return 0;
}
277 | ||
/*
 * Validate and strip the UDP + CAPWAP encapsulation from a received
 * packet, reassembling fragments and extracting a tunnel key (via
 * process_capwap_wsi()) into '*key'.  Consumes 'skb' and returns NULL on
 * any error; otherwise returns the decapsulated skb.
 */
static struct sk_buff *process_capwap_proto(struct sk_buff *skb, __be64 *key)
{
	struct capwaphdr *cwh = capwap_hdr(skb);
	int hdr_len = sizeof(struct udphdr);

	/* Bits that are always zero in headers we understand must be zero. */
	if (unlikely((cwh->begin & CAPWAP_ZERO_MASK) != 0))
		goto error;

	hdr_len += ntohl(cwh->begin & CAPWAP_HLEN_MASK) >> CAPWAP_HLEN_SHIFT;
	if (unlikely(hdr_len < CAPWAP_MIN_HLEN))
		goto error;

	if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN)))
		goto error;

	/* Re-fetch the header pointer: the pull may have moved skb data. */
	cwh = capwap_hdr(skb);
	__skb_pull(skb, hdr_len);
	skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN);

	if (cwh->begin & CAPWAP_F_FRAG) {
		/* defrag() consumes skb; NULL means "queued or dropped". */
		skb = defrag(skb, (__force bool)(cwh->begin & CAPWAP_F_LASTFRAG));
		if (!skb)
			return NULL;
		cwh = capwap_hdr(skb);
	}

	if ((cwh->begin & CAPWAP_F_WSI) && process_capwap_wsi(skb, key))
		goto error;

	return skb;
error:
	kfree_skb(skb);
	return NULL;
}
312 | ||
/*
 * UDP encap_rcv handler: entry point for every received CAPWAP packet.
 * Called with rcu_read_lock and BH disabled.  Always returns 0; the skb
 * is consumed on all paths.
 */
static int capwap_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct vport *vport;
	const struct tnl_mutable_config *mutable;
	struct iphdr *iph;
	__be64 key = 0;

	if (unlikely(!pskb_may_pull(skb, CAPWAP_MIN_HLEN + ETH_HLEN)))
		goto error;

	/* Decapsulate; the callee frees the skb on failure. */
	skb = process_capwap_proto(skb, &key);
	if (unlikely(!skb))
		goto out;

	iph = ip_hdr(skb);
	vport = ovs_tnl_find_port(sock_net(sk), iph->daddr, iph->saddr, key,
				  TNL_T_PROTO_CAPWAP, &mutable);
	if (unlikely(!vport)) {
		/* No tunnel configured for this flow: reject like a closed
		 * UDP port would. */
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
		goto error;
	}

	/* Only expose the key upward when the port matches by key. */
	if (mutable->flags & TNL_F_IN_KEY_MATCH)
		OVS_CB(skb)->tun_id = key;
	else
		OVS_CB(skb)->tun_id = 0;

	ovs_tnl_rcv(vport, skb, iph->tos);
	goto out;

error:
	kfree_skb(skb);
out:
	return 0;
}
349 | ||
/* Tunnel-protocol hooks shared by every CAPWAP vport. */
static const struct tnl_ops capwap_tnl_ops = {
	.tunnel_type = TNL_T_PROTO_CAPWAP,
	.ipproto = IPPROTO_UDP,
	.hdr_len = capwap_hdr_len,
	.build_header = capwap_build_header,
	.update_header = capwap_update_header,
};
357 | ||
/* Look up the per-network-namespace CAPWAP state. */
static inline struct capwap_net *ovs_get_capwap_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	return &ovs_net->vport_net.capwap;
}
363 | ||
7dc05f69 | 364 | /* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. */ |
e90b1cf9 | 365 | #define UDP_ENCAP_CAPWAP 10 |
/*
 * Create and bind the shared per-namespace CAPWAP UDP socket on first
 * use; later calls just take another reference.  Also sets up the
 * per-namespace fragment reassembly limits.  Returns 0 or -errno.
 */
static int init_socket(struct net *net)
{
	int err;
	struct capwap_net *capwap_net = ovs_get_capwap_net(net);
	struct sockaddr_in sin;

	/* Socket already exists for this namespace; just count the user. */
	if (capwap_net->n_tunnels) {
		capwap_net->n_tunnels++;
		return 0;
	}

	err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
			       &capwap_net->capwap_rcv_socket);
	if (err)
		goto error;

	/* release net ref. */
	sk_change_net(capwap_net->capwap_rcv_socket->sk, net);

	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = htons(CAPWAP_DST_PORT);

	err = kernel_bind(capwap_net->capwap_rcv_socket,
			  (struct sockaddr *)&sin,
			  sizeof(struct sockaddr_in));
	if (err)
		goto error_sock;

	/* Divert UDP packets received on this port to capwap_rcv(). */
	udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_type = UDP_ENCAP_CAPWAP;
	udp_sk(capwap_net->capwap_rcv_socket->sk)->encap_rcv = capwap_rcv;

	capwap_net->frag_state.timeout = CAPWAP_FRAG_TIMEOUT;
	capwap_net->frag_state.high_thresh = CAPWAP_FRAG_MAX_MEM;
	capwap_net->frag_state.low_thresh = CAPWAP_FRAG_PRUNE_MEM;

	inet_frags_init_net(&capwap_net->frag_state);

	capwap_net->n_tunnels++;
	return 0;

error_sock:
	sk_release_kernel(capwap_net->capwap_rcv_socket->sk);
error:
	pr_warn("cannot register capwap protocol handler : %d\n", err);
	return err;
}
413 | ||
2a4999f3 PS |
414 | static void release_socket(struct net *net) |
415 | { | |
416 | struct capwap_net *capwap_net = ovs_get_capwap_net(net); | |
417 | ||
418 | capwap_net->n_tunnels--; | |
419 | if (capwap_net->n_tunnels) | |
420 | return; | |
421 | ||
422 | inet_frags_exit_net(&capwap_net->frag_state, &frag_state); | |
423 | sk_release_kernel(capwap_net->capwap_rcv_socket->sk); | |
424 | } | |
425 | ||
426 | static struct vport *capwap_create(const struct vport_parms *parms) | |
427 | { | |
428 | struct vport *vport; | |
429 | int err; | |
430 | ||
431 | err = init_socket(ovs_dp_get_net(parms->dp)); | |
432 | if (err) | |
433 | return ERR_PTR(err); | |
434 | ||
435 | vport = ovs_tnl_create(parms, &ovs_capwap_vport_ops, &capwap_tnl_ops); | |
436 | if (IS_ERR(vport)) | |
437 | release_socket(ovs_dp_get_net(parms->dp)); | |
438 | ||
439 | return vport; | |
440 | } | |
441 | ||
/* Destroy a CAPWAP vport, then drop its reference on the shared socket. */
static void capwap_destroy(struct vport *vport)
{
	ovs_tnl_destroy(vport);
	release_socket(ovs_dp_get_net(vport->dp));
}
447 | ||
/* Module-wide init: register the global CAPWAP reassembly machinery. */
static int capwap_init(void)
{
	inet_frags_init(&frag_state);
	return 0;
}
453 | ||
e90b1cf9 JG |
/* Module-wide teardown counterpart to capwap_init(). */
static void capwap_exit(void)
{
	inet_frags_fini(&frag_state);
}
458 | ||
/* Copy the non-data skb fields that every fragment must share with the
 * original packet (routing, QoS, ownership and marking state). */
static void copy_skb_metadata(struct sk_buff *from, struct sk_buff *to)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	if (from->sk)
		skb_set_owner_w(to, from->sk);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	to->ipvs_property = from->ipvs_property;
#endif
	skb_copy_secmark(to, from);
}
479 | ||
/*
 * Software-fragment 'skb' into a chain of CAPWAP fragments that each fit
 * the path MTU.  'hlen' is the length of the headers (through the CAPWAP
 * header) replicated at the front of every fragment.  Consumes 'skb';
 * returns the fragment chain, or NULL on failure.
 */
static struct sk_buff *fragment(struct sk_buff *skb, const struct vport *vport,
				struct dst_entry *dst, unsigned int hlen)
{
	struct tnl_vport *tnl_vport = tnl_vport_priv(vport);
	unsigned int headroom;
	unsigned int max_frame_len = dst_mtu(dst) + skb_network_offset(skb);
	struct sk_buff *result = NULL, *list_cur = NULL;
	unsigned int remaining;
	unsigned int offset;
	__be16 frag_id;

	/* Each fragment must hold at least one 8-byte payload chunk. */
	if (hlen + ~FRAG_OFF_MASK + 1 > max_frame_len) {
		if (net_ratelimit())
			pr_warn("capwap link mtu (%d) is less than minimum packet (%d)\n",
				dst_mtu(dst),
				hlen - skb_network_offset(skb) + ~FRAG_OFF_MASK + 1);
		goto error;
	}

	remaining = skb->len - hlen;
	offset = 0;
	frag_id = htons(atomic_inc_return(&tnl_vport->frag_id));

	headroom = dst->header_len + 16;
	if (!skb_network_offset(skb))
		headroom += LL_RESERVED_SPACE(dst->dev);

	while (remaining) {
		struct sk_buff *skb2;
		int frag_size;
		struct udphdr *udph;
		struct capwaphdr *cwh;

		/* All but the last fragment carry a multiple of 8 bytes. */
		frag_size = min(remaining, max_frame_len - hlen);
		if (remaining > frag_size)
			frag_size &= FRAG_OFF_MASK;

		skb2 = alloc_skb(headroom + hlen + frag_size, GFP_ATOMIC);
		if (!skb2)
			goto error;

		skb_reserve(skb2, headroom);
		__skb_put(skb2, hlen + frag_size);

		if (skb_network_offset(skb))
			skb_reset_mac_header(skb2);
		skb_set_network_header(skb2, skb_network_offset(skb));
		skb_set_transport_header(skb2, skb_transport_offset(skb));

		/* Copy (Ethernet)/IP/UDP/CAPWAP header. */
		copy_skb_metadata(skb, skb2);
		skb_copy_from_linear_data(skb, skb2->data, hlen);

		/* Copy this data chunk. */
		if (skb_copy_bits(skb, hlen + offset, skb2->data + hlen, frag_size))
			BUG();

		udph = udp_hdr(skb2);
		udph->len = htons(skb2->len - skb_transport_offset(skb2));

		cwh = capwap_hdr(skb2);
		if (remaining > frag_size)
			cwh->begin |= FRAG_HDR;
		else
			cwh->begin |= FRAG_LAST_HDR;
		cwh->frag_id = frag_id;
		cwh->frag_off = htons(offset);

		/* Append to the singly linked result chain. */
		if (result) {
			list_cur->next = skb2;
			list_cur = skb2;
		} else
			result = list_cur = skb2;

		offset += frag_size;
		remaining -= frag_size;
	}

	consume_skb(skb);
	return result;

error:
	ovs_tnl_free_linked_skbs(result);
	kfree_skb(skb);
	return NULL;
}
566 | ||
567 | /* All of the following functions relate to fragmentation reassembly. */ | |
568 | ||
/* Convert a generic inet fragment queue to its containing frag_queue. */
static struct frag_queue *ifq_cast(struct inet_frag_queue *ifq)
{
	return container_of(ifq, struct frag_queue, ifq);
}
573 | ||
/* Hash a reassembly key into the global inet_frags hash table. */
static u32 frag_hash(struct frag_match *match)
{
	return jhash_3words((__force u16)match->id, (__force u32)match->saddr,
			    (__force u32)match->daddr,
			    frag_state.rnd) & (INETFRAGS_HASHSZ - 1);
}
580 | ||
2a4999f3 PS |
/*
 * Find (or create) the reassembly queue for 'match', returning it with a
 * reference held, or NULL on allocation failure.
 */
static struct frag_queue *queue_find(struct netns_frags *ns_frag_state,
				     struct frag_match *match)
{
	struct inet_frag_queue *ifq;

	/* inet_frag_find() expects the hash lock held on entry. */
	read_lock(&frag_state.lock);

	ifq = inet_frag_find(ns_frag_state, &frag_state, match, frag_hash(match));
	if (!ifq)
		return NULL;

	/* Unlock happens inside inet_frag_find(). */

	return ifq_cast(ifq);
}
596 | ||
/*
 * Build the reassembled packet from a completed queue.  Returns the head
 * skb with the remaining fragments chained on its frag_list, or NULL on
 * failure.  Either way, the queue is finished afterwards.
 */
static struct sk_buff *frag_reasm(struct frag_queue *fq, struct net_device *dev)
{
	struct sk_buff *head = fq->ifq.fragments;
	struct sk_buff *frag;

	/* Succeed or fail, we're done with this queue. */
	inet_frag_kill(&fq->ifq, &frag_state);

	/* Reassembled length must fit a 16-bit IP total-length field. */
	if (fq->ifq.len > 65535)
		return NULL;

	/* Can't have the head be a clone. */
	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
		return NULL;

	/*
	 * We're about to build frag list for this SKB.  If it already has a
	 * frag list, alloc a new SKB and put the existing frag list there.
	 */
	if (skb_shinfo(head)->frag_list) {
		int i;
		int paged_len = 0;

		frag = alloc_skb(0, GFP_ATOMIC);
		if (!frag)
			return NULL;

		frag->next = head->next;
		head->next = frag;
		skb_shinfo(frag)->frag_list = skb_shinfo(head)->frag_list;
		skb_shinfo(head)->frag_list = NULL;

		/* Move the byte count the old frag_list held onto 'frag'. */
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			paged_len += skb_shinfo(head)->frags[i].size;
		frag->len = frag->data_len = head->data_len - paged_len;
		head->data_len -= frag->len;
		head->len -= frag->len;

		frag->ip_summed = head->ip_summed;
		atomic_add(frag->truesize, &fq->ifq.net->mem);
	}

	skb_shinfo(head)->frag_list = head->next;
	atomic_sub(head->truesize, &fq->ifq.net->mem);

	/* Properly account for data in various packets. */
	for (frag = head->next; frag; frag = frag->next) {
		head->data_len += frag->len;
		head->len += frag->len;

		if (head->ip_summed != frag->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
		else if (head->ip_summed == CHECKSUM_COMPLETE)
			head->csum = csum_add(head->csum, frag->csum);

		head->truesize += frag->truesize;
		atomic_sub(frag->truesize, &fq->ifq.net->mem);
	}

	head->next = NULL;
	head->dev = dev;
	head->tstamp = fq->ifq.stamp;
	fq->ifq.fragments = NULL;

	return head;
}
663 | ||
/*
 * Insert one fragment into its reassembly queue.  Returns the fully
 * reassembled packet once all pieces have arrived, otherwise NULL.
 * Malformed or overlapping fragments are dropped.  Called with the queue
 * lock held.
 */
static struct sk_buff *frag_queue(struct frag_queue *fq, struct sk_buff *skb,
				  u16 offset, bool frag_last)
{
	struct sk_buff *prev, *next;
	struct net_device *dev;
	int end;

	if (fq->ifq.last_in & INET_FRAG_COMPLETE)
		goto error;

	if (!skb->len)
		goto error;

	end = offset + skb->len;

	if (frag_last) {
		/*
		 * Last fragment, shouldn't already have data past our end or
		 * have another last fragment.
		 */
		if (end < fq->ifq.len || fq->ifq.last_in & INET_FRAG_LAST_IN)
			goto error;

		fq->ifq.last_in |= INET_FRAG_LAST_IN;
		fq->ifq.len = end;
	} else {
		/* Fragments should align to 8 byte chunks. */
		if (end & ~FRAG_OFF_MASK)
			goto error;

		if (end > fq->ifq.len) {
			/*
			 * Shouldn't have data past the end, if we already
			 * have one.
			 */
			if (fq->ifq.last_in & INET_FRAG_LAST_IN)
				goto error;

			fq->ifq.len = end;
		}
	}

	/* Find where we fit in. */
	prev = NULL;
	for (next = fq->ifq.fragments; next != NULL; next = next->next) {
		if (FRAG_CB(next)->offset >= offset)
			break;
		prev = next;
	}

	/*
	 * Overlapping fragments aren't allowed.  We shouldn't start before
	 * the end of the previous fragment.
	 */
	if (prev && FRAG_CB(prev)->offset + prev->len > offset)
		goto error;

	/* We also shouldn't end after the beginning of the next fragment. */
	if (next && end > FRAG_CB(next)->offset)
		goto error;

	FRAG_CB(skb)->offset = offset;

	/* Link into list. */
	skb->next = next;
	if (prev)
		prev->next = skb;
	else
		fq->ifq.fragments = skb;

	dev = skb->dev;
	skb->dev = NULL;

	fq->ifq.stamp = skb->tstamp;
	fq->ifq.meat += skb->len;
	atomic_add(skb->truesize, &fq->ifq.net->mem);
	if (offset == 0)
		fq->ifq.last_in |= INET_FRAG_FIRST_IN;

	/* If we have all fragments do reassembly. */
	if (fq->ifq.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
	    fq->ifq.meat == fq->ifq.len)
		return frag_reasm(fq, dev);

	/* Not complete yet; keep the queue fresh on the LRU list. */
	write_lock(&frag_state.lock);
	list_move_tail(&fq->ifq.lru_list, &fq->ifq.net->lru_list);
	write_unlock(&frag_state.lock);

	return NULL;

error:
	kfree_skb(skb);
	return NULL;
}
758 | ||
/*
 * Feed a received CAPWAP fragment into reassembly.  Returns the complete
 * packet when this fragment finishes it; otherwise NULL (the fragment
 * was queued or dropped).  Consumes 'skb' in all cases.
 */
static struct sk_buff *defrag(struct sk_buff *skb, bool frag_last)
{
	struct iphdr *iph = ip_hdr(skb);
	struct capwaphdr *cwh = capwap_hdr(skb);
	struct capwap_net *capwap_net = ovs_get_capwap_net(dev_net(skb->dev));
	struct netns_frags *ns_frag_state = &capwap_net->frag_state;
	struct frag_match match;
	u16 frag_off;
	struct frag_queue *fq;

	/* Over the memory budget: evict old queues before adding more. */
	if (atomic_read(&ns_frag_state->mem) > ns_frag_state->high_thresh)
		inet_frag_evictor(ns_frag_state, &frag_state);

	match.daddr = iph->daddr;
	match.saddr = iph->saddr;
	match.id = cwh->frag_id;
	frag_off = ntohs(cwh->frag_off) & FRAG_OFF_MASK;

	fq = queue_find(ns_frag_state, &match);
	if (fq) {
		spin_lock(&fq->ifq.lock);
		skb = frag_queue(fq, skb, frag_off, frag_last);
		spin_unlock(&fq->ifq.lock);

		inet_frag_put(&fq->ifq, &frag_state);

		return skb;
	}

	/* No queue could be found or created; drop the fragment. */
	kfree_skb(skb);
	return NULL;
}
791 | ||
e90b1cf9 JG |
792 | static void capwap_frag_init(struct inet_frag_queue *ifq, void *match_) |
793 | { | |
794 | struct frag_match *match = match_; | |
795 | ||
796 | ifq_cast(ifq)->match = *match; | |
797 | } | |
798 | ||
/* inet_frags hash callback: hash a queue by its stored match key. */
static unsigned int capwap_frag_hash(struct inet_frag_queue *ifq)
{
	return frag_hash(&ifq_cast(ifq)->match);
}
803 | ||
804 | static int capwap_frag_match(struct inet_frag_queue *ifq, void *a_) | |
805 | { | |
806 | struct frag_match *a = a_; | |
807 | struct frag_match *b = &ifq_cast(ifq)->match; | |
808 | ||
809 | return a->id == b->id && a->saddr == b->saddr && a->daddr == b->daddr; | |
810 | } | |
811 | ||
/* Run when the timeout for a given queue expires.  Kills the queue if it
 * never completed and drops the timer's reference. */
static void capwap_frag_expire(unsigned long ifq)
{
	struct frag_queue *fq;

	fq = ifq_cast((struct inet_frag_queue *)ifq);

	spin_lock(&fq->ifq.lock);

	if (!(fq->ifq.last_in & INET_FRAG_COMPLETE))
		inet_frag_kill(&fq->ifq, &frag_state);

	spin_unlock(&fq->ifq.lock);
	inet_frag_put(&fq->ifq, &frag_state);
}
827 | ||
/* vport class for CAPWAP tunnels; most operations reuse the generic
 * tunnel and vport helpers. */
const struct vport_ops ovs_capwap_vport_ops = {
	.type = OVS_VPORT_TYPE_CAPWAP,
	.flags = VPORT_F_TUN_ID,
	.init = capwap_init,
	.exit = capwap_exit,
	.create = capwap_create,
	.destroy = capwap_destroy,
	.set_addr = ovs_tnl_set_addr,
	.get_name = ovs_tnl_get_name,
	.get_addr = ovs_tnl_get_addr,
	.get_options = ovs_tnl_get_options,
	.set_options = ovs_tnl_set_options,
	.get_dev_flags = ovs_vport_gen_get_dev_flags,
	.is_running = ovs_vport_gen_is_running,
	.get_operstate = ovs_vport_gen_get_operstate,
	.send = ovs_tnl_send,
};
9d9a0a04 JG |
845 | #else |
846 | #warning CAPWAP tunneling will not be available on kernels before 2.6.26 | |
847 | #endif /* Linux kernel < 2.6.26 */ |