]>
Commit | Line | Data |
---|---|---|
da5bab07 DB |
1 | /* |
2 | * IPV4 GSO/GRO offload support | |
3 | * Linux INET implementation | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License | |
7 | * as published by the Free Software Foundation; either version | |
8 | * 2 of the License, or (at your option) any later version. | |
9 | * | |
10 | * UDPv4 GSO support | |
11 | */ | |
12 | ||
13 | #include <linux/skbuff.h> | |
14 | #include <net/udp.h> | |
15 | #include <net/protocol.h> | |
16 | ||
b582ef09 | 17 | static DEFINE_SPINLOCK(udp_offload_lock); |
a1d0cd8e | 18 | static struct udp_offload_priv __rcu *udp_offload_base __read_mostly; |
b582ef09 | 19 | |
a664a4f7 SP |
20 | #define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock)) |
21 | ||
b582ef09 OG |
22 | struct udp_offload_priv { |
23 | struct udp_offload *offload; | |
787d7ac3 | 24 | possible_net_t net; |
b582ef09 OG |
25 | struct rcu_head rcu; |
26 | struct udp_offload_priv __rcu *next; | |
27 | }; | |
28 | ||
8bce6d7d TH |
29 | static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, |
30 | netdev_features_t features, | |
31 | struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, | |
32 | netdev_features_t features), | |
4bcb877d | 33 | __be16 new_protocol, bool is_ipv6) |
155e010e TH |
34 | { |
35 | struct sk_buff *segs = ERR_PTR(-EINVAL); | |
36 | u16 mac_offset = skb->mac_header; | |
37 | int mac_len = skb->mac_len; | |
38 | int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); | |
39 | __be16 protocol = skb->protocol; | |
155e010e TH |
40 | int udp_offset, outer_hlen; |
41 | unsigned int oldlen; | |
4bcb877d TH |
42 | bool need_csum = !!(skb_shinfo(skb)->gso_type & |
43 | SKB_GSO_UDP_TUNNEL_CSUM); | |
e585f236 TH |
44 | bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); |
45 | bool offload_csum = false, dont_encap = (need_csum || remcsum); | |
155e010e TH |
46 | |
47 | oldlen = (u16)~skb->len; | |
48 | ||
49 | if (unlikely(!pskb_may_pull(skb, tnl_hlen))) | |
50 | goto out; | |
51 | ||
52 | skb->encapsulation = 0; | |
53 | __skb_pull(skb, tnl_hlen); | |
54 | skb_reset_mac_header(skb); | |
55 | skb_set_network_header(skb, skb_inner_network_offset(skb)); | |
56 | skb->mac_len = skb_inner_network_offset(skb); | |
8bce6d7d | 57 | skb->protocol = new_protocol; |
4bcb877d | 58 | skb->encap_hdr_csum = need_csum; |
e585f236 | 59 | skb->remcsum_offload = remcsum; |
155e010e | 60 | |
4bcb877d TH |
61 | /* Try to offload checksum if possible */ |
62 | offload_csum = !!(need_csum && | |
c8cd0989 TH |
63 | ((skb->dev->features & NETIF_F_HW_CSUM) || |
64 | (skb->dev->features & (is_ipv6 ? | |
65 | NETIF_F_IPV6_CSUM : NETIF_F_IP_CSUM)))); | |
155e010e | 66 | |
bef3c6c9 AD |
67 | features &= skb->dev->hw_enc_features; |
68 | ||
155e010e | 69 | /* segment inner packet. */ |
bef3c6c9 | 70 | segs = gso_inner_segment(skb, features); |
27446442 | 71 | if (IS_ERR_OR_NULL(segs)) { |
155e010e TH |
72 | skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, |
73 | mac_len); | |
74 | goto out; | |
75 | } | |
76 | ||
77 | outer_hlen = skb_tnl_header_len(skb); | |
78 | udp_offset = outer_hlen - tnl_hlen; | |
79 | skb = segs; | |
80 | do { | |
81 | struct udphdr *uh; | |
82 | int len; | |
4bcb877d TH |
83 | __be32 delta; |
84 | ||
85 | if (dont_encap) { | |
86 | skb->encapsulation = 0; | |
87 | skb->ip_summed = CHECKSUM_NONE; | |
88 | } else { | |
89 | /* Only set up inner headers if we might be offloading | |
90 | * inner checksum. | |
91 | */ | |
92 | skb_reset_inner_headers(skb); | |
93 | skb->encapsulation = 1; | |
94 | } | |
155e010e TH |
95 | |
96 | skb->mac_len = mac_len; | |
4bcb877d | 97 | skb->protocol = protocol; |
155e010e TH |
98 | |
99 | skb_push(skb, outer_hlen); | |
100 | skb_reset_mac_header(skb); | |
101 | skb_set_network_header(skb, mac_len); | |
102 | skb_set_transport_header(skb, udp_offset); | |
103 | len = skb->len - udp_offset; | |
104 | uh = udp_hdr(skb); | |
105 | uh->len = htons(len); | |
106 | ||
4bcb877d TH |
107 | if (!need_csum) |
108 | continue; | |
109 | ||
110 | delta = htonl(oldlen + len); | |
155e010e | 111 | |
4bcb877d TH |
112 | uh->check = ~csum_fold((__force __wsum) |
113 | ((__force u32)uh->check + | |
114 | (__force u32)delta)); | |
4bcb877d TH |
115 | if (offload_csum) { |
116 | skb->ip_summed = CHECKSUM_PARTIAL; | |
117 | skb->csum_start = skb_transport_header(skb) - skb->head; | |
118 | skb->csum_offset = offsetof(struct udphdr, check); | |
e585f236 TH |
119 | } else if (remcsum) { |
120 | /* Need to calculate checksum from scratch, | |
121 | * inner checksums are never when doing | |
122 | * remote_checksum_offload. | |
123 | */ | |
124 | ||
125 | skb->csum = skb_checksum(skb, udp_offset, | |
126 | skb->len - udp_offset, | |
127 | 0); | |
128 | uh->check = csum_fold(skb->csum); | |
129 | if (uh->check == 0) | |
130 | uh->check = CSUM_MANGLED_0; | |
4bcb877d | 131 | } else { |
155e010e TH |
132 | uh->check = gso_make_checksum(skb, ~uh->check); |
133 | ||
134 | if (uh->check == 0) | |
135 | uh->check = CSUM_MANGLED_0; | |
136 | } | |
155e010e TH |
137 | } while ((skb = skb->next)); |
138 | out: | |
139 | return segs; | |
140 | } | |
141 | ||
8bce6d7d TH |
142 | struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, |
143 | netdev_features_t features, | |
144 | bool is_ipv6) | |
145 | { | |
146 | __be16 protocol = skb->protocol; | |
147 | const struct net_offload **offloads; | |
148 | const struct net_offload *ops; | |
149 | struct sk_buff *segs = ERR_PTR(-EINVAL); | |
150 | struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, | |
151 | netdev_features_t features); | |
152 | ||
153 | rcu_read_lock(); | |
154 | ||
155 | switch (skb->inner_protocol_type) { | |
156 | case ENCAP_TYPE_ETHER: | |
157 | protocol = skb->inner_protocol; | |
158 | gso_inner_segment = skb_mac_gso_segment; | |
159 | break; | |
160 | case ENCAP_TYPE_IPPROTO: | |
161 | offloads = is_ipv6 ? inet6_offloads : inet_offloads; | |
162 | ops = rcu_dereference(offloads[skb->inner_ipproto]); | |
163 | if (!ops || !ops->callbacks.gso_segment) | |
164 | goto out_unlock; | |
165 | gso_inner_segment = ops->callbacks.gso_segment; | |
166 | break; | |
167 | default: | |
168 | goto out_unlock; | |
169 | } | |
170 | ||
171 | segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment, | |
4bcb877d | 172 | protocol, is_ipv6); |
8bce6d7d TH |
173 | |
174 | out_unlock: | |
175 | rcu_read_unlock(); | |
176 | ||
177 | return segs; | |
178 | } | |
179 | ||
da5bab07 DB |
180 | static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, |
181 | netdev_features_t features) | |
182 | { | |
183 | struct sk_buff *segs = ERR_PTR(-EINVAL); | |
184 | unsigned int mss; | |
7a7ffbab | 185 | __wsum csum; |
f71470b3 TH |
186 | struct udphdr *uh; |
187 | struct iphdr *iph; | |
7a7ffbab WCC |
188 | |
189 | if (skb->encapsulation && | |
0f4f4ffa TH |
190 | (skb_shinfo(skb)->gso_type & |
191 | (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { | |
8bce6d7d | 192 | segs = skb_udp_tunnel_segment(skb, features, false); |
7a7ffbab WCC |
193 | goto out; |
194 | } | |
da5bab07 | 195 | |
f71470b3 TH |
196 | if (!pskb_may_pull(skb, sizeof(struct udphdr))) |
197 | goto out; | |
198 | ||
da5bab07 DB |
199 | mss = skb_shinfo(skb)->gso_size; |
200 | if (unlikely(skb->len <= mss)) | |
201 | goto out; | |
202 | ||
203 | if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { | |
204 | /* Packet is from an untrusted source, reset gso_segs. */ | |
205 | int type = skb_shinfo(skb)->gso_type; | |
206 | ||
207 | if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | | |
208 | SKB_GSO_UDP_TUNNEL | | |
0f4f4ffa | 209 | SKB_GSO_UDP_TUNNEL_CSUM | |
e585f236 | 210 | SKB_GSO_TUNNEL_REMCSUM | |
cb32f511 | 211 | SKB_GSO_IPIP | |
59b93b41 | 212 | SKB_GSO_GRE | SKB_GSO_GRE_CSUM) || |
da5bab07 DB |
213 | !(type & (SKB_GSO_UDP)))) |
214 | goto out; | |
215 | ||
216 | skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); | |
217 | ||
218 | segs = NULL; | |
219 | goto out; | |
220 | } | |
221 | ||
7a7ffbab WCC |
222 | /* Do software UFO. Complete and fill in the UDP checksum as |
223 | * HW cannot do checksum of UDP packets sent as multiple | |
224 | * IP fragments. | |
225 | */ | |
f71470b3 TH |
226 | |
227 | uh = udp_hdr(skb); | |
228 | iph = ip_hdr(skb); | |
229 | ||
230 | uh->check = 0; | |
231 | csum = skb_checksum(skb, 0, skb->len, 0); | |
232 | uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); | |
233 | if (uh->check == 0) | |
234 | uh->check = CSUM_MANGLED_0; | |
235 | ||
7a7ffbab WCC |
236 | skb->ip_summed = CHECKSUM_NONE; |
237 | ||
da5bab07 DB |
238 | /* Fragment the skb. IP headers of the fragments are updated in |
239 | * inet_gso_segment() | |
240 | */ | |
7a7ffbab | 241 | segs = skb_segment(skb, features); |
da5bab07 DB |
242 | out: |
243 | return segs; | |
244 | } | |
245 | ||
787d7ac3 | 246 | int udp_add_offload(struct net *net, struct udp_offload *uo) |
b582ef09 | 247 | { |
b5aaab12 | 248 | struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC); |
b582ef09 OG |
249 | |
250 | if (!new_offload) | |
251 | return -ENOMEM; | |
252 | ||
787d7ac3 | 253 | write_pnet(&new_offload->net, net); |
b582ef09 OG |
254 | new_offload->offload = uo; |
255 | ||
256 | spin_lock(&udp_offload_lock); | |
a664a4f7 SP |
257 | new_offload->next = udp_offload_base; |
258 | rcu_assign_pointer(udp_offload_base, new_offload); | |
b582ef09 OG |
259 | spin_unlock(&udp_offload_lock); |
260 | ||
261 | return 0; | |
262 | } | |
263 | EXPORT_SYMBOL(udp_add_offload); | |
264 | ||
265 | static void udp_offload_free_routine(struct rcu_head *head) | |
266 | { | |
267 | struct udp_offload_priv *ou_priv = container_of(head, struct udp_offload_priv, rcu); | |
268 | kfree(ou_priv); | |
269 | } | |
270 | ||
271 | void udp_del_offload(struct udp_offload *uo) | |
272 | { | |
273 | struct udp_offload_priv __rcu **head = &udp_offload_base; | |
274 | struct udp_offload_priv *uo_priv; | |
275 | ||
276 | spin_lock(&udp_offload_lock); | |
277 | ||
a664a4f7 | 278 | uo_priv = udp_deref_protected(*head); |
b582ef09 | 279 | for (; uo_priv != NULL; |
a664a4f7 | 280 | uo_priv = udp_deref_protected(*head)) { |
b582ef09 | 281 | if (uo_priv->offload == uo) { |
a664a4f7 SP |
282 | rcu_assign_pointer(*head, |
283 | udp_deref_protected(uo_priv->next)); | |
b582ef09 OG |
284 | goto unlock; |
285 | } | |
286 | head = &uo_priv->next; | |
287 | } | |
a1d0cd8e | 288 | pr_warn("udp_del_offload: didn't find offload for port %d\n", ntohs(uo->port)); |
b582ef09 OG |
289 | unlock: |
290 | spin_unlock(&udp_offload_lock); | |
00db4124 | 291 | if (uo_priv) |
b582ef09 OG |
292 | call_rcu(&uo_priv->rcu, udp_offload_free_routine); |
293 | } | |
294 | EXPORT_SYMBOL(udp_del_offload); | |
295 | ||
57c67ff4 TH |
296 | struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, |
297 | struct udphdr *uh) | |
b582ef09 OG |
298 | { |
299 | struct udp_offload_priv *uo_priv; | |
300 | struct sk_buff *p, **pp = NULL; | |
57c67ff4 TH |
301 | struct udphdr *uh2; |
302 | unsigned int off = skb_gro_offset(skb); | |
b582ef09 OG |
303 | int flush = 1; |
304 | ||
305 | if (NAPI_GRO_CB(skb)->udp_mark || | |
662880f4 TH |
306 | (skb->ip_summed != CHECKSUM_PARTIAL && |
307 | NAPI_GRO_CB(skb)->csum_cnt == 0 && | |
308 | !NAPI_GRO_CB(skb)->csum_valid)) | |
b582ef09 OG |
309 | goto out; |
310 | ||
311 | /* mark that this skb passed once through the udp gro layer */ | |
312 | NAPI_GRO_CB(skb)->udp_mark = 1; | |
b582ef09 OG |
313 | |
314 | rcu_read_lock(); | |
315 | uo_priv = rcu_dereference(udp_offload_base); | |
316 | for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { | |
787d7ac3 HFS |
317 | if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && |
318 | uo_priv->offload->port == uh->dest && | |
b582ef09 OG |
319 | uo_priv->offload->callbacks.gro_receive) |
320 | goto unflush; | |
321 | } | |
322 | goto out_unlock; | |
323 | ||
324 | unflush: | |
325 | flush = 0; | |
326 | ||
327 | for (p = *head; p; p = p->next) { | |
328 | if (!NAPI_GRO_CB(p)->same_flow) | |
329 | continue; | |
330 | ||
331 | uh2 = (struct udphdr *)(p->data + off); | |
57c67ff4 TH |
332 | |
333 | /* Match ports and either checksums are either both zero | |
334 | * or nonzero. | |
335 | */ | |
336 | if ((*(u32 *)&uh->source != *(u32 *)&uh2->source) || | |
337 | (!uh->check ^ !uh2->check)) { | |
b582ef09 OG |
338 | NAPI_GRO_CB(p)->same_flow = 0; |
339 | continue; | |
340 | } | |
341 | } | |
342 | ||
343 | skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ | |
6bae1d4c | 344 | skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); |
afe93325 | 345 | NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; |
a2b12f3c TH |
346 | pp = uo_priv->offload->callbacks.gro_receive(head, skb, |
347 | uo_priv->offload); | |
b582ef09 OG |
348 | |
349 | out_unlock: | |
350 | rcu_read_unlock(); | |
351 | out: | |
352 | NAPI_GRO_CB(skb)->flush |= flush; | |
353 | return pp; | |
354 | } | |
355 | ||
57c67ff4 TH |
356 | static struct sk_buff **udp4_gro_receive(struct sk_buff **head, |
357 | struct sk_buff *skb) | |
358 | { | |
359 | struct udphdr *uh = udp_gro_udphdr(skb); | |
360 | ||
2abb7cdc TH |
361 | if (unlikely(!uh)) |
362 | goto flush; | |
57c67ff4 | 363 | |
2abb7cdc | 364 | /* Don't bother verifying checksum if we're going to flush anyway. */ |
2d8f7e2c | 365 | if (NAPI_GRO_CB(skb)->flush) |
2abb7cdc TH |
366 | goto skip; |
367 | ||
368 | if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, | |
369 | inet_gro_compute_pseudo)) | |
370 | goto flush; | |
371 | else if (uh->check) | |
372 | skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, | |
373 | inet_gro_compute_pseudo); | |
374 | skip: | |
efc98d08 | 375 | NAPI_GRO_CB(skb)->is_ipv6 = 0; |
57c67ff4 | 376 | return udp_gro_receive(head, skb, uh); |
2abb7cdc TH |
377 | |
378 | flush: | |
379 | NAPI_GRO_CB(skb)->flush = 1; | |
380 | return NULL; | |
57c67ff4 TH |
381 | } |
382 | ||
383 | int udp_gro_complete(struct sk_buff *skb, int nhoff) | |
b582ef09 OG |
384 | { |
385 | struct udp_offload_priv *uo_priv; | |
386 | __be16 newlen = htons(skb->len - nhoff); | |
387 | struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); | |
388 | int err = -ENOSYS; | |
389 | ||
390 | uh->len = newlen; | |
391 | ||
392 | rcu_read_lock(); | |
393 | ||
394 | uo_priv = rcu_dereference(udp_offload_base); | |
395 | for (; uo_priv != NULL; uo_priv = rcu_dereference(uo_priv->next)) { | |
787d7ac3 HFS |
396 | if (net_eq(read_pnet(&uo_priv->net), dev_net(skb->dev)) && |
397 | uo_priv->offload->port == uh->dest && | |
b582ef09 OG |
398 | uo_priv->offload->callbacks.gro_complete) |
399 | break; | |
400 | } | |
401 | ||
00db4124 | 402 | if (uo_priv) { |
afe93325 | 403 | NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; |
a2b12f3c TH |
404 | err = uo_priv->offload->callbacks.gro_complete(skb, |
405 | nhoff + sizeof(struct udphdr), | |
406 | uo_priv->offload); | |
afe93325 | 407 | } |
b582ef09 OG |
408 | |
409 | rcu_read_unlock(); | |
6db93ea1 TH |
410 | |
411 | if (skb->remcsum_offload) | |
412 | skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; | |
413 | ||
414 | skb->encapsulation = 1; | |
415 | skb_set_inner_mac_header(skb, nhoff + sizeof(struct udphdr)); | |
416 | ||
b582ef09 OG |
417 | return err; |
418 | } | |
419 | ||
72bb17b3 | 420 | static int udp4_gro_complete(struct sk_buff *skb, int nhoff) |
57c67ff4 TH |
421 | { |
422 | const struct iphdr *iph = ip_hdr(skb); | |
423 | struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); | |
424 | ||
6db93ea1 TH |
425 | if (uh->check) { |
426 | skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; | |
57c67ff4 TH |
427 | uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, |
428 | iph->daddr, 0); | |
6db93ea1 TH |
429 | } else { |
430 | skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; | |
431 | } | |
57c67ff4 TH |
432 | |
433 | return udp_gro_complete(skb, nhoff); | |
434 | } | |
435 | ||
da5bab07 DB |
436 | static const struct net_offload udpv4_offload = { |
437 | .callbacks = { | |
da5bab07 | 438 | .gso_segment = udp4_ufo_fragment, |
57c67ff4 TH |
439 | .gro_receive = udp4_gro_receive, |
440 | .gro_complete = udp4_gro_complete, | |
da5bab07 DB |
441 | }, |
442 | }; | |
443 | ||
444 | int __init udpv4_offload_init(void) | |
445 | { | |
446 | return inet_add_offload(&udpv4_offload, IPPROTO_UDP); | |
447 | } |