]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv4/inet_lro.c
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux...
[mirror_ubuntu-artful-kernel.git] / net / ipv4 / inet_lro.c
CommitLineData
71c87e0c
JBT
1/*
2 * linux/net/ipv4/inet_lro.c
3 *
4 * Large Receive Offload (ipv4 / tcp)
5 *
6 * (C) Copyright IBM Corp. 2007
7 *
8 * Authors:
9 * Jan-Bernd Themann <themann@de.ibm.com>
10 * Christoph Raisch <raisch@de.ibm.com>
11 *
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2, or (at your option)
16 * any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 */
27
28
29#include <linux/module.h>
30#include <linux/if_vlan.h>
31#include <linux/inet_lro.h>
32
33MODULE_LICENSE("GPL");
34MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
35MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
36
/*
 * Header-length helpers: the doff/ihl header fields are multiplied by four
 * (shift left by two).  Arguments are parenthesized so that any pointer
 * expression may be passed, not only a plain identifier.
 */
#define TCP_HDR_LEN(tcph) ((tcph)->doff << 2)
#define IP_HDR_LEN(iph) ((iph)->ihl << 2)
/* IP total length minus the IP and TCP header lengths. */
#define TCP_PAYLOAD_LENGTH(iph, tcph) \
	(ntohs((iph)->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))

/* Values of iph->ihl / tcph->doff accepted by lro_tcp_ip_check(). */
#define IPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_WO_OPTIONS 5
#define TCPH_LEN_W_TIMESTAMP 8

/* Header length used by __lro_proc_segment() when headers were not parsed. */
#define LRO_MAX_PG_HLEN 64

/*
 * Bump one counter in lro_mgr->stats.  Wrapped in do { } while (0) so the
 * macro behaves like a single statement, including in unbraced if/else.
 */
#define LRO_INC_STATS(lro_mgr, attr) \
	do { (lro_mgr)->stats.attr++; } while (0)
49
50/*
51 * Basic tcp checks whether packet is suitable for LRO
52 */
53
b71d1d42
ED
54static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
55 int len, const struct net_lro_desc *lro_desc)
71c87e0c
JBT
56{
57 /* check ip header: don't aggregate padded frames */
58 if (ntohs(iph->tot_len) != len)
59 return -1;
60
61 if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
62 return -1;
63
64 if (iph->ihl != IPH_LEN_WO_OPTIONS)
65 return -1;
66
9d4fb27d
JP
67 if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
68 tcph->rst || tcph->syn || tcph->fin)
71c87e0c
JBT
69 return -1;
70
71 if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
72 return -1;
73
9d4fb27d
JP
74 if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
75 tcph->doff != TCPH_LEN_W_TIMESTAMP)
71c87e0c
JBT
76 return -1;
77
78 /* check tcp options (only timestamp allowed) */
79 if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
9df7c98a 80 __be32 *topt = (__be32 *)(tcph + 1);
71c87e0c
JBT
81
82 if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
83 | (TCPOPT_TIMESTAMP << 8)
84 | TCPOLEN_TIMESTAMP))
85 return -1;
86
87 /* timestamp should be in right order */
88 topt++;
89 if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
90 ntohl(*topt)))
91 return -1;
92
93 /* timestamp reply should not be zero */
94 topt++;
95 if (*topt == 0)
96 return -1;
97 }
98
99 return 0;
100}
101
102static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
103{
104 struct iphdr *iph = lro_desc->iph;
105 struct tcphdr *tcph = lro_desc->tcph;
9df7c98a 106 __be32 *p;
71c87e0c
JBT
107 __wsum tcp_hdr_csum;
108
109 tcph->ack_seq = lro_desc->tcp_ack;
110 tcph->window = lro_desc->tcp_window;
111
112 if (lro_desc->tcp_saw_tstamp) {
9df7c98a 113 p = (__be32 *)(tcph + 1);
71c87e0c
JBT
114 *(p+2) = lro_desc->tcp_rcv_tsecr;
115 }
116
117 iph->tot_len = htons(lro_desc->ip_tot_len);
118
119 iph->check = 0;
120 iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
121
122 tcph->check = 0;
07f0757a 123 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
71c87e0c
JBT
124 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
125 tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
126 lro_desc->ip_tot_len -
127 IP_HDR_LEN(iph), IPPROTO_TCP,
128 lro_desc->data_csum);
129}
130
131static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
132{
133 __wsum tcp_csum;
134 __wsum tcp_hdr_csum;
135 __wsum tcp_ps_hdr_csum;
136
137 tcp_csum = ~csum_unfold(tcph->check);
07f0757a 138 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
71c87e0c
JBT
139
140 tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
141 len + TCP_HDR_LEN(tcph),
142 IPPROTO_TCP, 0);
143
144 return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
145 tcp_ps_hdr_csum);
146}
147
148static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
9fea0330 149 struct iphdr *iph, struct tcphdr *tcph)
71c87e0c
JBT
150{
151 int nr_frags;
9df7c98a 152 __be32 *ptr;
71c87e0c
JBT
153 u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
154
155 nr_frags = skb_shinfo(skb)->nr_frags;
156 lro_desc->parent = skb;
157 lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
158 lro_desc->iph = iph;
159 lro_desc->tcph = tcph;
160 lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
f53f4137 161 lro_desc->tcp_ack = tcph->ack_seq;
71c87e0c
JBT
162 lro_desc->tcp_window = tcph->window;
163
164 lro_desc->pkt_aggr_cnt = 1;
165 lro_desc->ip_tot_len = ntohs(iph->tot_len);
166
167 if (tcph->doff == 8) {
9df7c98a 168 ptr = (__be32 *)(tcph+1);
71c87e0c
JBT
169 lro_desc->tcp_saw_tstamp = 1;
170 lro_desc->tcp_rcv_tsval = *(ptr+1);
171 lro_desc->tcp_rcv_tsecr = *(ptr+2);
172 }
173
174 lro_desc->mss = tcp_data_len;
71c87e0c
JBT
175 lro_desc->active = 1;
176
177 lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
178 tcp_data_len);
179}
180
181static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
182{
183 memset(lro_desc, 0, sizeof(struct net_lro_desc));
184}
185
186static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
187 struct tcphdr *tcph, int tcp_data_len)
188{
189 struct sk_buff *parent = lro_desc->parent;
9df7c98a 190 __be32 *topt;
71c87e0c
JBT
191
192 lro_desc->pkt_aggr_cnt++;
193 lro_desc->ip_tot_len += tcp_data_len;
194 lro_desc->tcp_next_seq += tcp_data_len;
195 lro_desc->tcp_window = tcph->window;
196 lro_desc->tcp_ack = tcph->ack_seq;
197
198 /* don't update tcp_rcv_tsval, would not work with PAWS */
199 if (lro_desc->tcp_saw_tstamp) {
9df7c98a 200 topt = (__be32 *) (tcph + 1);
71c87e0c
JBT
201 lro_desc->tcp_rcv_tsecr = *(topt + 2);
202 }
203
204 lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
205 lro_tcp_data_csum(iph, tcph,
206 tcp_data_len),
207 parent->len);
208
209 parent->len += tcp_data_len;
210 parent->data_len += tcp_data_len;
211 if (tcp_data_len > lro_desc->mss)
212 lro_desc->mss = tcp_data_len;
213}
214
215static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
216 struct iphdr *iph, struct tcphdr *tcph)
217{
218 struct sk_buff *parent = lro_desc->parent;
219 int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
220
221 lro_add_common(lro_desc, iph, tcph, tcp_data_len);
222
223 skb_pull(skb, (skb->len - tcp_data_len));
224 parent->truesize += skb->truesize;
225
226 if (lro_desc->last_skb)
227 lro_desc->last_skb->next = skb;
228 else
229 skb_shinfo(parent)->frag_list = skb;
230
231 lro_desc->last_skb = skb;
232}
233
234static void lro_add_frags(struct net_lro_desc *lro_desc,
235 int len, int hlen, int truesize,
236 struct skb_frag_struct *skb_frags,
237 struct iphdr *iph, struct tcphdr *tcph)
238{
239 struct sk_buff *skb = lro_desc->parent;
240 int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
241
242 lro_add_common(lro_desc, iph, tcph, tcp_data_len);
243
244 skb->truesize += truesize;
245
246 skb_frags[0].page_offset += hlen;
9e903e08 247 skb_frag_size_sub(&skb_frags[0], hlen);
71c87e0c
JBT
248
249 while (tcp_data_len > 0) {
250 *(lro_desc->next_frag) = *skb_frags;
9e903e08 251 tcp_data_len -= skb_frag_size(skb_frags);
71c87e0c
JBT
252 lro_desc->next_frag++;
253 skb_frags++;
254 skb_shinfo(skb)->nr_frags++;
255 }
256}
257
258static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
259 struct iphdr *iph,
260 struct tcphdr *tcph)
261{
9d4fb27d
JP
262 if ((lro_desc->iph->saddr != iph->saddr) ||
263 (lro_desc->iph->daddr != iph->daddr) ||
264 (lro_desc->tcph->source != tcph->source) ||
265 (lro_desc->tcph->dest != tcph->dest))
71c87e0c
JBT
266 return -1;
267 return 0;
268}
269
270static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
271 struct net_lro_desc *lro_arr,
272 struct iphdr *iph,
273 struct tcphdr *tcph)
274{
275 struct net_lro_desc *lro_desc = NULL;
276 struct net_lro_desc *tmp;
277 int max_desc = lro_mgr->max_desc;
278 int i;
279
280 for (i = 0; i < max_desc; i++) {
281 tmp = &lro_arr[i];
282 if (tmp->active)
283 if (!lro_check_tcp_conn(tmp, iph, tcph)) {
284 lro_desc = tmp;
285 goto out;
286 }
287 }
288
289 for (i = 0; i < max_desc; i++) {
290 if (!lro_arr[i].active) {
291 lro_desc = &lro_arr[i];
292 goto out;
293 }
294 }
295
296 LRO_INC_STATS(lro_mgr, no_desc);
297out:
298 return lro_desc;
299}
300
301static void lro_flush(struct net_lro_mgr *lro_mgr,
302 struct net_lro_desc *lro_desc)
303{
304 if (lro_desc->pkt_aggr_cnt > 1)
305 lro_update_tcp_ip_header(lro_desc);
306
307 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
308
9fea0330
JP
309 if (lro_mgr->features & LRO_F_NAPI)
310 netif_receive_skb(lro_desc->parent);
311 else
312 netif_rx(lro_desc->parent);
71c87e0c
JBT
313
314 LRO_INC_STATS(lro_mgr, flushed);
315 lro_clear_desc(lro_desc);
316}
317
318static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
9fea0330 319 void *priv)
71c87e0c
JBT
320{
321 struct net_lro_desc *lro_desc;
322 struct iphdr *iph;
323 struct tcphdr *tcph;
324 u64 flags;
325 int vlan_hdr_len = 0;
326
9d4fb27d
JP
327 if (!lro_mgr->get_skb_header ||
328 lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
329 &flags, priv))
71c87e0c
JBT
330 goto out;
331
332 if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
333 goto out;
334
335 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
336 if (!lro_desc)
337 goto out;
338
9d4fb27d
JP
339 if ((skb->protocol == htons(ETH_P_8021Q)) &&
340 !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
71c87e0c
JBT
341 vlan_hdr_len = VLAN_HLEN;
342
343 if (!lro_desc->active) { /* start new lro session */
344 if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
345 goto out;
346
347 skb->ip_summed = lro_mgr->ip_summed_aggr;
9fea0330 348 lro_init_desc(lro_desc, skb, iph, tcph);
71c87e0c
JBT
349 LRO_INC_STATS(lro_mgr, aggregated);
350 return 0;
351 }
352
353 if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
354 goto out2;
355
356 if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
357 goto out2;
358
359 lro_add_packet(lro_desc, skb, iph, tcph);
360 LRO_INC_STATS(lro_mgr, aggregated);
361
362 if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
363 lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
364 lro_flush(lro_mgr, lro_desc);
365
366 return 0;
367
368out2: /* send aggregated SKBs to stack */
369 lro_flush(lro_mgr, lro_desc);
370
251a4b32 371out:
71c87e0c
JBT
372 return 1;
373}
374
375
376static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr,
377 struct skb_frag_struct *frags,
378 int len, int true_size,
379 void *mac_hdr,
380 int hlen, __wsum sum,
381 u32 ip_summed)
382{
383 struct sk_buff *skb;
384 struct skb_frag_struct *skb_frags;
385 int data_len = len;
386 int hdr_len = min(len, hlen);
387
621544eb 388 skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad);
71c87e0c
JBT
389 if (!skb)
390 return NULL;
391
621544eb 392 skb_reserve(skb, lro_mgr->frag_align_pad);
71c87e0c
JBT
393 skb->len = len;
394 skb->data_len = len - hdr_len;
395 skb->truesize += true_size;
396 skb->tail += hdr_len;
397
398 memcpy(skb->data, mac_hdr, hdr_len);
399
400 skb_frags = skb_shinfo(skb)->frags;
401 while (data_len > 0) {
402 *skb_frags = *frags;
9e903e08 403 data_len -= skb_frag_size(frags);
71c87e0c
JBT
404 skb_frags++;
405 frags++;
406 skb_shinfo(skb)->nr_frags++;
407 }
408
409 skb_shinfo(skb)->frags[0].page_offset += hdr_len;
9e903e08 410 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len);
71c87e0c
JBT
411
412 skb->ip_summed = ip_summed;
413 skb->csum = sum;
414 skb->protocol = eth_type_trans(skb, lro_mgr->dev);
415 return skb;
416}
417
418static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr,
419 struct skb_frag_struct *frags,
420 int len, int true_size,
9fea0330 421 void *priv, __wsum sum)
71c87e0c
JBT
422{
423 struct net_lro_desc *lro_desc;
424 struct iphdr *iph;
425 struct tcphdr *tcph;
426 struct sk_buff *skb;
427 u64 flags;
428 void *mac_hdr;
429 int mac_hdr_len;
430 int hdr_len = LRO_MAX_PG_HLEN;
431 int vlan_hdr_len = 0;
432
9d4fb27d
JP
433 if (!lro_mgr->get_frag_header ||
434 lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph,
435 (void *)&tcph, &flags, priv)) {
aff65da0 436 mac_hdr = skb_frag_address(frags);
71c87e0c
JBT
437 goto out1;
438 }
439
440 if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
441 goto out1;
442
443 hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr);
444 mac_hdr_len = (int)((void *)(iph) - mac_hdr);
445
446 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
447 if (!lro_desc)
448 goto out1;
449
450 if (!lro_desc->active) { /* start new lro session */
451 if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL))
452 goto out1;
453
454 skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
455 hdr_len, 0, lro_mgr->ip_summed_aggr);
456 if (!skb)
457 goto out;
458
9d4fb27d
JP
459 if ((skb->protocol == htons(ETH_P_8021Q)) &&
460 !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
71c87e0c
JBT
461 vlan_hdr_len = VLAN_HLEN;
462
463 iph = (void *)(skb->data + vlan_hdr_len);
464 tcph = (void *)((u8 *)skb->data + vlan_hdr_len
465 + IP_HDR_LEN(iph));
466
9fea0330 467 lro_init_desc(lro_desc, skb, iph, tcph);
71c87e0c 468 LRO_INC_STATS(lro_mgr, aggregated);
cfcabdcc 469 return NULL;
71c87e0c
JBT
470 }
471
472 if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
473 goto out2;
474
475 if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc))
476 goto out2;
477
478 lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph);
479 LRO_INC_STATS(lro_mgr, aggregated);
480
481 if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) ||
482 lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
483 lro_flush(lro_mgr, lro_desc);
484
485 return NULL;
486
487out2: /* send aggregated packets to the stack */
488 lro_flush(lro_mgr, lro_desc);
489
490out1: /* Original packet has to be posted to the stack */
491 skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr,
492 hdr_len, sum, lro_mgr->ip_summed);
493out:
494 return skb;
495}
496
497void lro_receive_skb(struct net_lro_mgr *lro_mgr,
498 struct sk_buff *skb,
499 void *priv)
500{
9fea0330 501 if (__lro_proc_skb(lro_mgr, skb, priv)) {
877364e6 502 if (lro_mgr->features & LRO_F_NAPI)
71c87e0c
JBT
503 netif_receive_skb(skb);
504 else
505 netif_rx(skb);
506 }
507}
508EXPORT_SYMBOL(lro_receive_skb);
509
71c87e0c
JBT
510void lro_receive_frags(struct net_lro_mgr *lro_mgr,
511 struct skb_frag_struct *frags,
512 int len, int true_size, void *priv, __wsum sum)
513{
514 struct sk_buff *skb;
515
9fea0330 516 skb = __lro_proc_segment(lro_mgr, frags, len, true_size, priv, sum);
71c87e0c
JBT
517 if (!skb)
518 return;
519
877364e6 520 if (lro_mgr->features & LRO_F_NAPI)
71c87e0c
JBT
521 netif_receive_skb(skb);
522 else
523 netif_rx(skb);
524}
525EXPORT_SYMBOL(lro_receive_frags);
526
71c87e0c
JBT
527void lro_flush_all(struct net_lro_mgr *lro_mgr)
528{
529 int i;
530 struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
531
532 for (i = 0; i < lro_mgr->max_desc; i++) {
533 if (lro_desc[i].active)
534 lro_flush(lro_mgr, &lro_desc[i]);
535 }
536}
537EXPORT_SYMBOL(lro_flush_all);
538
539void lro_flush_pkt(struct net_lro_mgr *lro_mgr,
540 struct iphdr *iph, struct tcphdr *tcph)
541{
542 struct net_lro_desc *lro_desc;
543
544 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
545 if (lro_desc->active)
546 lro_flush(lro_mgr, lro_desc);
547}
548EXPORT_SYMBOL(lro_flush_pkt);