/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

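/* Illustrative note (not part of the original source): tcp_twsk_unique() is
 * what gives the sysctl_tcp_tw_reuse knob declared above its effect.  On a
 * standard procfs mount an administrator would enable it with:
 *
 *	echo 1 > /proc/sys/net/ipv4/tcp_tw_reuse
 *
 * after which a connect() that collides with a TIME-WAIT bucket may reuse
 * the port pair, provided the bucket's last timestamp is more than one
 * second old (the get_seconds() comparison above).
 */
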
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

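/* Illustrative example (not part of the original source): tcp_v4_connect()
 * is reached from userspace through connect(2) on a SOCK_STREAM socket.
 * A minimal sketch (the address 192.0.2.1 is a placeholder):
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	struct sockaddr_in dst = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(80),
 *	};
 *
 *	inet_pton(AF_INET, "192.0.2.1", &dst.sin_addr);
 *	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
 *
 * The -EINVAL and -EAFNOSUPPORT checks at the top of the function map to a
 * short addr_len or a non-AF_INET sin_family in such a call.
 */
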
/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always <576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally
	 * lose their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

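/* Illustrative note (not part of the original source): with
 * CHECKSUM_PARTIAL only the pseudo-header sum is written into th->check,
 * and csum_start/csum_offset tell the NIC (or skb_checksum_help()) where
 * to finish the job; in the software fallback branch the full checksum
 * over the header and payload is computed here instead.
 */
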
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So we build the reply based only on parameters
 *		arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. using iif for oif to
	 * make sure we can deliver it
	 */
	arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return 1 if a syncookie should be sent
 */
int tcp_syn_flood_action(struct sock *sk,
			 const struct sk_buff *skb,
			 const char *proto)
{
	const char *msg = "Dropping request";
	int want_cookie = 0;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = 1;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s. "
			"Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

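/* Illustrative note (not part of the original source): the
 * sysctl_tcp_syncookies flag tested above is exposed as
 * /proc/sys/net/ipv4/tcp_syncookies.  With it set, a listener whose
 * request queue fills during a SYN flood answers with cookies instead of
 * dropping, and the one-time warning above reports "Sending cookies"
 * rather than "Dropping request".
 */
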
/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
						  struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		}

		md5sig = tp->md5sig_info;
		if (md5sig->entries4 == 0 &&
		    tcp_alloc_md5sig_pool(sk) == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}

		if (md5sig->alloced4 == md5sig->entries4) {
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				if (md5sig->entries4 == 0)
					tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr        = addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key    = newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen;
	}
	return 0;
}
EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->inet_daddr,
				 newkey, newkeylen);
}

int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
				tcp_free_md5sig_pool();
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i+1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			return 0;
		}
	}
	return -ENOENT;
}
EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the set of keys,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4  = 0;
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(*p), sk->sk_allocation);
		if (!p)
			return -EINVAL;

		tp->md5sig_info = p;
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
	if (!newkey)
		return -ENOMEM;
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}

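/* Illustrative example (not part of the original source): this handler is
 * reached through the TCP_MD5SIG socket option.  A userspace sketch,
 * assuming <netinet/tcp.h> provides struct tcp_md5sig and the peer
 * address 192.0.2.1 is a placeholder:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *peer = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	peer->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &peer->sin_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 *
 * A zero tcpm_keylen deletes the key for that peer, matching the
 * tcp_v4_md5_do_del() branch above.
 */
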
static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

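/* Illustrative note (not part of the original source): per RFC 2385 the
 * digest computed above covers, in order, the pseudo-header, the TCP
 * header with its checksum field zeroed (tcp_md5_hash_header() handles
 * that), the segment data when present (see tcp_v4_md5_hash_skb() below),
 * and finally the connection key itself.
 */
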
int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		 = PF_INET,
	.obj_size	 = sizeof(struct tcp_request_sock),
	.rtx_syn_ack	 = tcp_v4_rtx_synack,
	.send_ack	 = tcp_v4_reqsk_send_ack,
	.destructor	 = tcp_v4_reqsk_destructor,
	.send_reset	 = tcp_v4_send_reset,
	.syn_ack_timeout = tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	= tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	int want_cookie = 0;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = 0;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct inet_peer *peer = NULL;
		struct flowi4 fl4;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
		    fl4.daddr == saddr &&
		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);


/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->inet_daddr = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr = ireq->loc_addr;
	inet_opt = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt = NULL;
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
		goto put_and_exit;

	sk_setup_caps(newsk, dst);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
					  newkey, key->keylen);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	tcp_clear_xmit_timers(newsk);
	tcp_cleanup_congestion_control(newsk);
	bh_unlock_sock(newsk);
	sock_put(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

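/* Illustrative note (not part of the original source): for
 * CHECKSUM_COMPLETE the device has already summed the whole segment, so
 * folding in the pseudo-header via tcp_v4_check() must yield zero for a
 * valid packet.  Short segments (<= 76 bytes) are verified in full right
 * away; for longer ones the partial sum is left in skb->csum and checked
 * later only if needed.
 */
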
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

1653/*
1654 * From tcp_input.c
1655 */
1656
1657int tcp_v4_rcv(struct sk_buff *skb)
1658{
eddc9ec5 1659 const struct iphdr *iph;
cf533ea5 1660 const struct tcphdr *th;
1da177e4
LT
1661 struct sock *sk;
1662 int ret;
a86b1e30 1663 struct net *net = dev_net(skb->dev);
1da177e4
LT
1664
1665 if (skb->pkt_type != PACKET_HOST)
1666 goto discard_it;
1667
1668 /* Count it even if it's bad */
63231bdd 1669 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1670
1671 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1672 goto discard_it;
1673
aa8223c7 1674 th = tcp_hdr(skb);
1da177e4
LT
1675
1676 if (th->doff < sizeof(struct tcphdr) / 4)
1677 goto bad_packet;
1678 if (!pskb_may_pull(skb, th->doff * 4))
1679 goto discard_it;
1680
1681 /* An explanation is required here, I think.
1682 * Packet length and doff are validated by header prediction,
caa20d9a 1683 * provided case of th->doff==0 is eliminated.
1da177e4 1684 * So, we defer the checks. */
60476372 1685 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1da177e4
LT
1686 goto bad_packet;
1687
aa8223c7 1688 th = tcp_hdr(skb);
eddc9ec5 1689 iph = ip_hdr(skb);
1da177e4
LT
1690 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1691 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1692 skb->len - th->doff * 4);
1693 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1694 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1695 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1696 TCP_SKB_CB(skb)->sacked = 0;
1697
9a1f27c4 1698 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1699 if (!sk)
1700 goto no_tcp_socket;
1701
bb134d5d
ED
1702process:
1703 if (sk->sk_state == TCP_TIME_WAIT)
1704 goto do_time_wait;
1705
6cce09f8
ED
1706 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1707 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1708 goto discard_and_relse;
6cce09f8 1709 }
d218d111 1710
1da177e4
LT
1711 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1712 goto discard_and_relse;
b59c2701 1713 nf_reset(skb);
1da177e4 1714
fda9ef5d 1715 if (sk_filter(sk, skb))
1da177e4
LT
1716 goto discard_and_relse;
1717
1718 skb->dev = NULL;
1719
c6366184 1720 bh_lock_sock_nested(sk);
1da177e4
LT
1721 ret = 0;
1722 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
1723#ifdef CONFIG_NET_DMA
1724 struct tcp_sock *tp = tcp_sk(sk);
1725 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
f67b4599 1726 tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
1a2449a8 1727 if (tp->ucopy.dma_chan)
1da177e4 1728 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
1729 else
1730#endif
1731 {
1732 if (!tcp_prequeue(sk, skb))
ae8d7f88 1733 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 1734 }
6cce09f8 1735 } else if (unlikely(sk_add_backlog(sk, skb))) {
6b03a53a 1736 bh_unlock_sock(sk);
6cce09f8 1737 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
1738 goto discard_and_relse;
1739 }
1da177e4
LT
1740 bh_unlock_sock(sk);
1741
1742 sock_put(sk);
1743
1744 return ret;
1745
1746no_tcp_socket:
1747 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1748 goto discard_it;
1749
1750 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
1751bad_packet:
63231bdd 1752 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 1753 } else {
cfb6eeb4 1754 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1755 }
1756
1757discard_it:
1758 /* Discard frame. */
1759 kfree_skb(skb);
e905a9ed 1760 return 0;
1da177e4
LT
1761
1762discard_and_relse:
1763 sock_put(sk);
1764 goto discard_it;
1765
1766do_time_wait:
1767 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1768 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1769 goto discard_it;
1770 }
1771
1772 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
63231bdd 1773 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
9469c7b4 1774 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1775 goto discard_it;
1776 }
9469c7b4 1777 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1778 case TCP_TW_SYN: {
c346dca1 1779 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 1780 &tcp_hashinfo,
eddc9ec5 1781 iph->daddr, th->dest,
463c84b9 1782 inet_iif(skb));
1da177e4 1783 if (sk2) {
9469c7b4
YH
1784 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1785 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1786 sk = sk2;
1787 goto process;
1788 }
1789 /* Fall through to ACK */
1790 }
1791 case TCP_TW_ACK:
1792 tcp_v4_timewait_ack(sk, skb);
1793 break;
1794 case TCP_TW_RST:
1795 goto no_tcp_socket;
1796 case TCP_TW_SUCCESS:;
1797 }
1798 goto discard_it;
1799}
1800
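/*
 * tcp_v4_get_peer() returns the inet_peer entry for the connection's
 * destination: reused from the cached route when that route still
 * matches inet_daddr, otherwise freshly looked up, in which case
 * *release_it tells the caller it owes a put on the peer.
 */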
3f419d2d 1801struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1da177e4 1802{
3f419d2d 1803 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1da177e4 1804 struct inet_sock *inet = inet_sk(sk);
3f419d2d 1805 struct inet_peer *peer;
1da177e4 1806
c5216cc7
DM
1807 if (!rt ||
1808 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
b534ecf1 1809 peer = inet_getpeer_v4(inet->inet_daddr, 1);
3f419d2d 1810 *release_it = true;
1da177e4
LT
1811 } else {
1812 if (!rt->peer)
a48eff12 1813 rt_bind_peer(rt, inet->inet_daddr, 1);
1da177e4 1814 peer = rt->peer;
3f419d2d 1815 *release_it = false;
1da177e4
LT
1816 }
1817
3f419d2d 1818 return peer;
1da177e4 1819}
3f419d2d 1820EXPORT_SYMBOL(tcp_v4_get_peer);
1da177e4 1821
ccb7c410 1822void *tcp_v4_tw_get_peer(struct sock *sk)
1da177e4 1823{
cf533ea5 1824 const struct inet_timewait_sock *tw = inet_twsk(sk);
1da177e4 1825
ccb7c410 1826 return inet_getpeer_v4(tw->tw_daddr, 1);
1da177e4 1827}
ccb7c410
DM
1828EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1829
1830static struct timewait_sock_ops tcp_timewait_sock_ops = {
1831 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1832 .twsk_unique = tcp_twsk_unique,
1833 .twsk_destructor= tcp_twsk_destructor,
1834 .twsk_getpeer = tcp_v4_tw_get_peer,
1835};
1da177e4 1836
3b401a81 1837const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1838 .queue_xmit = ip_queue_xmit,
1839 .send_check = tcp_v4_send_check,
1840 .rebuild_header = inet_sk_rebuild_header,
1841 .conn_request = tcp_v4_conn_request,
1842 .syn_recv_sock = tcp_v4_syn_recv_sock,
3f419d2d 1843 .get_peer = tcp_v4_get_peer,
543d9cfe
ACM
1844 .net_header_len = sizeof(struct iphdr),
1845 .setsockopt = ip_setsockopt,
1846 .getsockopt = ip_getsockopt,
1847 .addr2sockaddr = inet_csk_addr2sockaddr,
1848 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1849 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1850#ifdef CONFIG_COMPAT
543d9cfe
ACM
1851 .compat_setsockopt = compat_ip_setsockopt,
1852 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1853#endif
1da177e4 1854};
4bc2f18b 1855EXPORT_SYMBOL(ipv4_specific);
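/*
 * Illustrative note (not part of the original file): address-family
 * agnostic TCP code reaches these operations through the connection
 * socket, e.g. transmission goes roughly via
 *
 *	err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
 *
 * which resolves to ip_queue_xmit() for IPv4 sockets.
 */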
1da177e4 1856
cfb6eeb4 1857#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1858static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1859 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1860 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4
YH
1861 .md5_add = tcp_v4_md5_add_func,
1862 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1863};
b6332e6c 1864#endif
cfb6eeb4 1865
1da177e4
LT
1866/* NOTE: A lot of things are set to zero explicitly by the call to
1867 * sk_alloc(), so they need not be done here.
1868 */
1869static int tcp_v4_init_sock(struct sock *sk)
1870{
6687e988 1871 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4
LT
1872 struct tcp_sock *tp = tcp_sk(sk);
1873
1874 skb_queue_head_init(&tp->out_of_order_queue);
1875 tcp_init_xmit_timers(sk);
1876 tcp_prequeue_init(tp);
1877
6687e988 1878 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1da177e4
LT
1879 tp->mdev = TCP_TIMEOUT_INIT;
1880
1881 /* So many TCP implementations out there (incorrectly) count the
1882 * initial SYN frame in their delayed-ACK and congestion control
1883 * algorithms that we must have the following bandaid to talk
1884 * efficiently to them. -DaveM
1885 */
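	/* Editorial note: TCP_INIT_CWND is 10 segments in this tree,
	 * following the IW10 initial-window proposal.
	 */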
9ad7c049 1886 tp->snd_cwnd = TCP_INIT_CWND;
1da177e4
LT
1887
1888 /* See draft-stevens-tcpca-spec-01 for discussion of the
1889 * initialization of these values.
1890 */
0b6a05c1 1891 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
1da177e4 1892 tp->snd_cwnd_clamp = ~0;
bee7ca9e 1893 tp->mss_cache = TCP_MSS_DEFAULT;
1da177e4
LT
1894
1895 tp->reordering = sysctl_tcp_reordering;
6687e988 1896 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1da177e4
LT
1897
1898 sk->sk_state = TCP_CLOSE;
1899
1900 sk->sk_write_space = sk_stream_write_space;
1901 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1902
8292a17a 1903 icsk->icsk_af_ops = &ipv4_specific;
d83d8461 1904 icsk->icsk_sync_mss = tcp_sync_mss;
cfb6eeb4
YH
1905#ifdef CONFIG_TCP_MD5SIG
1906 tp->af_specific = &tcp_sock_ipv4_specific;
1907#endif
1da177e4 1908
435cf559
WAS
1909 /* TCP Cookie Transactions */
1910 if (sysctl_tcp_cookie_size > 0) {
1911 /* Default, cookies without s_data_payload. */
1912 tp->cookie_values =
1913 kzalloc(sizeof(*tp->cookie_values),
1914 sk->sk_allocation);
1915 if (tp->cookie_values != NULL)
1916 kref_init(&tp->cookie_values->kref);
1917 }
1918 /* Presumed zeroed, in order of appearance:
1919 * cookie_in_always, cookie_out_never,
1920 * s_data_constant, s_data_in, s_data_out
1921 */
1da177e4
LT
1922 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1923 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1924
eb4dea58 1925 local_bh_disable();
d1a4c0b3 1926 sock_update_memcg(sk);
180d8cd9 1927 sk_sockets_allocated_inc(sk);
eb4dea58 1928 local_bh_enable();
1da177e4
LT
1929
1930 return 0;
1931}
1932
7d06b2e0 1933void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1934{
1935 struct tcp_sock *tp = tcp_sk(sk);
1936
1937 tcp_clear_xmit_timers(sk);
1938
6687e988 1939 tcp_cleanup_congestion_control(sk);
317a76f9 1940
1da177e4 1941	/* Clean up the write buffer. */
fe067e8a 1942 tcp_write_queue_purge(sk);
1da177e4
LT
1943
1944 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 1945 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 1946
cfb6eeb4
YH
1947#ifdef CONFIG_TCP_MD5SIG
1948 /* Clean up the MD5 key list, if any */
1949 if (tp->md5sig_info) {
1950 tcp_v4_clear_md5_list(sk);
1951 kfree(tp->md5sig_info);
1952 tp->md5sig_info = NULL;
1953 }
1954#endif
1955
1a2449a8
CL
1956#ifdef CONFIG_NET_DMA
1957 /* Cleans up our sk_async_wait_queue */
e905a9ed 1958 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
1959#endif
1960
1da177e4
LT
1961	/* Clean up the prequeue; it really should be empty. */
1962 __skb_queue_purge(&tp->ucopy.prequeue);
1963
1964 /* Clean up a referenced TCP bind bucket. */
463c84b9 1965 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1966 inet_put_port(sk);
1da177e4
LT
1967
1968 /*
1969 * If sendmsg cached page exists, toss it.
1970 */
1971 if (sk->sk_sndmsg_page) {
1972 __free_page(sk->sk_sndmsg_page);
1973 sk->sk_sndmsg_page = NULL;
1974 }
1975
435cf559
WAS
1976 /* TCP Cookie Transactions */
1977 if (tp->cookie_values != NULL) {
1978 kref_put(&tp->cookie_values->kref,
1979 tcp_cookie_values_release);
1980 tp->cookie_values = NULL;
1981 }
1982
180d8cd9 1983 sk_sockets_allocated_dec(sk);
d1a4c0b3 1984 sock_release_memcg(sk);
1da177e4 1985}
1da177e4
LT
1986EXPORT_SYMBOL(tcp_v4_destroy_sock);
1987
1988#ifdef CONFIG_PROC_FS
1989/* Proc filesystem TCP sock list dumping. */
1990
3ab5aee7 1991static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
1da177e4 1992{
3ab5aee7 1993 return hlist_nulls_empty(head) ? NULL :
8feaf0c0 1994 list_entry(head->first, struct inet_timewait_sock, tw_node);
1da177e4
LT
1995}
1996
8feaf0c0 1997static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1da177e4 1998{
3ab5aee7
ED
1999 return !is_a_nulls(tw->tw_node.next) ?
2000 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1da177e4
LT
2001}
2002
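/*
 * The hash chains walked below are hlist_nulls lists: instead of NULL
 * they terminate in a "nulls" value that encodes the bucket, so that
 * lockless RCU lookups can detect a socket that moved to another chain
 * mid-walk. Hence the is_a_nulls()/hlist_nulls_empty() tests above in
 * place of plain NULL comparisons.
 */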
a8b690f9
TH
2003/*
2004 * Get the next listener socket following cur. If cur is NULL, get the
2005 * first socket starting from the bucket given in st->bucket; when
2006 * st->bucket is zero, the very first socket in the hash table is returned.
2007 */
1da177e4
LT
2008static void *listening_get_next(struct seq_file *seq, void *cur)
2009{
463c84b9 2010 struct inet_connection_sock *icsk;
c25eb3bf 2011 struct hlist_nulls_node *node;
1da177e4 2012 struct sock *sk = cur;
5caea4ea 2013 struct inet_listen_hashbucket *ilb;
5799de0b 2014 struct tcp_iter_state *st = seq->private;
a4146b1b 2015 struct net *net = seq_file_net(seq);
1da177e4
LT
2016
2017 if (!sk) {
a8b690f9 2018 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 2019 spin_lock_bh(&ilb->lock);
c25eb3bf 2020 sk = sk_nulls_head(&ilb->head);
a8b690f9 2021 st->offset = 0;
1da177e4
LT
2022 goto get_sk;
2023 }
5caea4ea 2024 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2025 ++st->num;
a8b690f9 2026 ++st->offset;
1da177e4
LT
2027
2028 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2029 struct request_sock *req = cur;
1da177e4 2030
72a3effa 2031 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2032 req = req->dl_next;
2033 while (1) {
2034 while (req) {
bdccc4ca 2035 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2036 cur = req;
2037 goto out;
2038 }
2039 req = req->dl_next;
2040 }
72a3effa 2041 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2042 break;
2043get_req:
463c84b9 2044 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2045 }
1bde5ac4 2046 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2047 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2048 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2049 } else {
e905a9ed 2050 icsk = inet_csk(sk);
463c84b9
ACM
2051 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2052 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2053 goto start_req;
463c84b9 2054 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2055 sk = sk_nulls_next(sk);
1da177e4
LT
2056 }
2057get_sk:
c25eb3bf 2058 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2059 if (!net_eq(sock_net(sk), net))
2060 continue;
2061 if (sk->sk_family == st->family) {
1da177e4
LT
2062 cur = sk;
2063 goto out;
2064 }
e905a9ed 2065 icsk = inet_csk(sk);
463c84b9
ACM
2066 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2067 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2068start_req:
2069 st->uid = sock_i_uid(sk);
2070 st->syn_wait_sk = sk;
2071 st->state = TCP_SEQ_STATE_OPENREQ;
2072 st->sbucket = 0;
2073 goto get_req;
2074 }
463c84b9 2075 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2076 }
5caea4ea 2077 spin_unlock_bh(&ilb->lock);
a8b690f9 2078 st->offset = 0;
0f7ff927 2079 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2080 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2081 spin_lock_bh(&ilb->lock);
c25eb3bf 2082 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2083 goto get_sk;
2084 }
2085 cur = NULL;
2086out:
2087 return cur;
2088}
2089
2090static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2091{
a8b690f9
TH
2092 struct tcp_iter_state *st = seq->private;
2093 void *rc;
2094
2095 st->bucket = 0;
2096 st->offset = 0;
2097 rc = listening_get_next(seq, NULL);
1da177e4
LT
2098
2099 while (rc && *pos) {
2100 rc = listening_get_next(seq, rc);
2101 --*pos;
2102 }
2103 return rc;
2104}
2105
6eac5604
AK
2106static inline int empty_bucket(struct tcp_iter_state *st)
2107{
3ab5aee7
ED
2108 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
2109 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
6eac5604
AK
2110}
2111
a8b690f9
TH
2112/*
2113 * Get the first established socket, starting from the bucket given in st->bucket.
2114 * If st->bucket is zero, the very first socket in the hash is returned.
2115 */
1da177e4
LT
2116static void *established_get_first(struct seq_file *seq)
2117{
5799de0b 2118 struct tcp_iter_state *st = seq->private;
a4146b1b 2119 struct net *net = seq_file_net(seq);
1da177e4
LT
2120 void *rc = NULL;
2121
a8b690f9
TH
2122 st->offset = 0;
2123 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2124 struct sock *sk;
3ab5aee7 2125 struct hlist_nulls_node *node;
8feaf0c0 2126 struct inet_timewait_sock *tw;
9db66bdc 2127 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2128
6eac5604
AK
2129 /* Lockless fast path for the common case of empty buckets */
2130 if (empty_bucket(st))
2131 continue;
2132
9db66bdc 2133 spin_lock_bh(lock);
3ab5aee7 2134 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2135 if (sk->sk_family != st->family ||
878628fb 2136 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2137 continue;
2138 }
2139 rc = sk;
2140 goto out;
2141 }
2142 st->state = TCP_SEQ_STATE_TIME_WAIT;
8feaf0c0 2143 inet_twsk_for_each(tw, node,
dbca9b27 2144 &tcp_hashinfo.ehash[st->bucket].twchain) {
28518fc1 2145 if (tw->tw_family != st->family ||
878628fb 2146 !net_eq(twsk_net(tw), net)) {
1da177e4
LT
2147 continue;
2148 }
2149 rc = tw;
2150 goto out;
2151 }
9db66bdc 2152 spin_unlock_bh(lock);
1da177e4
LT
2153 st->state = TCP_SEQ_STATE_ESTABLISHED;
2154 }
2155out:
2156 return rc;
2157}
2158
2159static void *established_get_next(struct seq_file *seq, void *cur)
2160{
2161 struct sock *sk = cur;
8feaf0c0 2162 struct inet_timewait_sock *tw;
3ab5aee7 2163 struct hlist_nulls_node *node;
5799de0b 2164 struct tcp_iter_state *st = seq->private;
a4146b1b 2165 struct net *net = seq_file_net(seq);
1da177e4
LT
2166
2167 ++st->num;
a8b690f9 2168 ++st->offset;
1da177e4
LT
2169
2170 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2171 tw = cur;
2172 tw = tw_next(tw);
2173get_tw:
878628fb 2174 while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
1da177e4
LT
2175 tw = tw_next(tw);
2176 }
2177 if (tw) {
2178 cur = tw;
2179 goto out;
2180 }
9db66bdc 2181 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2182 st->state = TCP_SEQ_STATE_ESTABLISHED;
2183
6eac5604 2184		/* Look for the next non-empty bucket */
a8b690f9 2185 st->offset = 0;
f373b53b 2186 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
6eac5604
AK
2187 empty_bucket(st))
2188 ;
f373b53b 2189 if (st->bucket > tcp_hashinfo.ehash_mask)
6eac5604
AK
2190 return NULL;
2191
9db66bdc 2192 spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
3ab5aee7 2193 sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
1da177e4 2194 } else
3ab5aee7 2195 sk = sk_nulls_next(sk);
1da177e4 2196
3ab5aee7 2197 sk_nulls_for_each_from(sk, node) {
878628fb 2198 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1da177e4
LT
2199 goto found;
2200 }
2201
2202 st->state = TCP_SEQ_STATE_TIME_WAIT;
dbca9b27 2203 tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
1da177e4
LT
2204 goto get_tw;
2205found:
2206 cur = sk;
2207out:
2208 return cur;
2209}
2210
2211static void *established_get_idx(struct seq_file *seq, loff_t pos)
2212{
a8b690f9
TH
2213 struct tcp_iter_state *st = seq->private;
2214 void *rc;
2215
2216 st->bucket = 0;
2217 rc = established_get_first(seq);
1da177e4
LT
2218
2219 while (rc && pos) {
2220 rc = established_get_next(seq, rc);
2221 --pos;
7174259e 2222 }
1da177e4
LT
2223 return rc;
2224}
2225
2226static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2227{
2228 void *rc;
5799de0b 2229 struct tcp_iter_state *st = seq->private;
1da177e4 2230
1da177e4
LT
2231 st->state = TCP_SEQ_STATE_LISTENING;
2232 rc = listening_get_idx(seq, &pos);
2233
2234 if (!rc) {
1da177e4
LT
2235 st->state = TCP_SEQ_STATE_ESTABLISHED;
2236 rc = established_get_idx(seq, pos);
2237 }
2238
2239 return rc;
2240}
2241
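/*
 * tcp_seek_last_pos() resumes a /proc walk from the bucket and offset
 * recorded on the previous pass, so large hash tables are not rescanned
 * from the beginning on every chunked read of the seq_file.
 */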
a8b690f9
TH
2242static void *tcp_seek_last_pos(struct seq_file *seq)
2243{
2244 struct tcp_iter_state *st = seq->private;
2245 int offset = st->offset;
2246 int orig_num = st->num;
2247 void *rc = NULL;
2248
2249 switch (st->state) {
2250 case TCP_SEQ_STATE_OPENREQ:
2251 case TCP_SEQ_STATE_LISTENING:
2252 if (st->bucket >= INET_LHTABLE_SIZE)
2253 break;
2254 st->state = TCP_SEQ_STATE_LISTENING;
2255 rc = listening_get_next(seq, NULL);
2256 while (offset-- && rc)
2257 rc = listening_get_next(seq, rc);
2258 if (rc)
2259 break;
2260 st->bucket = 0;
2261 /* Fallthrough */
2262 case TCP_SEQ_STATE_ESTABLISHED:
2263 case TCP_SEQ_STATE_TIME_WAIT:
2264 st->state = TCP_SEQ_STATE_ESTABLISHED;
2265 if (st->bucket > tcp_hashinfo.ehash_mask)
2266 break;
2267 rc = established_get_first(seq);
2268 while (offset-- && rc)
2269 rc = established_get_next(seq, rc);
2270 }
2271
2272 st->num = orig_num;
2273
2274 return rc;
2275}
2276
1da177e4
LT
2277static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2278{
5799de0b 2279 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2280 void *rc;
2281
2282 if (*pos && *pos == st->last_pos) {
2283 rc = tcp_seek_last_pos(seq);
2284 if (rc)
2285 goto out;
2286 }
2287
1da177e4
LT
2288 st->state = TCP_SEQ_STATE_LISTENING;
2289 st->num = 0;
a8b690f9
TH
2290 st->bucket = 0;
2291 st->offset = 0;
2292 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2293
2294out:
2295 st->last_pos = *pos;
2296 return rc;
1da177e4
LT
2297}
2298
2299static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2300{
a8b690f9 2301 struct tcp_iter_state *st = seq->private;
1da177e4 2302 void *rc = NULL;
1da177e4
LT
2303
2304 if (v == SEQ_START_TOKEN) {
2305 rc = tcp_get_idx(seq, 0);
2306 goto out;
2307 }
1da177e4
LT
2308
2309 switch (st->state) {
2310 case TCP_SEQ_STATE_OPENREQ:
2311 case TCP_SEQ_STATE_LISTENING:
2312 rc = listening_get_next(seq, v);
2313 if (!rc) {
1da177e4 2314 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2315 st->bucket = 0;
2316 st->offset = 0;
1da177e4
LT
2317 rc = established_get_first(seq);
2318 }
2319 break;
2320 case TCP_SEQ_STATE_ESTABLISHED:
2321 case TCP_SEQ_STATE_TIME_WAIT:
2322 rc = established_get_next(seq, v);
2323 break;
2324 }
2325out:
2326 ++*pos;
a8b690f9 2327 st->last_pos = *pos;
1da177e4
LT
2328 return rc;
2329}
2330
2331static void tcp_seq_stop(struct seq_file *seq, void *v)
2332{
5799de0b 2333 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2334
2335 switch (st->state) {
2336 case TCP_SEQ_STATE_OPENREQ:
2337 if (v) {
463c84b9
ACM
2338 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2339 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2340 }
2341 case TCP_SEQ_STATE_LISTENING:
2342 if (v != SEQ_START_TOKEN)
5caea4ea 2343 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4
LT
2344 break;
2345 case TCP_SEQ_STATE_TIME_WAIT:
2346 case TCP_SEQ_STATE_ESTABLISHED:
2347 if (v)
9db66bdc 2348 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2349 break;
2350 }
2351}
2352
73cb88ec 2353int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4
LT
2354{
2355 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
1da177e4 2356 struct tcp_iter_state *s;
52d6f3f1 2357 int err;
1da177e4 2358
52d6f3f1
DL
2359 err = seq_open_net(inode, file, &afinfo->seq_ops,
2360 sizeof(struct tcp_iter_state));
2361 if (err < 0)
2362 return err;
f40c8174 2363
52d6f3f1 2364 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2365 s->family = afinfo->family;
a8b690f9 2366 s->last_pos = 0;
f40c8174
DL
2367 return 0;
2368}
73cb88ec 2369EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2370
6f8b13bc 2371int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2372{
2373 int rc = 0;
2374 struct proc_dir_entry *p;
2375
9427c4b3
DL
2376 afinfo->seq_ops.start = tcp_seq_start;
2377 afinfo->seq_ops.next = tcp_seq_next;
2378 afinfo->seq_ops.stop = tcp_seq_stop;
2379
84841c3c 2380 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2381 afinfo->seq_fops, afinfo);
84841c3c 2382 if (!p)
1da177e4
LT
2383 rc = -ENOMEM;
2384 return rc;
2385}
4bc2f18b 2386EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2387
6f8b13bc 2388void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2389{
6f8b13bc 2390 proc_net_remove(net, afinfo->name);
1da177e4 2391}
4bc2f18b 2392EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2393
cf533ea5 2394static void get_openreq4(const struct sock *sk, const struct request_sock *req,
5e659e4c 2395 struct seq_file *f, int i, int uid, int *len)
1da177e4 2396{
2e6599cb 2397 const struct inet_request_sock *ireq = inet_rsk(req);
1da177e4
LT
2398 int ttd = req->expires - jiffies;
2399
5e659e4c 2400 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2401 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
1da177e4 2402 i,
2e6599cb 2403 ireq->loc_addr,
c720c7e8 2404 ntohs(inet_sk(sk)->inet_sport),
2e6599cb
ACM
2405 ireq->rmt_addr,
2406 ntohs(ireq->rmt_port),
1da177e4
LT
2407 TCP_SYN_RECV,
2408 0, 0, /* could print option size, but that is af dependent. */
2409 1, /* timers active (only the expire timer) */
2410 jiffies_to_clock_t(ttd),
2411 req->retrans,
2412 uid,
2413 0, /* non standard timer */
2414 0, /* open_requests have no inode */
2415 atomic_read(&sk->sk_refcnt),
5e659e4c
PE
2416 req,
2417 len);
1da177e4
LT
2418}
2419
5e659e4c 2420static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
1da177e4
LT
2421{
2422 int timer_active;
2423 unsigned long timer_expires;
cf533ea5 2424 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2425 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2426 const struct inet_sock *inet = inet_sk(sk);
c720c7e8
ED
2427 __be32 dest = inet->inet_daddr;
2428 __be32 src = inet->inet_rcv_saddr;
2429 __u16 destp = ntohs(inet->inet_dport);
2430 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2431 int rx_queue;
1da177e4 2432
463c84b9 2433 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1da177e4 2434 timer_active = 1;
463c84b9
ACM
2435 timer_expires = icsk->icsk_timeout;
2436 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2437 timer_active = 4;
463c84b9 2438 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2439 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2440 timer_active = 2;
cf4c6bf8 2441 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2442 } else {
2443 timer_active = 0;
2444 timer_expires = jiffies;
2445 }
2446
49d09007
ED
2447 if (sk->sk_state == TCP_LISTEN)
2448 rx_queue = sk->sk_ack_backlog;
2449 else
2450 /*
2451 * Because we don't lock the socket, we might find a transient negative value.
2452 */
2453 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2454
5e659e4c 2455 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
71338aa7 2456 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
cf4c6bf8 2457 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2458 tp->write_seq - tp->snd_una,
49d09007 2459 rx_queue,
1da177e4
LT
2460 timer_active,
2461 jiffies_to_clock_t(timer_expires - jiffies),
463c84b9 2462 icsk->icsk_retransmits,
cf4c6bf8 2463 sock_i_uid(sk),
6687e988 2464 icsk->icsk_probes_out,
cf4c6bf8
IJ
2465 sock_i_ino(sk),
2466 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2467 jiffies_to_clock_t(icsk->icsk_rto),
2468 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2469 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2470 tp->snd_cwnd,
0b6a05c1 2471 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
5e659e4c 2472 len);
1da177e4
LT
2473}
2474
cf533ea5 2475static void get_timewait4_sock(const struct inet_timewait_sock *tw,
5e659e4c 2476 struct seq_file *f, int i, int *len)
1da177e4 2477{
23f33c2d 2478 __be32 dest, src;
1da177e4
LT
2479 __u16 destp, srcp;
2480 int ttd = tw->tw_ttd - jiffies;
2481
2482 if (ttd < 0)
2483 ttd = 0;
2484
2485 dest = tw->tw_daddr;
2486 src = tw->tw_rcv_saddr;
2487 destp = ntohs(tw->tw_dport);
2488 srcp = ntohs(tw->tw_sport);
2489
5e659e4c 2490 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2491 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
1da177e4
LT
2492 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2493 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
5e659e4c 2494 atomic_read(&tw->tw_refcnt), tw, len);
1da177e4
LT
2495}
2496
2497#define TMPSZ 150
2498
2499static int tcp4_seq_show(struct seq_file *seq, void *v)
2500{
5799de0b 2501 struct tcp_iter_state *st;
5e659e4c 2502 int len;
1da177e4
LT
2503
2504 if (v == SEQ_START_TOKEN) {
2505 seq_printf(seq, "%-*s\n", TMPSZ - 1,
2506 " sl local_address rem_address st tx_queue "
2507 "rx_queue tr tm->when retrnsmt uid timeout "
2508 "inode");
2509 goto out;
2510 }
2511 st = seq->private;
2512
2513 switch (st->state) {
2514 case TCP_SEQ_STATE_LISTENING:
2515 case TCP_SEQ_STATE_ESTABLISHED:
5e659e4c 2516 get_tcp4_sock(v, seq, st->num, &len);
1da177e4
LT
2517 break;
2518 case TCP_SEQ_STATE_OPENREQ:
5e659e4c 2519 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
1da177e4
LT
2520 break;
2521 case TCP_SEQ_STATE_TIME_WAIT:
5e659e4c 2522 get_timewait4_sock(v, seq, st->num, &len);
1da177e4
LT
2523 break;
2524 }
5e659e4c 2525 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
1da177e4
LT
2526out:
2527 return 0;
2528}
2529
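/*
 * Illustrative sketch (values invented for the example): a line printed
 * by get_tcp4_sock() for a listener on 127.0.0.1:80 would look roughly
 * like
 *
 *    0: 0100007F:0050 00000000:0000 0A 00000000:00000000 00:00000000 ...
 *
 * Addresses and ports are hex; the address is the __be32 value read in
 * host order (hence 0100007F for 127.0.0.1 on little-endian), and "0A"
 * is the TCP_LISTEN state.
 */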
73cb88ec
AV
2530static const struct file_operations tcp_afinfo_seq_fops = {
2531 .owner = THIS_MODULE,
2532 .open = tcp_seq_open,
2533 .read = seq_read,
2534 .llseek = seq_lseek,
2535 .release = seq_release_net
2536};
2537
1da177e4 2538static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2539 .name = "tcp",
2540 .family = AF_INET,
73cb88ec 2541 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2542 .seq_ops = {
2543 .show = tcp4_seq_show,
2544 },
1da177e4
LT
2545};
2546
2c8c1e72 2547static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2548{
2549 return tcp_proc_register(net, &tcp4_seq_afinfo);
2550}
2551
2c8c1e72 2552static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2553{
2554 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2555}
2556
2557static struct pernet_operations tcp4_net_ops = {
2558 .init = tcp4_proc_init_net,
2559 .exit = tcp4_proc_exit_net,
2560};
2561
1da177e4
LT
2562int __init tcp4_proc_init(void)
2563{
757764f6 2564 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2565}
2566
2567void tcp4_proc_exit(void)
2568{
757764f6 2569 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2570}
2571#endif /* CONFIG_PROC_FS */
2572
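/*
 * GRO support: tcp4_gro_receive() verifies the checksum when the NIC
 * supplied CHECKSUM_COMPLETE and otherwise flushes the segment out of
 * aggregation; tcp4_gro_complete() restores a pseudo-header checksum
 * and marks the merged skb SKB_GSO_TCPV4 so it can be resegmented.
 */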
bf296b12
HX
2573struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2574{
b71d1d42 2575 const struct iphdr *iph = skb_gro_network_header(skb);
bf296b12
HX
2576
2577 switch (skb->ip_summed) {
2578 case CHECKSUM_COMPLETE:
86911732 2579 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
bf296b12
HX
2580 skb->csum)) {
2581 skb->ip_summed = CHECKSUM_UNNECESSARY;
2582 break;
2583 }
2584
2585 /* fall through */
2586 case CHECKSUM_NONE:
2587 NAPI_GRO_CB(skb)->flush = 1;
2588 return NULL;
2589 }
2590
2591 return tcp_gro_receive(head, skb);
2592}
bf296b12
HX
2593
2594int tcp4_gro_complete(struct sk_buff *skb)
2595{
b71d1d42 2596 const struct iphdr *iph = ip_hdr(skb);
bf296b12
HX
2597 struct tcphdr *th = tcp_hdr(skb);
2598
2599 th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
2600 iph->saddr, iph->daddr, 0);
2601 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
2602
2603 return tcp_gro_complete(skb);
2604}
bf296b12 2605
1da177e4
LT
2606struct proto tcp_prot = {
2607 .name = "TCP",
2608 .owner = THIS_MODULE,
2609 .close = tcp_close,
2610 .connect = tcp_v4_connect,
2611 .disconnect = tcp_disconnect,
463c84b9 2612 .accept = inet_csk_accept,
1da177e4
LT
2613 .ioctl = tcp_ioctl,
2614 .init = tcp_v4_init_sock,
2615 .destroy = tcp_v4_destroy_sock,
2616 .shutdown = tcp_shutdown,
2617 .setsockopt = tcp_setsockopt,
2618 .getsockopt = tcp_getsockopt,
1da177e4 2619 .recvmsg = tcp_recvmsg,
7ba42910
CG
2620 .sendmsg = tcp_sendmsg,
2621 .sendpage = tcp_sendpage,
1da177e4 2622 .backlog_rcv = tcp_v4_do_rcv,
ab1e0a13
ACM
2623 .hash = inet_hash,
2624 .unhash = inet_unhash,
2625 .get_port = inet_csk_get_port,
1da177e4
LT
2626 .enter_memory_pressure = tcp_enter_memory_pressure,
2627 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2628 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2629 .memory_allocated = &tcp_memory_allocated,
2630 .memory_pressure = &tcp_memory_pressure,
1da177e4
LT
2631 .sysctl_wmem = sysctl_tcp_wmem,
2632 .sysctl_rmem = sysctl_tcp_rmem,
2633 .max_header = MAX_TCP_HEADER,
2634 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2635 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2636 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2637 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2638 .h.hashinfo = &tcp_hashinfo,
7ba42910 2639 .no_autobind = true,
543d9cfe
ACM
2640#ifdef CONFIG_COMPAT
2641 .compat_setsockopt = compat_tcp_setsockopt,
2642 .compat_getsockopt = compat_tcp_getsockopt,
2643#endif
d1a4c0b3
GC
2644#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2645 .init_cgroup = tcp_init_cgroup,
2646 .destroy_cgroup = tcp_destroy_cgroup,
2647 .proto_cgroup = tcp_proto_cgroup,
2648#endif
1da177e4 2649};
4bc2f18b 2650EXPORT_SYMBOL(tcp_prot);
1da177e4 2651
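/*
 * Each network namespace gets a raw control socket (net->ipv4.tcp_sock)
 * below; it is used for stateless replies such as RSTs and TIME_WAIT
 * ACKs.
 */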
046ee902
DL
2652static int __net_init tcp_sk_init(struct net *net)
2653{
2654 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
2655 PF_INET, SOCK_RAW, IPPROTO_TCP, net);
2656}
2657
2658static void __net_exit tcp_sk_exit(struct net *net)
2659{
2660 inet_ctl_sock_destroy(net->ipv4.tcp_sock);
b099ce26
EB
2661}
2662
2663static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2664{
2665 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2666}
2667
2668static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2669 .init = tcp_sk_init,
2670 .exit = tcp_sk_exit,
2671 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2672};
2673
9b0f976f 2674void __init tcp_v4_init(void)
1da177e4 2675{
5caea4ea 2676 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2677 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2678 panic("Failed to create the TCP control socket.\n");
1da177e4 2679}