/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller		:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *	David S. Miller		:	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *	Andi Kleen		:	Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *	Andi Kleen		:	Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *	Mike McLagan		:	Routing by source
 *	Juan Jose Ciarlante	:	ip_dynaddr bits
 *	Andi Kleen		:	various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#define pr_fmt(fmt) "TCP: " fmt

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

#ifdef CONFIG_TCP_MD5SIG
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);

static __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}

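/* Background note (RFC 6528 style ISN selection): the initial sequence
 * number is derived from a keyed hash of the connection 4-tuple plus a
 * clock component, so successive incarnations of the same 4-tuple get
 * monotonically advancing ISNs while off-path attackers cannot predict
 * them.  Illustrative shape: ISN = H(saddr, daddr, sport, dport, secret)
 * + clock.
 */
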
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

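/* Illustrative arithmetic for the reuse path above: if the old
 * TIME-WAIT incarnation last sent tw_snd_nxt = 1000, the new connection
 * starts at write_seq = 1000 + 65535 + 2 = 66537, i.e. beyond the
 * largest unscaled window plus the SYN and FIN sequence numbers, so
 * stray duplicates from the old incarnation cannot be mistaken for
 * segments of the new one.
 */
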
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the
	 * socket lock, select a source port, enter ourselves into the
	 * hash tables and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 */
static void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	u32 mtu = tcp_sk(sk)->mtu_info;

	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    ip_sk_accept_pmtu(sk) &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

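/* Illustrative scenario: a router on the path reports a 1400 byte MTU
 * while icsk_pmtu_cookie is still 1500.  tcp_sync_mss() then shrinks
 * the MSS (roughly 1400 minus 40 bytes of IP+TCP headers, less any
 * options in use), and tcp_simple_retransmit() resends the queued
 * segments that were built for the larger size.
 */
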
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

	if (dst)
		dst->ops->redirect(dst, sk, skb);
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *fastopen;
	__u32 seq, snd_una;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = tp->fastopen_rsk;
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs sent out by Linux are always < 576 bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff || fastopen)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && fastopen->sk == NULL)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows us to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

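/* Worked example for the backoff revert above (assuming a 200 ms base
 * RTO): with icsk_backoff = 3 the pending timer was armed for 1600 ms.
 * An ICMP_NET_UNREACH matching the head-of-queue segment drops backoff
 * to 2, i.e. an 800 ms RTO; if 500 ms have already elapsed since that
 * segment was stamped, the timer is re-armed for the remaining 300 ms,
 * otherwise the retransmit fires immediately.
 */
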
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);

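/* Note on the CHECKSUM_PARTIAL branch above: software stores only the
 * folded pseudo-header sum in th->check and records where the final
 * checksum lives (csum_start/csum_offset); checksum-offloading hardware
 * then sums the TCP header and payload from csum_start onward and
 * writes the result at csum_offset.  The else branch computes the whole
 * checksum in software.
 */
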
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we do not lose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

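/* The RST sequence selection above follows RFC 793: if the offending
 * segment carried an ACK, the reset is sent with seq = that ack_seq and
 * no ACK flag; otherwise it ACKs exactly the sequence space the segment
 * occupied.  For example, a 100-byte segment at seq 5000 with SYN set
 * yields ack_seq = 5000 + 1 + 100 = 5101, so the peer can match the
 * reset to its connection attempt.
 */
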
/* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
   outside socket context, is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

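/* Wire layout of the rep.opt[] words built above when a timestamp is
 * echoed: one 32-bit word holding NOP, NOP, kind 8 (TIMESTAMP) and
 * length 10, followed by the 32-bit TSval and TSecr.  The two NOPs pad
 * the option to a 4-byte boundary, which is why TCPOLEN_TSTAMP_ALIGNED
 * is 12 rather than 10.
 */
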
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
			tcp_time_stamp,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      u16 queue_mapping,
			      struct tcp_fastopen_cookie *foc)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, foc);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
					    ireq->ir_rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
		if (!tcp_rsk(req)->snt_synack && !err)
			tcp_rsk(req)->snt_synack = tcp_time_stamp;
	}

	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return true if a syncookie should be sent
 */
bool tcp_syn_flood_action(struct sock *sk,
			  const struct sk_buff *skb,
			  const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
}

/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);

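/* Userspace reaches this path through setsockopt(TCP_MD5SIG); a hedged
 * sketch of the caller side, with the peer address and key purely
 * illustrative:
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	((struct sockaddr_in *)&md5.tcpm_addr)->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1",
 *		  &((struct sockaddr_in *)&md5.tcpm_addr)->sin_addr);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */
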
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);

static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

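/* Per RFC 2385, the MD5 digest covers, in order: the pseudo-header
 * hashed above (saddr, daddr, zero pad, protocol 6, segment length),
 * then the TCP header with its checksum field zeroed, then the payload,
 * and finally the connection key.  tcp_v4_md5_hash_skb() below feeds
 * those pieces in that order; the header-only variant used for
 * generated replies omits the payload step, since replies carry none.
 */
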
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->ir_loc_addr;
		daddr = inet_rsk(req)->ir_rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);

static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
				     &iph->saddr, ntohs(th->source),
				     &iph->daddr, ntohs(th->dest),
				     genhash ? " tcp_v4_calc_md5_hash failed"
					     : "");
		return true;
	}
	return false;
}

#endif

static void tcp_v4_init_req(struct request_sock *req, struct sock *sk,
			    struct sk_buff *skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);

	ireq->ir_loc_addr = ip_hdr(skb)->daddr;
	ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(skb);
}

static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
					  const struct request_sock *req,
					  bool *strict)
{
	struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);

	if (strict) {
		if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
			*strict = true;
		else
			*strict = false;
	}

	return dst;
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
#endif
	.init_req	=	tcp_v4_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v4_init_sequence,
#endif
	.route_req	=	tcp_v4_route_req,
	.init_seq	=	tcp_v4_init_sequence,
	.send_synack	=	tcp_v4_send_synack,
};

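/* Everything address-family specific in the request path below (route
 * lookup, ISN generation, SYN-ACK emission) is reached only through
 * this ops table; the IPv6 side is expected to install its own
 * tcp_request_sock_ipv6_ops, keeping tcp_v4_conn_request() itself
 * family independent.
 */
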
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	bool want_cookie = false, fastopen;
	struct flowi4 fl4;
	struct tcp_fastopen_cookie foc = { .len = -1 };
	const struct tcp_request_sock_ops *af_ops;
	int err;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if ((sysctl_tcp_syncookies == 2 ||
	     inet_csk_reqsk_queue_is_full(sk)) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
		goto drop;
	}

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

	af_ops = tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb, sk);

	af_ops->init_req(req, sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb, sock_net(sk));

	if (want_cookie) {
		isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) {
			bool strict;

			dst = af_ops->route_req(sk, (struct flowi *)&fl4, req,
						&strict);
			if (dst && strict &&
			    !tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = af_ops->init_seq(skb);
	}
	if (!dst) {
		dst = af_ops->route_req(sk, (struct flowi *)&fl4, req, NULL);
		if (!dst)
			goto drop_and_free;
	}

	tcp_rsk(req)->snt_isn = isn;
	tcp_openreq_init_rwin(req, sk, dst);
	fastopen = !want_cookie &&
		   tcp_try_fastopen(sk, skb, req, &foc, dst);
	err = af_ops->send_synack(sk, dst, NULL, req,
				  skb_get_queue_mapping(skb), &foc);
	if (!fastopen) {
		if (err || want_cookie)
			goto drop_and_free;

		tcp_rsk(req)->listener = NULL;
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}

	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	inet_sk_rx_dst_set(newsk, skb);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->inet_daddr = ireq->ir_rmt_addr;
	newinet->inet_rcv_saddr = ireq->ir_loc_addr;
	newinet->inet_saddr = ireq->ir_loc_addr;
	inet_opt = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt = NULL;
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = ip_hdr(skb)->ttl;
	newinet->rcv_tos = ip_hdr(skb)->tos;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst) {
		dst = inet_csk_route_child_sock(sk, newsk, req);
		if (!dst)
			goto put_and_exit;
	} else {
		/* syncookie case : see end of cookie_v4_check() */
	}
	sk_setup_caps(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
				AF_INET);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev, false);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

LT
1550/* The socket must have it's spinlock held when we get
1551 * here.
1552 *
1553 * We have a potential double-lock case here, so even when
1554 * doing backlog processing we use the BH locking scheme.
1555 * This is because we cannot sleep with the original spinlock
1556 * held.
1557 */
1558int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1559{
cfb6eeb4
YH
1560 struct sock *rsk;
1561#ifdef CONFIG_TCP_MD5SIG
1562 /*
1563 * We really want to reject the packet as early as possible
1564 * if:
1565 * o We're expecting an MD5'd packet and this is no MD5 tcp option
1566 * o There is an MD5 option and we're not expecting one
1567 */
7174259e 1568 if (tcp_v4_inbound_md5_hash(sk, skb))
cfb6eeb4
YH
1569 goto discard;
1570#endif
1571
1da177e4 1572 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
404e0a8b
ED
1573 struct dst_entry *dst = sk->sk_rx_dst;
1574
bdeab991 1575 sock_rps_save_rxhash(sk, skb);
404e0a8b 1576 if (dst) {
505fbcf0
ED
1577 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1578 dst->ops->check(dst, 0) == NULL) {
92101b3b
DM
1579 dst_release(dst);
1580 sk->sk_rx_dst = NULL;
1581 }
1582 }
c995ae22 1583 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1da177e4
LT
1584 return 0;
1585 }
1586
ab6a5bb6 1587 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1da177e4
LT
1588 goto csum_err;
1589
1590 if (sk->sk_state == TCP_LISTEN) {
1591 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1592 if (!nsk)
1593 goto discard;
1594
1595 if (nsk != sk) {
bdeab991 1596 sock_rps_save_rxhash(nsk, skb);
cfb6eeb4
YH
1597 if (tcp_child_process(sk, nsk, skb)) {
1598 rsk = nsk;
1da177e4 1599 goto reset;
cfb6eeb4 1600 }
1da177e4
LT
1601 return 0;
1602 }
ca55158c 1603 } else
bdeab991 1604 sock_rps_save_rxhash(sk, skb);
ca55158c 1605
aa8223c7 1606 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1607 rsk = sk;
1da177e4 1608 goto reset;
cfb6eeb4 1609 }
1da177e4
LT
1610 return 0;
1611
1612reset:
cfb6eeb4 1613 tcp_v4_send_reset(rsk, skb);
1da177e4
LT
1614discard:
1615 kfree_skb(skb);
1616 /* Be careful here. If this function gets more complicated and
1617 * gcc suffers from register pressure on the x86, sk (in %ebx)
1618 * might be destroyed here. This current version compiles correctly,
1619 * but you have been warned.
1620 */
1621 return 0;
1622
1623csum_err:
6a5dc9e5 1624 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
63231bdd 1625 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1da177e4
LT
1626 goto discard;
1627}
4bc2f18b 1628EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4 1629
void tcp_v4_early_demux(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk->sk_state != TCP_TIME_WAIT) {
			struct dst_entry *dst = sk->sk_rx_dst;

			if (dst)
				dst = dst_check(dst, 0);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

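/* Early demux runs before routing: by resolving the established socket
 * straight from the 4-tuple here, the stack can reuse the socket's
 * cached input route (sk_rx_dst) instead of performing a full FIB
 * lookup per packet, which can be a significant saving on busy
 * receivers.
 */
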
b2fb4f54
ED
1667/* The packet is added to the VJ-style prequeue for processing in process
1668 * context, if a reader task is waiting. Apparently, this exciting
1669 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1670 * failed somewhere. Latency? Burstiness? Well, at least now we will
1671 * see why it failed. 8)8) --ANK
1672 *
1673 */
1674bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1675{
1676 struct tcp_sock *tp = tcp_sk(sk);
1677
1678 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1679 return false;
1680
1681 if (skb->len <= tcp_hdrlen(skb) &&
1682 skb_queue_len(&tp->ucopy.prequeue) == 0)
1683 return false;
1684
58717686 1685 skb_dst_force(skb);
b2fb4f54
ED
1686 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1687 tp->ucopy.memory += skb->truesize;
1688 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1689 struct sk_buff *skb1;
1690
1691 BUG_ON(sock_owned_by_user(sk));
1692
1693 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1694 sk_backlog_rcv(sk, skb1);
1695 NET_INC_STATS_BH(sock_net(sk),
1696 LINUX_MIB_TCPPREQUEUEDROPPED);
1697 }
1698
1699 tp->ucopy.memory = 0;
1700 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1701 wake_up_interruptible_sync_poll(sk_sleep(sk),
1702 POLLIN | POLLRDNORM | POLLRDBAND);
1703 if (!inet_csk_ack_scheduled(sk))
1704 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1705 (3 * tcp_rto_min(sk)) / 4,
1706 TCP_RTO_MAX);
1707 }
1708 return true;
1709}
1710EXPORT_SYMBOL(tcp_prequeue);
1711
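/* Two details above are easy to miss: if prequeued data would overflow
 * sk_rcvbuf, the whole prequeue is flushed immediately through the backlog
 * handler in softirq context (counted as TCPPrequeueDropped); and the first
 * prequeued segment arms the delayed-ACK timer at 3/4 of the minimum RTO if
 * no ACK is already scheduled, since nothing will be processed until the
 * reader task runs. Setting the tcp_low_latency sysctl bypasses the
 * prequeue entirely.
 */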
1da177e4
LT
1712/*
1713 * From tcp_input.c
1714 */
1715
1716int tcp_v4_rcv(struct sk_buff *skb)
1717{
eddc9ec5 1718 const struct iphdr *iph;
cf533ea5 1719 const struct tcphdr *th;
1da177e4
LT
1720 struct sock *sk;
1721 int ret;
a86b1e30 1722 struct net *net = dev_net(skb->dev);
1da177e4
LT
1723
1724 if (skb->pkt_type != PACKET_HOST)
1725 goto discard_it;
1726
1727 /* Count it even if it's bad */
63231bdd 1728 TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
1da177e4
LT
1729
1730 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1731 goto discard_it;
1732
aa8223c7 1733 th = tcp_hdr(skb);
1da177e4
LT
1734
1735 if (th->doff < sizeof(struct tcphdr) / 4)
1736 goto bad_packet;
1737 if (!pskb_may_pull(skb, th->doff * 4))
1738 goto discard_it;
1739
1740 /* An explanation is required here, I think.
1741 * Packet length and doff are validated by header prediction,
caa20d9a 1742 * provided the case of th->doff==0 is eliminated.
1da177e4 1743 * So, we defer the checks. */
ed70fcfc
TH
1744
1745 if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
6a5dc9e5 1746 goto csum_error;
1da177e4 1747
aa8223c7 1748 th = tcp_hdr(skb);
eddc9ec5 1749 iph = ip_hdr(skb);
1da177e4
LT
1750 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1751 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1752 skb->len - th->doff * 4);
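	/* th->syn and th->fin each occupy one unit of sequence space, so a
	 * bare SYN has end_seq == seq + 1 and a pure ACK carrying no data
	 * has end_seq == seq.
	 */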
1753 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1754 TCP_SKB_CB(skb)->when = 0;
b82d1bb4 1755 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1da177e4
LT
1756 TCP_SKB_CB(skb)->sacked = 0;
1757
9a1f27c4 1758 sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
1da177e4
LT
1759 if (!sk)
1760 goto no_tcp_socket;
1761
bb134d5d
ED
1762process:
1763 if (sk->sk_state == TCP_TIME_WAIT)
1764 goto do_time_wait;
1765
6cce09f8
ED
1766 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1767 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
d218d111 1768 goto discard_and_relse;
6cce09f8 1769 }
d218d111 1770
1da177e4
LT
1771 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1772 goto discard_and_relse;
b59c2701 1773 nf_reset(skb);
1da177e4 1774
fda9ef5d 1775 if (sk_filter(sk, skb))
1da177e4
LT
1776 goto discard_and_relse;
1777
8b80cda5 1778 sk_mark_napi_id(sk, skb);
1da177e4
LT
1779 skb->dev = NULL;
1780
c6366184 1781 bh_lock_sock_nested(sk);
1da177e4
LT
1782 ret = 0;
1783 if (!sock_owned_by_user(sk)) {
1a2449a8
CL
1784#ifdef CONFIG_NET_DMA
1785 struct tcp_sock *tp = tcp_sk(sk);
1786 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
a2bd1140 1787 tp->ucopy.dma_chan = net_dma_find_channel();
1a2449a8 1788 if (tp->ucopy.dma_chan)
1da177e4 1789 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8
CL
1790 else
1791#endif
1792 {
1793 if (!tcp_prequeue(sk, skb))
ae8d7f88 1794 ret = tcp_v4_do_rcv(sk, skb);
1a2449a8 1795 }
da882c1f
ED
1796 } else if (unlikely(sk_add_backlog(sk, skb,
1797 sk->sk_rcvbuf + sk->sk_sndbuf))) {
6b03a53a 1798 bh_unlock_sock(sk);
6cce09f8 1799 NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
6b03a53a
ZY
1800 goto discard_and_relse;
1801 }
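	/* If the socket was owned by a user context, the segment was queued
	 * to the backlog above and will be replayed through tcp_v4_do_rcv()
	 * by release_sock(); rcvbuf + sndbuf bounds the backlog so a flood
	 * cannot pin unbounded memory.
	 */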
1da177e4
LT
1802 bh_unlock_sock(sk);
1803
1804 sock_put(sk);
1805
1806 return ret;
1807
1808no_tcp_socket:
1809 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1810 goto discard_it;
1811
1812 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
6a5dc9e5
ED
1813csum_error:
1814 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1da177e4 1815bad_packet:
63231bdd 1816 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1da177e4 1817 } else {
cfb6eeb4 1818 tcp_v4_send_reset(NULL, skb);
1da177e4
LT
1819 }
1820
1821discard_it:
1822 /* Discard frame. */
1823 kfree_skb(skb);
e905a9ed 1824 return 0;
1da177e4
LT
1825
1826discard_and_relse:
1827 sock_put(sk);
1828 goto discard_it;
1829
1830do_time_wait:
1831 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
9469c7b4 1832 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1833 goto discard_it;
1834 }
1835
6a5dc9e5 1836 if (skb->len < (th->doff << 2)) {
9469c7b4 1837 inet_twsk_put(inet_twsk(sk));
6a5dc9e5
ED
1838 goto bad_packet;
1839 }
1840 if (tcp_checksum_complete(skb)) {
1841 inet_twsk_put(inet_twsk(sk));
1842 goto csum_error;
1da177e4 1843 }
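	/* A segment for a TIME_WAIT socket is classified below; notably, a
	 * new SYN that is acceptable (e.g. its timestamp moved forward) may
	 * recycle the old pair: we then look up a listener and process the
	 * SYN against it instead of the doomed timewait socket.
	 */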
9469c7b4 1844 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1da177e4 1845 case TCP_TW_SYN: {
c346dca1 1846 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
c67499c0 1847 &tcp_hashinfo,
da5e3630 1848 iph->saddr, th->source,
eddc9ec5 1849 iph->daddr, th->dest,
463c84b9 1850 inet_iif(skb));
1da177e4 1851 if (sk2) {
9469c7b4
YH
1852 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
1853 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
1854 sk = sk2;
1855 goto process;
1856 }
1857 /* Fall through to ACK */
1858 }
1859 case TCP_TW_ACK:
1860 tcp_v4_timewait_ack(sk, skb);
1861 break;
1862 case TCP_TW_RST:
1863 goto no_tcp_socket;
1864 case TCP_TW_SUCCESS:;
1865 }
1866 goto discard_it;
1867}
1868
ccb7c410
DM
1869static struct timewait_sock_ops tcp_timewait_sock_ops = {
1870 .twsk_obj_size = sizeof(struct tcp_timewait_sock),
1871 .twsk_unique = tcp_twsk_unique,
1872 .twsk_destructor= tcp_twsk_destructor,
ccb7c410 1873};
1da177e4 1874
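/* Cache the input route and incoming ifindex on the socket:
 * tcp_v4_early_demux() can then attach the route to later segments without
 * a route lookup, and the fast path in tcp_v4_do_rcv() revalidates the
 * cache and drops it once it goes stale.
 */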
63d02d15 1875void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
5d299f3d
ED
1876{
1877 struct dst_entry *dst = skb_dst(skb);
1878
1879 dst_hold(dst);
1880 sk->sk_rx_dst = dst;
1881 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1882}
63d02d15 1883EXPORT_SYMBOL(inet_sk_rx_dst_set);
5d299f3d 1884
3b401a81 1885const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1886 .queue_xmit = ip_queue_xmit,
1887 .send_check = tcp_v4_send_check,
1888 .rebuild_header = inet_sk_rebuild_header,
5d299f3d 1889 .sk_rx_dst_set = inet_sk_rx_dst_set,
543d9cfe
ACM
1890 .conn_request = tcp_v4_conn_request,
1891 .syn_recv_sock = tcp_v4_syn_recv_sock,
543d9cfe
ACM
1892 .net_header_len = sizeof(struct iphdr),
1893 .setsockopt = ip_setsockopt,
1894 .getsockopt = ip_getsockopt,
1895 .addr2sockaddr = inet_csk_addr2sockaddr,
1896 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1897 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1898#ifdef CONFIG_COMPAT
543d9cfe
ACM
1899 .compat_setsockopt = compat_ip_setsockopt,
1900 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1901#endif
1da177e4 1902};
4bc2f18b 1903EXPORT_SYMBOL(ipv4_specific);
1da177e4 1904
cfb6eeb4 1905#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1906static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
cfb6eeb4 1907 .md5_lookup = tcp_v4_md5_lookup,
49a72dfb 1908 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1909 .md5_parse = tcp_v4_parse_md5_keys,
cfb6eeb4 1910};
b6332e6c 1911#endif
cfb6eeb4 1912
1da177e4
LT
1913/* NOTE: A lot of things are set to zero explicitly by the call to
1914 * sk_alloc(), so they need not be done here.
1915 */
1916static int tcp_v4_init_sock(struct sock *sk)
1917{
6687e988 1918 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4 1919
900f65d3 1920 tcp_init_sock(sk);
1da177e4 1921
8292a17a 1922 icsk->icsk_af_ops = &ipv4_specific;
900f65d3 1923
cfb6eeb4 1924#ifdef CONFIG_TCP_MD5SIG
ac807fa8 1925 tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
cfb6eeb4 1926#endif
1da177e4 1927
1da177e4
LT
1928 return 0;
1929}
1930
7d06b2e0 1931void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1932{
1933 struct tcp_sock *tp = tcp_sk(sk);
1934
1935 tcp_clear_xmit_timers(sk);
1936
6687e988 1937 tcp_cleanup_congestion_control(sk);
317a76f9 1938
1da177e4 1939 /* Clean up the write buffer. */
fe067e8a 1940 tcp_write_queue_purge(sk);
1da177e4
LT
1941
1942 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 1943 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 1944
cfb6eeb4
YH
1945#ifdef CONFIG_TCP_MD5SIG
1946 /* Clean up the MD5 key list, if any */
1947 if (tp->md5sig_info) {
a915da9b 1948 tcp_clear_md5_list(sk);
a8afca03 1949 kfree_rcu(tp->md5sig_info, rcu);
cfb6eeb4
YH
1950 tp->md5sig_info = NULL;
1951 }
1952#endif
1953
1a2449a8
CL
1954#ifdef CONFIG_NET_DMA
1955 /* Cleans up our sk_async_wait_queue */
e905a9ed 1956 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
1957#endif
1958
1da177e4
LT
1959 /* Clean up the prequeue; it really must be empty. */
1960 __skb_queue_purge(&tp->ucopy.prequeue);
1961
1962 /* Clean up a referenced TCP bind bucket. */
463c84b9 1963 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1964 inet_put_port(sk);
1da177e4 1965
168a8f58 1966 BUG_ON(tp->fastopen_rsk != NULL);
435cf559 1967
cf60af03
YC
1968 /* If socket is aborted during connect operation */
1969 tcp_free_fastopen_req(tp);
1970
180d8cd9 1971 sk_sockets_allocated_dec(sk);
d1a4c0b3 1972 sock_release_memcg(sk);
1da177e4 1973}
1da177e4
LT
1974EXPORT_SYMBOL(tcp_v4_destroy_sock);
1975
1976#ifdef CONFIG_PROC_FS
1977/* Proc filesystem TCP sock list dumping. */
1978
a8b690f9
TH
1979/*
1980 * Get the next listener socket following cur. If cur is NULL, get the
1981 * first socket starting from the bucket given in st->bucket; when
1982 * st->bucket is zero the very first socket in the hash table is returned.
1983 */
1da177e4
LT
1984static void *listening_get_next(struct seq_file *seq, void *cur)
1985{
463c84b9 1986 struct inet_connection_sock *icsk;
c25eb3bf 1987 struct hlist_nulls_node *node;
1da177e4 1988 struct sock *sk = cur;
5caea4ea 1989 struct inet_listen_hashbucket *ilb;
5799de0b 1990 struct tcp_iter_state *st = seq->private;
a4146b1b 1991 struct net *net = seq_file_net(seq);
1da177e4
LT
1992
1993 if (!sk) {
a8b690f9 1994 ilb = &tcp_hashinfo.listening_hash[st->bucket];
5caea4ea 1995 spin_lock_bh(&ilb->lock);
c25eb3bf 1996 sk = sk_nulls_head(&ilb->head);
a8b690f9 1997 st->offset = 0;
1da177e4
LT
1998 goto get_sk;
1999 }
5caea4ea 2000 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1da177e4 2001 ++st->num;
a8b690f9 2002 ++st->offset;
1da177e4
LT
2003
2004 if (st->state == TCP_SEQ_STATE_OPENREQ) {
60236fdd 2005 struct request_sock *req = cur;
1da177e4 2006
72a3effa 2007 icsk = inet_csk(st->syn_wait_sk);
1da177e4
LT
2008 req = req->dl_next;
2009 while (1) {
2010 while (req) {
bdccc4ca 2011 if (req->rsk_ops->family == st->family) {
1da177e4
LT
2012 cur = req;
2013 goto out;
2014 }
2015 req = req->dl_next;
2016 }
72a3effa 2017 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
1da177e4
LT
2018 break;
2019get_req:
463c84b9 2020 req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
1da177e4 2021 }
1bde5ac4 2022 sk = sk_nulls_next(st->syn_wait_sk);
1da177e4 2023 st->state = TCP_SEQ_STATE_LISTENING;
463c84b9 2024 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2025 } else {
e905a9ed 2026 icsk = inet_csk(sk);
463c84b9
ACM
2027 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2028 if (reqsk_queue_len(&icsk->icsk_accept_queue))
1da177e4 2029 goto start_req;
463c84b9 2030 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1bde5ac4 2031 sk = sk_nulls_next(sk);
1da177e4
LT
2032 }
2033get_sk:
c25eb3bf 2034 sk_nulls_for_each_from(sk, node) {
8475ef9f
PE
2035 if (!net_eq(sock_net(sk), net))
2036 continue;
2037 if (sk->sk_family == st->family) {
1da177e4
LT
2038 cur = sk;
2039 goto out;
2040 }
e905a9ed 2041 icsk = inet_csk(sk);
463c84b9
ACM
2042 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2043 if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
1da177e4
LT
2044start_req:
2045 st->uid = sock_i_uid(sk);
2046 st->syn_wait_sk = sk;
2047 st->state = TCP_SEQ_STATE_OPENREQ;
2048 st->sbucket = 0;
2049 goto get_req;
2050 }
463c84b9 2051 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4 2052 }
5caea4ea 2053 spin_unlock_bh(&ilb->lock);
a8b690f9 2054 st->offset = 0;
0f7ff927 2055 if (++st->bucket < INET_LHTABLE_SIZE) {
5caea4ea
ED
2056 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2057 spin_lock_bh(&ilb->lock);
c25eb3bf 2058 sk = sk_nulls_head(&ilb->head);
1da177e4
LT
2059 goto get_sk;
2060 }
2061 cur = NULL;
2062out:
2063 return cur;
2064}
2065
2066static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2067{
a8b690f9
TH
2068 struct tcp_iter_state *st = seq->private;
2069 void *rc;
2070
2071 st->bucket = 0;
2072 st->offset = 0;
2073 rc = listening_get_next(seq, NULL);
1da177e4
LT
2074
2075 while (rc && *pos) {
2076 rc = listening_get_next(seq, rc);
2077 --*pos;
2078 }
2079 return rc;
2080}
2081
05dbc7b5 2082static inline bool empty_bucket(const struct tcp_iter_state *st)
6eac5604 2083{
05dbc7b5 2084 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
6eac5604
AK
2085}
2086
a8b690f9
TH
2087/*
2088 * Get the first established socket, starting from the bucket given in st->bucket.
2089 * If st->bucket is zero, the very first socket in the hash is returned.
2090 */
1da177e4
LT
2091static void *established_get_first(struct seq_file *seq)
2092{
5799de0b 2093 struct tcp_iter_state *st = seq->private;
a4146b1b 2094 struct net *net = seq_file_net(seq);
1da177e4
LT
2095 void *rc = NULL;
2096
a8b690f9
TH
2097 st->offset = 0;
2098 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1da177e4 2099 struct sock *sk;
3ab5aee7 2100 struct hlist_nulls_node *node;
9db66bdc 2101 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1da177e4 2102
6eac5604
AK
2103 /* Lockless fast path for the common case of empty buckets */
2104 if (empty_bucket(st))
2105 continue;
2106
9db66bdc 2107 spin_lock_bh(lock);
3ab5aee7 2108 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
f40c8174 2109 if (sk->sk_family != st->family ||
878628fb 2110 !net_eq(sock_net(sk), net)) {
1da177e4
LT
2111 continue;
2112 }
2113 rc = sk;
2114 goto out;
2115 }
9db66bdc 2116 spin_unlock_bh(lock);
1da177e4
LT
2117 }
2118out:
2119 return rc;
2120}
2121
2122static void *established_get_next(struct seq_file *seq, void *cur)
2123{
2124 struct sock *sk = cur;
3ab5aee7 2125 struct hlist_nulls_node *node;
5799de0b 2126 struct tcp_iter_state *st = seq->private;
a4146b1b 2127 struct net *net = seq_file_net(seq);
1da177e4
LT
2128
2129 ++st->num;
a8b690f9 2130 ++st->offset;
1da177e4 2131
05dbc7b5 2132 sk = sk_nulls_next(sk);
1da177e4 2133
3ab5aee7 2134 sk_nulls_for_each_from(sk, node) {
878628fb 2135 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
05dbc7b5 2136 return sk;
1da177e4
LT
2137 }
2138
05dbc7b5
ED
2139 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2140 ++st->bucket;
2141 return established_get_first(seq);
1da177e4
LT
2142}
2143
2144static void *established_get_idx(struct seq_file *seq, loff_t pos)
2145{
a8b690f9
TH
2146 struct tcp_iter_state *st = seq->private;
2147 void *rc;
2148
2149 st->bucket = 0;
2150 rc = established_get_first(seq);
1da177e4
LT
2151
2152 while (rc && pos) {
2153 rc = established_get_next(seq, rc);
2154 --pos;
7174259e 2155 }
1da177e4
LT
2156 return rc;
2157}
2158
2159static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2160{
2161 void *rc;
5799de0b 2162 struct tcp_iter_state *st = seq->private;
1da177e4 2163
1da177e4
LT
2164 st->state = TCP_SEQ_STATE_LISTENING;
2165 rc = listening_get_idx(seq, &pos);
2166
2167 if (!rc) {
1da177e4
LT
2168 st->state = TCP_SEQ_STATE_ESTABLISHED;
2169 rc = established_get_idx(seq, pos);
2170 }
2171
2172 return rc;
2173}
2174
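/* Resume an interrupted dump: rather than walking the whole table again on
 * each read() of /proc/net/tcp, restart from the bucket and in-bucket
 * offset recorded on the previous pass and replay only 'offset' entries.
 * st->num is restored afterwards because the replay itself increments it.
 */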
a8b690f9
TH
2175static void *tcp_seek_last_pos(struct seq_file *seq)
2176{
2177 struct tcp_iter_state *st = seq->private;
2178 int offset = st->offset;
2179 int orig_num = st->num;
2180 void *rc = NULL;
2181
2182 switch (st->state) {
2183 case TCP_SEQ_STATE_OPENREQ:
2184 case TCP_SEQ_STATE_LISTENING:
2185 if (st->bucket >= INET_LHTABLE_SIZE)
2186 break;
2187 st->state = TCP_SEQ_STATE_LISTENING;
2188 rc = listening_get_next(seq, NULL);
2189 while (offset-- && rc)
2190 rc = listening_get_next(seq, rc);
2191 if (rc)
2192 break;
2193 st->bucket = 0;
05dbc7b5 2194 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2195 /* Fallthrough */
2196 case TCP_SEQ_STATE_ESTABLISHED:
a8b690f9
TH
2197 if (st->bucket > tcp_hashinfo.ehash_mask)
2198 break;
2199 rc = established_get_first(seq);
2200 while (offset-- && rc)
2201 rc = established_get_next(seq, rc);
2202 }
2203
2204 st->num = orig_num;
2205
2206 return rc;
2207}
2208
1da177e4
LT
2209static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2210{
5799de0b 2211 struct tcp_iter_state *st = seq->private;
a8b690f9
TH
2212 void *rc;
2213
2214 if (*pos && *pos == st->last_pos) {
2215 rc = tcp_seek_last_pos(seq);
2216 if (rc)
2217 goto out;
2218 }
2219
1da177e4
LT
2220 st->state = TCP_SEQ_STATE_LISTENING;
2221 st->num = 0;
a8b690f9
TH
2222 st->bucket = 0;
2223 st->offset = 0;
2224 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2225
2226out:
2227 st->last_pos = *pos;
2228 return rc;
1da177e4
LT
2229}
2230
2231static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2232{
a8b690f9 2233 struct tcp_iter_state *st = seq->private;
1da177e4 2234 void *rc = NULL;
1da177e4
LT
2235
2236 if (v == SEQ_START_TOKEN) {
2237 rc = tcp_get_idx(seq, 0);
2238 goto out;
2239 }
1da177e4
LT
2240
2241 switch (st->state) {
2242 case TCP_SEQ_STATE_OPENREQ:
2243 case TCP_SEQ_STATE_LISTENING:
2244 rc = listening_get_next(seq, v);
2245 if (!rc) {
1da177e4 2246 st->state = TCP_SEQ_STATE_ESTABLISHED;
a8b690f9
TH
2247 st->bucket = 0;
2248 st->offset = 0;
1da177e4
LT
2249 rc = established_get_first(seq);
2250 }
2251 break;
2252 case TCP_SEQ_STATE_ESTABLISHED:
1da177e4
LT
2253 rc = established_get_next(seq, v);
2254 break;
2255 }
2256out:
2257 ++*pos;
a8b690f9 2258 st->last_pos = *pos;
1da177e4
LT
2259 return rc;
2260}
2261
2262static void tcp_seq_stop(struct seq_file *seq, void *v)
2263{
5799de0b 2264 struct tcp_iter_state *st = seq->private;
1da177e4
LT
2265
2266 switch (st->state) {
2267 case TCP_SEQ_STATE_OPENREQ:
2268 if (v) {
463c84b9
ACM
2269 struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
2270 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
1da177e4
LT
2271 }
2272 case TCP_SEQ_STATE_LISTENING:
2273 if (v != SEQ_START_TOKEN)
5caea4ea 2274 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
1da177e4 2275 break;
1da177e4
LT
2276 case TCP_SEQ_STATE_ESTABLISHED:
2277 if (v)
9db66bdc 2278 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1da177e4
LT
2279 break;
2280 }
2281}
2282
73cb88ec 2283int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4 2284{
d9dda78b 2285 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
1da177e4 2286 struct tcp_iter_state *s;
52d6f3f1 2287 int err;
1da177e4 2288
52d6f3f1
DL
2289 err = seq_open_net(inode, file, &afinfo->seq_ops,
2290 sizeof(struct tcp_iter_state));
2291 if (err < 0)
2292 return err;
f40c8174 2293
52d6f3f1 2294 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2295 s->family = afinfo->family;
a8b690f9 2296 s->last_pos = 0;
f40c8174
DL
2297 return 0;
2298}
73cb88ec 2299EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2300
6f8b13bc 2301int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2302{
2303 int rc = 0;
2304 struct proc_dir_entry *p;
2305
9427c4b3
DL
2306 afinfo->seq_ops.start = tcp_seq_start;
2307 afinfo->seq_ops.next = tcp_seq_next;
2308 afinfo->seq_ops.stop = tcp_seq_stop;
2309
84841c3c 2310 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2311 afinfo->seq_fops, afinfo);
84841c3c 2312 if (!p)
1da177e4
LT
2313 rc = -ENOMEM;
2314 return rc;
2315}
4bc2f18b 2316EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2317
6f8b13bc 2318void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2319{
ece31ffd 2320 remove_proc_entry(afinfo->name, net->proc_net);
1da177e4 2321}
4bc2f18b 2322EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2323
cf533ea5 2324static void get_openreq4(const struct sock *sk, const struct request_sock *req,
652586df 2325 struct seq_file *f, int i, kuid_t uid)
1da177e4 2326{
2e6599cb 2327 const struct inet_request_sock *ireq = inet_rsk(req);
a399a805 2328 long delta = req->expires - jiffies;
1da177e4 2329
5e659e4c 2330 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2331 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
1da177e4 2332 i,
634fb979 2333 ireq->ir_loc_addr,
c720c7e8 2334 ntohs(inet_sk(sk)->inet_sport),
634fb979
ED
2335 ireq->ir_rmt_addr,
2336 ntohs(ireq->ir_rmt_port),
1da177e4
LT
2337 TCP_SYN_RECV,
2338 0, 0, /* could print option size, but that is af dependent. */
2339 1, /* timers active (only the expire timer) */
a399a805 2340 jiffies_delta_to_clock_t(delta),
e6c022a4 2341 req->num_timeout,
a7cb5a49 2342 from_kuid_munged(seq_user_ns(f), uid),
1da177e4
LT
2343 0, /* non standard timer */
2344 0, /* open_requests have no inode */
2345 atomic_read(&sk->sk_refcnt),
652586df 2346 req);
1da177e4
LT
2347}
2348
652586df 2349static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
1da177e4
LT
2350{
2351 int timer_active;
2352 unsigned long timer_expires;
cf533ea5 2353 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2354 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2355 const struct inet_sock *inet = inet_sk(sk);
168a8f58 2356 struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
c720c7e8
ED
2357 __be32 dest = inet->inet_daddr;
2358 __be32 src = inet->inet_rcv_saddr;
2359 __u16 destp = ntohs(inet->inet_dport);
2360 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2361 int rx_queue;
1da177e4 2362
6ba8a3b1
ND
2363 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2364 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2365 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1da177e4 2366 timer_active = 1;
463c84b9
ACM
2367 timer_expires = icsk->icsk_timeout;
2368 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2369 timer_active = 4;
463c84b9 2370 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2371 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2372 timer_active = 2;
cf4c6bf8 2373 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2374 } else {
2375 timer_active = 0;
2376 timer_expires = jiffies;
2377 }
2378
49d09007
ED
2379 if (sk->sk_state == TCP_LISTEN)
2380 rx_queue = sk->sk_ack_backlog;
2381 else
2382 /*
2383 * Because we don't lock the socket, we might find a transient negative value.
2384 */
2385 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2386
5e659e4c 2387 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
652586df 2388 "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
cf4c6bf8 2389 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2390 tp->write_seq - tp->snd_una,
49d09007 2391 rx_queue,
1da177e4 2392 timer_active,
a399a805 2393 jiffies_delta_to_clock_t(timer_expires - jiffies),
463c84b9 2394 icsk->icsk_retransmits,
a7cb5a49 2395 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
6687e988 2396 icsk->icsk_probes_out,
cf4c6bf8
IJ
2397 sock_i_ino(sk),
2398 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2399 jiffies_to_clock_t(icsk->icsk_rto),
2400 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2401 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2402 tp->snd_cwnd,
168a8f58
JC
2403 sk->sk_state == TCP_LISTEN ?
2404 (fastopenq ? fastopenq->max_qlen : 0) :
652586df 2405 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
1da177e4
LT
2406}
2407
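/* A row emitted above looks roughly like this (wrapped here for width):
 *
 *   0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000
 *      00000000     0        0 12345 1 ffff88003d3af3c0 ...
 *
 * Addresses and ports are hex; the address is the raw __be32 printed as
 * host hex, so on a little-endian machine 127.0.0.1 shows as 0100007F,
 * 0016 is port 22, and state 0A is TCP_LISTEN.
 */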
cf533ea5 2408static void get_timewait4_sock(const struct inet_timewait_sock *tw,
652586df 2409 struct seq_file *f, int i)
1da177e4 2410{
23f33c2d 2411 __be32 dest, src;
1da177e4 2412 __u16 destp, srcp;
e2a1d3e4 2413 s32 delta = tw->tw_ttd - inet_tw_time_stamp();
1da177e4
LT
2414
2415 dest = tw->tw_daddr;
2416 src = tw->tw_rcv_saddr;
2417 destp = ntohs(tw->tw_dport);
2418 srcp = ntohs(tw->tw_sport);
2419
5e659e4c 2420 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
652586df 2421 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
1da177e4 2422 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
a399a805 2423 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
652586df 2424 atomic_read(&tw->tw_refcnt), tw);
1da177e4
LT
2425}
2426
2427#define TMPSZ 150
2428
2429static int tcp4_seq_show(struct seq_file *seq, void *v)
2430{
5799de0b 2431 struct tcp_iter_state *st;
05dbc7b5 2432 struct sock *sk = v;
1da177e4 2433
652586df 2434 seq_setwidth(seq, TMPSZ - 1);
1da177e4 2435 if (v == SEQ_START_TOKEN) {
652586df 2436 seq_puts(seq, " sl local_address rem_address st tx_queue "
1da177e4
LT
2437 "rx_queue tr tm->when retrnsmt uid timeout "
2438 "inode");
2439 goto out;
2440 }
2441 st = seq->private;
2442
2443 switch (st->state) {
2444 case TCP_SEQ_STATE_LISTENING:
2445 case TCP_SEQ_STATE_ESTABLISHED:
05dbc7b5 2446 if (sk->sk_state == TCP_TIME_WAIT)
652586df 2447 get_timewait4_sock(v, seq, st->num);
05dbc7b5 2448 else
652586df 2449 get_tcp4_sock(v, seq, st->num);
1da177e4
LT
2450 break;
2451 case TCP_SEQ_STATE_OPENREQ:
652586df 2452 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
1da177e4
LT
2453 break;
2454 }
1da177e4 2455out:
652586df 2456 seq_pad(seq, '\n');
1da177e4
LT
2457 return 0;
2458}
2459
73cb88ec
AV
2460static const struct file_operations tcp_afinfo_seq_fops = {
2461 .owner = THIS_MODULE,
2462 .open = tcp_seq_open,
2463 .read = seq_read,
2464 .llseek = seq_lseek,
2465 .release = seq_release_net
2466};
2467
1da177e4 2468static struct tcp_seq_afinfo tcp4_seq_afinfo = {
1da177e4
LT
2469 .name = "tcp",
2470 .family = AF_INET,
73cb88ec 2471 .seq_fops = &tcp_afinfo_seq_fops,
9427c4b3
DL
2472 .seq_ops = {
2473 .show = tcp4_seq_show,
2474 },
1da177e4
LT
2475};
2476
2c8c1e72 2477static int __net_init tcp4_proc_init_net(struct net *net)
757764f6
PE
2478{
2479 return tcp_proc_register(net, &tcp4_seq_afinfo);
2480}
2481
2c8c1e72 2482static void __net_exit tcp4_proc_exit_net(struct net *net)
757764f6
PE
2483{
2484 tcp_proc_unregister(net, &tcp4_seq_afinfo);
2485}
2486
2487static struct pernet_operations tcp4_net_ops = {
2488 .init = tcp4_proc_init_net,
2489 .exit = tcp4_proc_exit_net,
2490};
2491
1da177e4
LT
2492int __init tcp4_proc_init(void)
2493{
757764f6 2494 return register_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2495}
2496
2497void tcp4_proc_exit(void)
2498{
757764f6 2499 unregister_pernet_subsys(&tcp4_net_ops);
1da177e4
LT
2500}
2501#endif /* CONFIG_PROC_FS */
2502
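/* Glue between the generic socket layer and IPv4 TCP: core socket calls
 * (connect, sendmsg, setsockopt, ...) are dispatched through this table,
 * while backlog_rcv lets release_sock() replay segments that tcp_v4_rcv()
 * queued while the socket was owned by a user context.
 */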
2503struct proto tcp_prot = {
2504 .name = "TCP",
2505 .owner = THIS_MODULE,
2506 .close = tcp_close,
2507 .connect = tcp_v4_connect,
2508 .disconnect = tcp_disconnect,
463c84b9 2509 .accept = inet_csk_accept,
1da177e4
LT
2510 .ioctl = tcp_ioctl,
2511 .init = tcp_v4_init_sock,
2512 .destroy = tcp_v4_destroy_sock,
2513 .shutdown = tcp_shutdown,
2514 .setsockopt = tcp_setsockopt,
2515 .getsockopt = tcp_getsockopt,
1da177e4 2516 .recvmsg = tcp_recvmsg,
7ba42910
CG
2517 .sendmsg = tcp_sendmsg,
2518 .sendpage = tcp_sendpage,
1da177e4 2519 .backlog_rcv = tcp_v4_do_rcv,
46d3ceab 2520 .release_cb = tcp_release_cb,
563d34d0 2521 .mtu_reduced = tcp_v4_mtu_reduced,
ab1e0a13
ACM
2522 .hash = inet_hash,
2523 .unhash = inet_unhash,
2524 .get_port = inet_csk_get_port,
1da177e4 2525 .enter_memory_pressure = tcp_enter_memory_pressure,
c9bee3b7 2526 .stream_memory_free = tcp_stream_memory_free,
1da177e4 2527 .sockets_allocated = &tcp_sockets_allocated,
0a5578cf 2528 .orphan_count = &tcp_orphan_count,
1da177e4
LT
2529 .memory_allocated = &tcp_memory_allocated,
2530 .memory_pressure = &tcp_memory_pressure,
a4fe34bf 2531 .sysctl_mem = sysctl_tcp_mem,
1da177e4
LT
2532 .sysctl_wmem = sysctl_tcp_wmem,
2533 .sysctl_rmem = sysctl_tcp_rmem,
2534 .max_header = MAX_TCP_HEADER,
2535 .obj_size = sizeof(struct tcp_sock),
3ab5aee7 2536 .slab_flags = SLAB_DESTROY_BY_RCU,
6d6ee43e 2537 .twsk_prot = &tcp_timewait_sock_ops,
60236fdd 2538 .rsk_prot = &tcp_request_sock_ops,
39d8cda7 2539 .h.hashinfo = &tcp_hashinfo,
7ba42910 2540 .no_autobind = true,
543d9cfe
ACM
2541#ifdef CONFIG_COMPAT
2542 .compat_setsockopt = compat_tcp_setsockopt,
2543 .compat_getsockopt = compat_tcp_getsockopt,
2544#endif
c255a458 2545#ifdef CONFIG_MEMCG_KMEM
d1a4c0b3
GC
2546 .init_cgroup = tcp_init_cgroup,
2547 .destroy_cgroup = tcp_destroy_cgroup,
2548 .proto_cgroup = tcp_proto_cgroup,
2549#endif
1da177e4 2550};
4bc2f18b 2551EXPORT_SYMBOL(tcp_prot);
1da177e4 2552
046ee902
DL
2553static int __net_init tcp_sk_init(struct net *net)
2554{
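	/* 2 == use ECN when the peer requests it, but do not request it on
	 * outgoing connections (see tcp_ecn in ip-sysctl.txt).
	 */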
5d134f1c 2555 net->ipv4.sysctl_tcp_ecn = 2;
be9f4a44 2556 return 0;
046ee902
DL
2557}
2558
2559static void __net_exit tcp_sk_exit(struct net *net)
2560{
b099ce26
EB
2561}
2562
2563static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2564{
2565 inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
046ee902
DL
2566}
2567
2568static struct pernet_operations __net_initdata tcp_sk_ops = {
b099ce26
EB
2569 .init = tcp_sk_init,
2570 .exit = tcp_sk_exit,
2571 .exit_batch = tcp_sk_exit_batch,
046ee902
DL
2572};
2573
9b0f976f 2574void __init tcp_v4_init(void)
1da177e4 2575{
5caea4ea 2576 inet_hashinfo_init(&tcp_hashinfo);
6a1b3054 2577 if (register_pernet_subsys(&tcp_sk_ops))
1da177e4 2578 panic("Failed to create the TCP control socket.\n");
1da177e4 2579}