git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git / blame - net/ipv4/tcp_ipv4.c
commit: tcp: use tcp_v4_send_synack on first SYN-ACK
1da177e4
LT
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * Implementation of the Transmission Control Protocol(TCP).
7 *
1da177e4
LT
8 * IPv4 specific functions
9 *
10 *
11 * code split from:
12 * linux/ipv4/tcp.c
13 * linux/ipv4/tcp_input.c
14 * linux/ipv4/tcp_output.c
15 *
16 * See tcp.c for author information
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24/*
25 * Changes:
26 * David S. Miller : New socket lookup architecture.
27 * This code is dedicated to John Dyson.
28 * David S. Miller : Change semantics of established hash,
29 * half is devoted to TIME_WAIT sockets
30 * and the rest go in the other half.
31 * Andi Kleen : Add support for syncookies and fixed
32 * some bugs: ip options weren't passed to
33 * the TCP layer, missed a check for an
34 * ACK bit.
35 * Andi Kleen : Implemented fast path mtu discovery.
36 * Fixed many serious bugs in the
60236fdd 37 * request_sock handling and moved
1da177e4
LT
38 * most of it into the af independent code.
39 * Added tail drop and some other bugfixes.
caa20d9a 40 * Added new listen semantics.
1da177e4
LT
41 * Mike McLagan : Routing by source
42 * Juan Jose Ciarlante: ip_dynaddr bits
43 * Andi Kleen: various fixes.
44 * Vitaly E. Lavrov : Transparent proxy revived after a
45 * year-long coma.
46 * Andi Kleen : Fix new listen.
47 * Andi Kleen : Fix accept error reporting.
48 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
49 * Alexey Kuznetsov allows both IPv4 and IPv6 sockets to bind
50 * a single port at the same time.
51 */
52
afd46503 53#define pr_fmt(fmt) "TCP: " fmt
1da177e4 54
eb4dea58 55#include <linux/bottom_half.h>
1da177e4
LT
56#include <linux/types.h>
57#include <linux/fcntl.h>
58#include <linux/module.h>
59#include <linux/random.h>
60#include <linux/cache.h>
61#include <linux/jhash.h>
62#include <linux/init.h>
63#include <linux/times.h>
5a0e3ad6 64#include <linux/slab.h>
1da177e4 65
457c4cbc 66#include <net/net_namespace.h>
1da177e4 67#include <net/icmp.h>
304a1618 68#include <net/inet_hashtables.h>
1da177e4 69#include <net/tcp.h>
20380731 70#include <net/transp_v6.h>
1da177e4
LT
71#include <net/ipv6.h>
72#include <net/inet_common.h>
6d6ee43e 73#include <net/timewait_sock.h>
1da177e4 74#include <net/xfrm.h>
1a2449a8 75#include <net/netdma.h>
6e5714ea 76#include <net/secure_seq.h>
d1a4c0b3 77#include <net/tcp_memcontrol.h>
076bb0c8 78#include <net/busy_poll.h>
1da177e4
LT
79
80#include <linux/inet.h>
81#include <linux/ipv6.h>
82#include <linux/stddef.h>
83#include <linux/proc_fs.h>
84#include <linux/seq_file.h>
85
cfb6eeb4
YH
86#include <linux/crypto.h>
87#include <linux/scatterlist.h>
88
ab32ea5d
BH
89int sysctl_tcp_tw_reuse __read_mostly;
90int sysctl_tcp_low_latency __read_mostly;
4bc2f18b 91EXPORT_SYMBOL(sysctl_tcp_low_latency);
1da177e4 92
1da177e4 93
cfb6eeb4 94#ifdef CONFIG_TCP_MD5SIG
a915da9b 95static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
318cf7aa 96 __be32 daddr, __be32 saddr, const struct tcphdr *th);
cfb6eeb4
YH
97#endif
98
5caea4ea 99struct inet_hashinfo tcp_hashinfo;
4bc2f18b 100EXPORT_SYMBOL(tcp_hashinfo);
1da177e4 101
cf533ea5 102static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
1da177e4 103{
eddc9ec5
ACM
104 return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
105 ip_hdr(skb)->saddr,
aa8223c7
ACM
106 tcp_hdr(skb)->dest,
107 tcp_hdr(skb)->source);
1da177e4
LT
108}
109
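
/* Editor's illustration (not part of this file): a userspace sketch of the
 * *shape* of the ISN computation above.  secure_tcp_sequence_number()
 * hashes the 4-tuple with a boot-time secret and adds a clock component so
 * that sequence spaces of successive connections on the same 4-tuple do
 * not collide.  The mixing function below is a hypothetical placeholder,
 * NOT the kernel's MD5-based one; build with: cc -o isn isn.c
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

static uint32_t toy_mix(uint32_t a, uint32_t b, uint32_t c, uint32_t secret)
{
	uint32_t h = a ^ secret;

	h = (h ^ b) * 0x9e3779b1u;	/* placeholder mixing, not MD5 */
	h = (h ^ c) * 0x85ebca6bu;
	return h ^ (h >> 16);
}

static uint32_t toy_isn(uint32_t saddr, uint32_t daddr,
			uint16_t sport, uint16_t dport)
{
	const uint32_t secret = 0xdeadbeef;	/* random at boot in reality */
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	/* hash(4-tuple, secret) + clock: same structure, toy ingredients */
	return toy_mix(saddr, daddr, ((uint32_t)sport << 16) | dport, secret) +
	       (uint32_t)(ts.tv_nsec / 64) + (uint32_t)ts.tv_sec * 15625000u;
}

int main(void)
{
	printf("toy ISN: %u\n", toy_isn(0x7f000001, 0x7f000001, 12345, 80));
	return 0;
}
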
6d6ee43e
ACM
110int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
111{
112 const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
113 struct tcp_sock *tp = tcp_sk(sk);
114
115 /* With PAWS, it is safe from the viewpoint
116 of data integrity. Even without PAWS it is safe provided sequence
117 spaces do not overlap i.e. at data rates <= 80Mbit/sec.
118
119 Actually, the idea is close to VJ's, only the timestamp cache is
120 held not per host, but per port pair, and the TW bucket is used as the
121 state holder.
122
123 If the TW bucket has already been destroyed we fall back to VJ's scheme
124 and use initial timestamp retrieved from peer table.
125 */
126 if (tcptw->tw_ts_recent_stamp &&
127 (twp == NULL || (sysctl_tcp_tw_reuse &&
9d729f72 128 get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
6d6ee43e
ACM
129 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
130 if (tp->write_seq == 0)
131 tp->write_seq = 1;
132 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
133 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
134 sock_hold(sktw);
135 return 1;
136 }
137
138 return 0;
139}
6d6ee43e
ACM
140EXPORT_SYMBOL_GPL(tcp_twsk_unique);
141
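
/* Editor's illustration (not part of this file): the sequence bump
 * tcp_twsk_unique() applies when a TIME-WAIT port pair is reused.
 * Starting the new connection at tw_snd_nxt + 65535 + 2 places its
 * sequence space beyond anything the old incarnation could still have
 * in flight (65535 being the largest unscaled window), modulo 2^32.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t tw_snd_nxt = 0xffffff00u;		/* old next send seq */
	uint32_t write_seq = tw_snd_nxt + 65535 + 2;	/* wraps mod 2^32 */

	if (write_seq == 0)	/* same reserved-zero check as above */
		write_seq = 1;

	printf("old snd_nxt=%u, new write_seq=%u\n", tw_snd_nxt, write_seq);
	return 0;
}
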
1da177e4
LT
142/* This will initiate an outgoing connection. */
143int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
144{
2d7192d6 145 struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
1da177e4
LT
146 struct inet_sock *inet = inet_sk(sk);
147 struct tcp_sock *tp = tcp_sk(sk);
dca8b089 148 __be16 orig_sport, orig_dport;
bada8adc 149 __be32 daddr, nexthop;
da905bd1 150 struct flowi4 *fl4;
2d7192d6 151 struct rtable *rt;
1da177e4 152 int err;
f6d8bd05 153 struct ip_options_rcu *inet_opt;
1da177e4
LT
154
155 if (addr_len < sizeof(struct sockaddr_in))
156 return -EINVAL;
157
158 if (usin->sin_family != AF_INET)
159 return -EAFNOSUPPORT;
160
161 nexthop = daddr = usin->sin_addr.s_addr;
f6d8bd05
ED
162 inet_opt = rcu_dereference_protected(inet->inet_opt,
163 sock_owned_by_user(sk));
164 if (inet_opt && inet_opt->opt.srr) {
1da177e4
LT
165 if (!daddr)
166 return -EINVAL;
f6d8bd05 167 nexthop = inet_opt->opt.faddr;
1da177e4
LT
168 }
169
dca8b089
DM
170 orig_sport = inet->inet_sport;
171 orig_dport = usin->sin_port;
da905bd1
DM
172 fl4 = &inet->cork.fl.u.ip4;
173 rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
b23dd4fe
DM
174 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
175 IPPROTO_TCP,
0e0d44ab 176 orig_sport, orig_dport, sk);
b23dd4fe
DM
177 if (IS_ERR(rt)) {
178 err = PTR_ERR(rt);
179 if (err == -ENETUNREACH)
f1d8cba6 180 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
b23dd4fe 181 return err;
584bdf8c 182 }
1da177e4
LT
183
184 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
185 ip_rt_put(rt);
186 return -ENETUNREACH;
187 }
188
f6d8bd05 189 if (!inet_opt || !inet_opt->opt.srr)
da905bd1 190 daddr = fl4->daddr;
1da177e4 191
c720c7e8 192 if (!inet->inet_saddr)
da905bd1 193 inet->inet_saddr = fl4->saddr;
c720c7e8 194 inet->inet_rcv_saddr = inet->inet_saddr;
1da177e4 195
c720c7e8 196 if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
1da177e4
LT
197 /* Reset inherited state */
198 tp->rx_opt.ts_recent = 0;
199 tp->rx_opt.ts_recent_stamp = 0;
ee995283
PE
200 if (likely(!tp->repair))
201 tp->write_seq = 0;
1da177e4
LT
202 }
203
295ff7ed 204 if (tcp_death_row.sysctl_tw_recycle &&
81166dd6
DM
205 !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
206 tcp_fetch_timewait_stamp(sk, &rt->dst);
1da177e4 207
c720c7e8
ED
208 inet->inet_dport = usin->sin_port;
209 inet->inet_daddr = daddr;
1da177e4 210
d83d8461 211 inet_csk(sk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
212 if (inet_opt)
213 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1da177e4 214
bee7ca9e 215 tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
1da177e4
LT
216
217 /* Socket identity is still unknown (sport may be zero).
218 * However we set state to SYN-SENT and, without releasing the socket
219 * lock, select a source port, enter ourselves into the hash tables and
220 * complete initialization after this.
221 */
222 tcp_set_state(sk, TCP_SYN_SENT);
a7f5e7f1 223 err = inet_hash_connect(&tcp_death_row, sk);
1da177e4
LT
224 if (err)
225 goto failure;
226
da905bd1 227 rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
b23dd4fe
DM
228 inet->inet_sport, inet->inet_dport, sk);
229 if (IS_ERR(rt)) {
230 err = PTR_ERR(rt);
231 rt = NULL;
1da177e4 232 goto failure;
b23dd4fe 233 }
1da177e4 234 /* OK, now commit destination to socket. */
bcd76111 235 sk->sk_gso_type = SKB_GSO_TCPV4;
d8d1f30b 236 sk_setup_caps(sk, &rt->dst);
1da177e4 237
ee995283 238 if (!tp->write_seq && likely(!tp->repair))
c720c7e8
ED
239 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
240 inet->inet_daddr,
241 inet->inet_sport,
1da177e4
LT
242 usin->sin_port);
243
c720c7e8 244 inet->inet_id = tp->write_seq ^ jiffies;
1da177e4 245
2b916477 246 err = tcp_connect(sk);
ee995283 247
1da177e4
LT
248 rt = NULL;
249 if (err)
250 goto failure;
251
252 return 0;
253
254failure:
7174259e
ACM
255 /*
256 * This unhashes the socket and releases the local port,
257 * if necessary.
258 */
1da177e4
LT
259 tcp_set_state(sk, TCP_CLOSE);
260 ip_rt_put(rt);
261 sk->sk_route_caps = 0;
c720c7e8 262 inet->inet_dport = 0;
1da177e4
LT
263 return err;
264}
4bc2f18b 265EXPORT_SYMBOL(tcp_v4_connect);
1da177e4 266
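
/* Editor's illustration (not part of this file): the userspace call that
 * lands in tcp_v4_connect().  A minimal blocking IPv4 client; the address
 * 127.0.0.1:8080 is an arbitrary example.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in dst;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_port = htons(8080);
	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);

	/* For TCP/IPv4 this enters tcp_v4_connect(): route lookup, source
	 * port selection, ISN choice, then tcp_connect() sends the SYN. */
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0)
		perror("connect");
	close(fd);
	return 0;
}
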
1da177e4 267/*
563d34d0
ED
268 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
269 * It can be called through tcp_release_cb() if the socket was owned by
270 * the user at the time tcp_v4_err() was called to handle the ICMP message.
1da177e4 271 */
563d34d0 272static void tcp_v4_mtu_reduced(struct sock *sk)
1da177e4
LT
273{
274 struct dst_entry *dst;
275 struct inet_sock *inet = inet_sk(sk);
563d34d0 276 u32 mtu = tcp_sk(sk)->mtu_info;
1da177e4 277
80d0a69f
DM
278 dst = inet_csk_update_pmtu(sk, mtu);
279 if (!dst)
1da177e4
LT
280 return;
281
1da177e4
LT
282 /* Something is about to be wrong... Remember the soft error
283 * in case this connection will not be able to recover.
284 */
285 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
286 sk->sk_err_soft = EMSGSIZE;
287
288 mtu = dst_mtu(dst);
289
290 if (inet->pmtudisc != IP_PMTUDISC_DONT &&
482fc609 291 ip_sk_accept_pmtu(sk) &&
d83d8461 292 inet_csk(sk)->icsk_pmtu_cookie > mtu) {
1da177e4
LT
293 tcp_sync_mss(sk, mtu);
294
295 /* Resend the TCP packet because it's
296 * clear that the old packet has been
297 * dropped. This is the new "fast" path mtu
298 * discovery.
299 */
300 tcp_simple_retransmit(sk);
301 } /* else let the usual retransmit timer handle it */
302}
303
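
/* Editor's illustration (not part of this file): after tcp_v4_mtu_reduced()
 * absorbs an ICMP_FRAG_NEEDED, the updated path MTU is visible to
 * userspace through getsockopt(IP_MTU) on a connected socket.  Sketch
 * only; call it with any connected TCP socket descriptor.
 */
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

static int query_path_mtu(int fd)
{
	int mtu = 0;
	socklen_t len = sizeof(mtu);

	if (getsockopt(fd, IPPROTO_IP, IP_MTU, &mtu, &len) < 0) {
		perror("getsockopt(IP_MTU)");
		return -1;
	}
	printf("current path MTU: %d\n", mtu);
	return mtu;
}
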
55be7a9c
DM
304static void do_redirect(struct sk_buff *skb, struct sock *sk)
305{
306 struct dst_entry *dst = __sk_dst_check(sk, 0);
307
1ed5c48f 308 if (dst)
6700c270 309 dst->ops->redirect(dst, sk, skb);
55be7a9c
DM
310}
311
1da177e4
LT
312/*
313 * This routine is called by the ICMP module when it gets some
314 * sort of error condition. If err < 0 then the socket should
315 * be closed and the error returned to the user. If err > 0
316 * it's just the icmp type << 8 | icmp code. After adjustment,
317 * the header points to the first 8 bytes of the tcp header. We need
318 * to find the appropriate port.
319 *
320 * The locking strategy used here is very "optimistic". When
321 * someone else accesses the socket the ICMP is just dropped
322 * and for some paths there is no check at all.
323 * A more general error queue to queue errors for later handling
324 * is probably better.
325 *
326 */
327
4d1a2d9e 328void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
1da177e4 329{
b71d1d42 330 const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
4d1a2d9e 331 struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
f1ecd5d9 332 struct inet_connection_sock *icsk;
1da177e4
LT
333 struct tcp_sock *tp;
334 struct inet_sock *inet;
4d1a2d9e
DL
335 const int type = icmp_hdr(icmp_skb)->type;
336 const int code = icmp_hdr(icmp_skb)->code;
1da177e4 337 struct sock *sk;
f1ecd5d9 338 struct sk_buff *skb;
168a8f58 339 struct request_sock *req;
1da177e4 340 __u32 seq;
f1ecd5d9 341 __u32 remaining;
1da177e4 342 int err;
4d1a2d9e 343 struct net *net = dev_net(icmp_skb->dev);
1da177e4 344
4d1a2d9e 345 if (icmp_skb->len < (iph->ihl << 2) + 8) {
dcfc23ca 346 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
1da177e4
LT
347 return;
348 }
349
fd54d716 350 sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
4d1a2d9e 351 iph->saddr, th->source, inet_iif(icmp_skb));
1da177e4 352 if (!sk) {
dcfc23ca 353 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
1da177e4
LT
354 return;
355 }
356 if (sk->sk_state == TCP_TIME_WAIT) {
9469c7b4 357 inet_twsk_put(inet_twsk(sk));
1da177e4
LT
358 return;
359 }
360
361 bh_lock_sock(sk);
362 /* If too many ICMPs get dropped on busy
363 * servers this needs to be solved differently.
563d34d0
ED
364 * We do take care of the PMTU discovery (RFC1191) special case:
365 * we can receive locally generated ICMP messages while socket is held.
1da177e4 366 */
b74aa930
ED
367 if (sock_owned_by_user(sk)) {
368 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
369 NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
370 }
1da177e4
LT
371 if (sk->sk_state == TCP_CLOSE)
372 goto out;
373
97e3ecd1 374 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
375 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
376 goto out;
377 }
378
f1ecd5d9 379 icsk = inet_csk(sk);
1da177e4 380 tp = tcp_sk(sk);
168a8f58 381 req = tp->fastopen_rsk;
1da177e4
LT
382 seq = ntohl(th->seq);
383 if (sk->sk_state != TCP_LISTEN &&
168a8f58
JC
384 !between(seq, tp->snd_una, tp->snd_nxt) &&
385 (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
386 /* For a Fast Open socket, allow seq to be snt_isn. */
de0744af 387 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
1da177e4
LT
388 goto out;
389 }
390
391 switch (type) {
55be7a9c
DM
392 case ICMP_REDIRECT:
393 do_redirect(icmp_skb, sk);
394 goto out;
1da177e4
LT
395 case ICMP_SOURCE_QUENCH:
396 /* Just silently ignore these. */
397 goto out;
398 case ICMP_PARAMETERPROB:
399 err = EPROTO;
400 break;
401 case ICMP_DEST_UNREACH:
402 if (code > NR_ICMP_UNREACH)
403 goto out;
404
405 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
0d4f0608
ED
406 /* We are not interested in TCP_LISTEN and open_requests
407 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
408 * they should go through unfragmented).
409 */
410 if (sk->sk_state == TCP_LISTEN)
411 goto out;
412
563d34d0 413 tp->mtu_info = info;
144d56e9 414 if (!sock_owned_by_user(sk)) {
563d34d0 415 tcp_v4_mtu_reduced(sk);
144d56e9
ED
416 } else {
417 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
418 sock_hold(sk);
419 }
1da177e4
LT
420 goto out;
421 }
422
423 err = icmp_err_convert[code].errno;
f1ecd5d9
DL
424 /* Check whether icmp_skb allows reverting the backoff
425 * (see draft-zimmermann-tcp-lcd) */
426 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
427 break;
428 if (seq != tp->snd_una || !icsk->icsk_retransmits ||
429 !icsk->icsk_backoff)
430 break;
431
168a8f58
JC
432 /* XXX (TFO) - revisit the following logic for TFO */
433
8f49c270
DM
434 if (sock_owned_by_user(sk))
435 break;
436
f1ecd5d9 437 icsk->icsk_backoff--;
740b0f18 438 inet_csk(sk)->icsk_rto = (tp->srtt_us ? __tcp_set_rto(tp) :
9ad7c049 439 TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
f1ecd5d9
DL
440 tcp_bound_rto(sk);
441
442 skb = tcp_write_queue_head(sk);
443 BUG_ON(!skb);
444
445 remaining = icsk->icsk_rto - min(icsk->icsk_rto,
446 tcp_time_stamp - TCP_SKB_CB(skb)->when);
447
448 if (remaining) {
449 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
450 remaining, TCP_RTO_MAX);
f1ecd5d9
DL
451 } else {
452 /* The reverted RTO has already expired;
453 * retransmit immediately. */
454 tcp_retransmit_timer(sk);
455 }
456
1da177e4
LT
457 break;
458 case ICMP_TIME_EXCEEDED:
459 err = EHOSTUNREACH;
460 break;
461 default:
462 goto out;
463 }
464
168a8f58
JC
465 /* XXX (TFO) - if it's a TFO socket and has been accepted, rather
466 * than following the TCP_SYN_RECV case and closing the socket,
467 * we ignore the ICMP error and keep trying like a fully established
468 * socket. Is this the right thing to do?
469 */
470 if (req && req->sk == NULL)
471 goto out;
472
1da177e4 473 switch (sk->sk_state) {
60236fdd 474 struct request_sock *req, **prev;
1da177e4
LT
475 case TCP_LISTEN:
476 if (sock_owned_by_user(sk))
477 goto out;
478
463c84b9
ACM
479 req = inet_csk_search_req(sk, &prev, th->dest,
480 iph->daddr, iph->saddr);
1da177e4
LT
481 if (!req)
482 goto out;
483
484 /* ICMPs are not backlogged, hence we cannot get
485 an established socket here.
486 */
547b792c 487 WARN_ON(req->sk);
1da177e4 488
2e6599cb 489 if (seq != tcp_rsk(req)->snt_isn) {
de0744af 490 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
1da177e4
LT
491 goto out;
492 }
493
494 /*
495 * Still in SYN_RECV, just remove it silently.
496 * There is no good way to pass the error to the newly
497 * created socket, and POSIX does not want network
498 * errors returned from accept().
499 */
463c84b9 500 inet_csk_reqsk_queue_drop(sk, req, prev);
848bf15f 501 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4
LT
502 goto out;
503
504 case TCP_SYN_SENT:
505 case TCP_SYN_RECV: /* Cannot happen normally.
168a8f58 JC 506 It can, e.g., if SYNs crossed,
507 or on Fast Open.
1da177e4
LT
508 */
509 if (!sock_owned_by_user(sk)) {
1da177e4
LT
510 sk->sk_err = err;
511
512 sk->sk_error_report(sk);
513
514 tcp_done(sk);
515 } else {
516 sk->sk_err_soft = err;
517 }
518 goto out;
519 }
520
521 /* If we've already connected we will keep trying
522 * until we time out, or the user gives up.
523 *
524 * rfc1122 4.2.3.9 allows to consider as hard errors
525 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
526 * but it is obsoleted by pmtu discovery).
527 *
528 * Note, that in modern internet, where routing is unreliable
529 * and in each dark corner broken firewalls sit, sending random
530 * errors ordered by their masters even this two messages finally lose
531 * their original sense (even Linux sends invalid PORT_UNREACHs)
532 *
533 * Now we are in compliance with RFCs.
534 * --ANK (980905)
535 */
536
537 inet = inet_sk(sk);
538 if (!sock_owned_by_user(sk) && inet->recverr) {
539 sk->sk_err = err;
540 sk->sk_error_report(sk);
541 } else { /* Only an error on timeout */
542 sk->sk_err_soft = err;
543 }
544
545out:
546 bh_unlock_sock(sk);
547 sock_put(sk);
548}
549
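
/* Editor's illustration (not part of this file): how the errors recorded
 * by tcp_v4_err() (sk->sk_err / sk_err_soft) surface in userspace.  poll()
 * wakes with POLLERR and the pending errno set above (e.g. EHOSTUNREACH,
 * EMSGSIZE) is fetched with SO_ERROR; setsockopt(IPPROTO_IP, IP_RECVERR)
 * sets the inet->recverr flag tested near the end of tcp_v4_err().
 */
#include <errno.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

static void report_socket_error(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN };
	int err = 0;
	socklen_t len = sizeof(err);

	if (poll(&pfd, 1, 0) > 0 && (pfd.revents & POLLERR) &&
	    getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len) == 0 && err)
		fprintf(stderr, "pending socket error: %s\n", strerror(err));
}
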
28850dc7 550void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1da177e4 551{
aa8223c7 552 struct tcphdr *th = tcp_hdr(skb);
1da177e4 553
84fa7933 554 if (skb->ip_summed == CHECKSUM_PARTIAL) {
419f9f89 555 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
663ead3b 556 skb->csum_start = skb_transport_header(skb) - skb->head;
ff1dcadb 557 skb->csum_offset = offsetof(struct tcphdr, check);
1da177e4 558 } else {
419f9f89 559 th->check = tcp_v4_check(skb->len, saddr, daddr,
07f0757a 560 csum_partial(th,
1da177e4
LT
561 th->doff << 2,
562 skb->csum));
563 }
564}
565
419f9f89 566/* This routine computes an IPv4 TCP checksum. */
bb296246 567void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
419f9f89 568{
cf533ea5 569 const struct inet_sock *inet = inet_sk(sk);
419f9f89
HX
570
571 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
572}
4bc2f18b 573EXPORT_SYMBOL(tcp_v4_send_check);
419f9f89 574
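
/* Editor's illustration (not part of this file): the arithmetic behind
 * tcp_v4_check()/csum_partial() in portable C.  One's-complement sum over
 * the IPv4 pseudo-header, then the TCP header and payload (with th->check
 * zeroed first); the kernel helpers compute the same thing incrementally.
 * Assumes segment length < 64 KiB.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>

static uint32_t csum_add(uint32_t sum, const void *data, size_t len)
{
	const uint8_t *p = data;

	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)			/* odd trailing byte, zero-padded */
		sum += (uint32_t)p[0] << 8;
	return sum;
}

/* saddr/daddr in network byte order; segment = TCP header + payload */
static uint16_t tcp4_checksum(uint32_t saddr, uint32_t daddr,
			      const void *segment, size_t len)
{
	uint8_t ph[12];			/* IPv4 pseudo-header */
	uint32_t sum;

	memcpy(&ph[0], &saddr, 4);
	memcpy(&ph[4], &daddr, 4);
	ph[8] = 0;			/* zero padding */
	ph[9] = 6;			/* IPPROTO_TCP */
	ph[10] = len >> 8;		/* TCP length */
	ph[11] = len & 0xff;

	sum = csum_add(0, ph, sizeof(ph));
	sum = csum_add(sum, segment, len);
	while (sum >> 16)		/* fold carries into 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);
	return htons((uint16_t)~sum);	/* store into th->check as-is */
}
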
1da177e4
LT
575/*
576 * This routine will send an RST to the other tcp.
577 *
578 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
579 * for reset.
580 * Answer: if a packet caused RST, it is not for a socket
581 * existing in our system; if it is matched to a socket,
582 * it is just a duplicate segment or a bug in the other side's TCP.
583 * So we build the reply based only on the parameters
584 * that arrived with the segment.
585 * Exception: precedence violation. We do not implement it in any case.
586 */
587
cfb6eeb4 588static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
1da177e4 589{
cf533ea5 590 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4
YH
591 struct {
592 struct tcphdr th;
593#ifdef CONFIG_TCP_MD5SIG
714e85be 594 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
cfb6eeb4
YH
595#endif
596 } rep;
1da177e4 597 struct ip_reply_arg arg;
cfb6eeb4
YH
598#ifdef CONFIG_TCP_MD5SIG
599 struct tcp_md5sig_key *key;
658ddaaf
SL
600 const __u8 *hash_location = NULL;
601 unsigned char newhash[16];
602 int genhash;
603 struct sock *sk1 = NULL;
cfb6eeb4 604#endif
a86b1e30 605 struct net *net;
1da177e4
LT
606
607 /* Never send a reset in response to a reset. */
608 if (th->rst)
609 return;
610
511c3f92 611 if (skb_rtable(skb)->rt_type != RTN_LOCAL)
1da177e4
LT
612 return;
613
614 /* Swap the send and the receive. */
cfb6eeb4
YH
615 memset(&rep, 0, sizeof(rep));
616 rep.th.dest = th->source;
617 rep.th.source = th->dest;
618 rep.th.doff = sizeof(struct tcphdr) / 4;
619 rep.th.rst = 1;
1da177e4
LT
620
621 if (th->ack) {
cfb6eeb4 622 rep.th.seq = th->ack_seq;
1da177e4 623 } else {
cfb6eeb4
YH
624 rep.th.ack = 1;
625 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
626 skb->len - (th->doff << 2));
1da177e4
LT
627 }
628
7174259e 629 memset(&arg, 0, sizeof(arg));
cfb6eeb4
YH
630 arg.iov[0].iov_base = (unsigned char *)&rep;
631 arg.iov[0].iov_len = sizeof(rep.th);
632
633#ifdef CONFIG_TCP_MD5SIG
658ddaaf
SL
634 hash_location = tcp_parse_md5sig_option(th);
635 if (!sk && hash_location) {
636 /*
637 * active side is lost. Try to find listening socket through
638 * source port, and then find md5 key through listening socket.
639 * We do not lose security here:
640 * the incoming packet is checked against the md5 hash of the found key;
641 * no RST is generated if the md5 hash doesn't match.
642 */
643 sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
da5e3630
TH
644 &tcp_hashinfo, ip_hdr(skb)->saddr,
645 th->source, ip_hdr(skb)->daddr,
658ddaaf
SL
646 ntohs(th->source), inet_iif(skb));
647 /* don't send rst if it can't find key */
648 if (!sk1)
649 return;
650 rcu_read_lock();
651 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
652 &ip_hdr(skb)->saddr, AF_INET);
653 if (!key)
654 goto release_sk1;
655
656 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
657 if (genhash || memcmp(hash_location, newhash, 16) != 0)
658 goto release_sk1;
659 } else {
660 key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
661 &ip_hdr(skb)->saddr,
662 AF_INET) : NULL;
663 }
664
cfb6eeb4
YH
665 if (key) {
666 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
667 (TCPOPT_NOP << 16) |
668 (TCPOPT_MD5SIG << 8) |
669 TCPOLEN_MD5SIG);
670 /* Update length and the length the header thinks exists */
671 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
672 rep.th.doff = arg.iov[0].iov_len / 4;
673
49a72dfb 674 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
78e645cb
IJ
675 key, ip_hdr(skb)->saddr,
676 ip_hdr(skb)->daddr, &rep.th);
cfb6eeb4
YH
677 }
678#endif
eddc9ec5
ACM
679 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
680 ip_hdr(skb)->saddr, /* XXX */
52cd5750 681 arg.iov[0].iov_len, IPPROTO_TCP, 0);
1da177e4 682 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
88ef4a5a 683 arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
e2446eaa 684 /* When socket is gone, all binding information is lost.
4c675258
AK
685 * routing might fail in this case. No choice here, if we choose to force
686 * input interface, we will misroute in case of asymmetric route.
e2446eaa 687 */
4c675258
AK
688 if (sk)
689 arg.bound_dev_if = sk->sk_bound_dev_if;
1da177e4 690
adf30907 691 net = dev_net(skb_dst(skb)->dev);
66b13d99 692 arg.tos = ip_hdr(skb)->tos;
be9f4a44 693 ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
70e73416 694 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
1da177e4 695
63231bdd
PE
696 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
697 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
658ddaaf
SL
698
699#ifdef CONFIG_TCP_MD5SIG
700release_sk1:
701 if (sk1) {
702 rcu_read_unlock();
703 sock_put(sk1);
704 }
705#endif
1da177e4
LT
706}
707
708 /* The code below, which sends ACKs in SYN-RECV and TIME-WAIT states
709 outside socket context, is certainly ugly. What can I do?
710 */
711
9501f972 712static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
ee684b6f 713 u32 win, u32 tsval, u32 tsecr, int oif,
88ef4a5a 714 struct tcp_md5sig_key *key,
66b13d99 715 int reply_flags, u8 tos)
1da177e4 716{
cf533ea5 717 const struct tcphdr *th = tcp_hdr(skb);
1da177e4
LT
718 struct {
719 struct tcphdr th;
714e85be 720 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
cfb6eeb4 721#ifdef CONFIG_TCP_MD5SIG
714e85be 722 + (TCPOLEN_MD5SIG_ALIGNED >> 2)
cfb6eeb4
YH
723#endif
724 ];
1da177e4
LT
725 } rep;
726 struct ip_reply_arg arg;
adf30907 727 struct net *net = dev_net(skb_dst(skb)->dev);
1da177e4
LT
728
729 memset(&rep.th, 0, sizeof(struct tcphdr));
7174259e 730 memset(&arg, 0, sizeof(arg));
1da177e4
LT
731
732 arg.iov[0].iov_base = (unsigned char *)&rep;
733 arg.iov[0].iov_len = sizeof(rep.th);
ee684b6f 734 if (tsecr) {
cfb6eeb4
YH
735 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
736 (TCPOPT_TIMESTAMP << 8) |
737 TCPOLEN_TIMESTAMP);
ee684b6f
AV
738 rep.opt[1] = htonl(tsval);
739 rep.opt[2] = htonl(tsecr);
cb48cfe8 740 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
1da177e4
LT
741 }
742
743 /* Swap the send and the receive. */
744 rep.th.dest = th->source;
745 rep.th.source = th->dest;
746 rep.th.doff = arg.iov[0].iov_len / 4;
747 rep.th.seq = htonl(seq);
748 rep.th.ack_seq = htonl(ack);
749 rep.th.ack = 1;
750 rep.th.window = htons(win);
751
cfb6eeb4 752#ifdef CONFIG_TCP_MD5SIG
cfb6eeb4 753 if (key) {
ee684b6f 754 int offset = (tsecr) ? 3 : 0;
cfb6eeb4
YH
755
756 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
757 (TCPOPT_NOP << 16) |
758 (TCPOPT_MD5SIG << 8) |
759 TCPOLEN_MD5SIG);
760 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
761 rep.th.doff = arg.iov[0].iov_len/4;
762
49a72dfb 763 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
90b7e112
AL
764 key, ip_hdr(skb)->saddr,
765 ip_hdr(skb)->daddr, &rep.th);
cfb6eeb4
YH
766 }
767#endif
88ef4a5a 768 arg.flags = reply_flags;
eddc9ec5
ACM
769 arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
770 ip_hdr(skb)->saddr, /* XXX */
1da177e4
LT
771 arg.iov[0].iov_len, IPPROTO_TCP, 0);
772 arg.csumoffset = offsetof(struct tcphdr, check) / 2;
9501f972
YH
773 if (oif)
774 arg.bound_dev_if = oif;
66b13d99 775 arg.tos = tos;
be9f4a44 776 ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
70e73416 777 ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);
1da177e4 778
63231bdd 779 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1da177e4
LT
780}
781
782static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
783{
8feaf0c0 784 struct inet_timewait_sock *tw = inet_twsk(sk);
cfb6eeb4 785 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1da177e4 786
9501f972 787 tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
7174259e 788 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
ee684b6f 789 tcp_time_stamp + tcptw->tw_ts_offset,
9501f972
YH
790 tcptw->tw_ts_recent,
791 tw->tw_bound_dev_if,
88ef4a5a 792 tcp_twsk_md5_key(tcptw),
66b13d99
ED
793 tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
794 tw->tw_tos
9501f972 795 );
1da177e4 796
8feaf0c0 797 inet_twsk_put(tw);
1da177e4
LT
798}
799
6edafaaf 800static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
7174259e 801 struct request_sock *req)
1da177e4 802{
168a8f58
JC
803 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
804 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
805 */
806 tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
807 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
808 tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
ee684b6f 809 tcp_time_stamp,
9501f972
YH
810 req->ts_recent,
811 0,
a915da9b
ED
812 tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
813 AF_INET),
66b13d99
ED
814 inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
815 ip_hdr(skb)->tos);
1da177e4
LT
816}
817
1da177e4 818/*
9bf1d83e 819 * Send a SYN-ACK after having received a SYN.
60236fdd 820 * This still operates on a request_sock only, not on a big
1da177e4
LT
821 * socket.
822 */
72659ecc
OP
823static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
824 struct request_sock *req,
843f4a55
YC
825 u16 queue_mapping,
826 struct tcp_fastopen_cookie *foc)
1da177e4 827{
2e6599cb 828 const struct inet_request_sock *ireq = inet_rsk(req);
6bd023f3 829 struct flowi4 fl4;
1da177e4 830 int err = -1;
d41db5af 831 struct sk_buff *skb;
1da177e4
LT
832
833 /* First, grab a route. */
ba3f7f04 834 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
fd80eb94 835 return -1;
1da177e4 836
843f4a55 837 skb = tcp_make_synack(sk, dst, req, foc);
1da177e4
LT
838
839 if (skb) {
634fb979 840 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
1da177e4 841
fff32699 842 skb_set_queue_mapping(skb, queue_mapping);
634fb979
ED
843 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
844 ireq->ir_rmt_addr,
2e6599cb 845 ireq->opt);
b9df3cb8 846 err = net_xmit_eval(err);
016818d0
NC
847 if (!tcp_rsk(req)->snt_synack && !err)
848 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1da177e4
LT
849 }
850
1da177e4
LT
851 return err;
852}
853
1a2c6181 854static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
fd80eb94 855{
843f4a55 856 int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
e6c022a4 857
f19c29e3 858 if (!res) {
e6c022a4 859 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
f19c29e3
YC
860 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
861 }
e6c022a4 862 return res;
fd80eb94
DL
863}
864
1da177e4 865/*
60236fdd 866 * IPv4 request_sock destructor.
1da177e4 867 */
60236fdd 868static void tcp_v4_reqsk_destructor(struct request_sock *req)
1da177e4 869{
a51482bd 870 kfree(inet_rsk(req)->opt);
1da177e4
LT
871}
872
946cedcc 873/*
a2a385d6 874 * Return true if a syncookie should be sent
946cedcc 875 */
a2a385d6 876bool tcp_syn_flood_action(struct sock *sk,
946cedcc
ED
877 const struct sk_buff *skb,
878 const char *proto)
1da177e4 879{
946cedcc 880 const char *msg = "Dropping request";
a2a385d6 881 bool want_cookie = false;
946cedcc
ED
882 struct listen_sock *lopt;
883
2a1d4bd4 884#ifdef CONFIG_SYN_COOKIES
946cedcc 885 if (sysctl_tcp_syncookies) {
2a1d4bd4 886 msg = "Sending cookies";
a2a385d6 887 want_cookie = true;
946cedcc
ED
888 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
889 } else
80e40daa 890#endif
946cedcc
ED
891 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
892
893 lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
5ad37d5d 894 if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
946cedcc 895 lopt->synflood_warned = 1;
afd46503 896 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
946cedcc
ED
897 proto, ntohs(tcp_hdr(skb)->dest), msg);
898 }
899 return want_cookie;
2a1d4bd4 900}
946cedcc 901EXPORT_SYMBOL(tcp_syn_flood_action);
1da177e4
LT
902
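
/* Editor's illustration (not part of this file): tcp_syn_flood_action()
 * keys off sysctl_tcp_syncookies, which userspace controls through
 * /proc/sys/net/ipv4/tcp_syncookies (0 = off, 1 = only under pressure,
 * 2 = always, matching the checks above).  A trivial reader:
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_syncookies", "r");
	int val;

	if (f && fscanf(f, "%d", &val) == 1)
		printf("tcp_syncookies = %d\n", val);
	if (f)
		fclose(f);
	return 0;
}
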
903/*
60236fdd 904 * Save and compile IPv4 options into the request_sock if needed.
1da177e4 905 */
5dff747b 906static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
1da177e4 907{
f6d8bd05
ED
908 const struct ip_options *opt = &(IPCB(skb)->opt);
909 struct ip_options_rcu *dopt = NULL;
1da177e4
LT
910
911 if (opt && opt->optlen) {
f6d8bd05
ED
912 int opt_size = sizeof(*dopt) + opt->optlen;
913
1da177e4
LT
914 dopt = kmalloc(opt_size, GFP_ATOMIC);
915 if (dopt) {
f6d8bd05 916 if (ip_options_echo(&dopt->opt, skb)) {
1da177e4
LT
917 kfree(dopt);
918 dopt = NULL;
919 }
920 }
921 }
922 return dopt;
923}
924
cfb6eeb4
YH
925#ifdef CONFIG_TCP_MD5SIG
926/*
927 * RFC2385 MD5 checksumming requires a mapping of
928 * IP address->MD5 Key.
929 * We need to maintain these in the sk structure.
930 */
931
932/* Find the Key structure for an address. */
a915da9b
ED
933struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
934 const union tcp_md5_addr *addr,
935 int family)
cfb6eeb4
YH
936{
937 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 938 struct tcp_md5sig_key *key;
a915da9b 939 unsigned int size = sizeof(struct in_addr);
a8afca03 940 struct tcp_md5sig_info *md5sig;
cfb6eeb4 941
a8afca03
ED
942 /* caller either holds rcu_read_lock() or socket lock */
943 md5sig = rcu_dereference_check(tp->md5sig_info,
b4fb05ea
ED
944 sock_owned_by_user(sk) ||
945 lockdep_is_held(&sk->sk_lock.slock));
a8afca03 946 if (!md5sig)
cfb6eeb4 947 return NULL;
a915da9b
ED
948#if IS_ENABLED(CONFIG_IPV6)
949 if (family == AF_INET6)
950 size = sizeof(struct in6_addr);
951#endif
b67bfe0d 952 hlist_for_each_entry_rcu(key, &md5sig->head, node) {
a915da9b
ED
953 if (key->family != family)
954 continue;
955 if (!memcmp(&key->addr, addr, size))
956 return key;
cfb6eeb4
YH
957 }
958 return NULL;
959}
a915da9b 960EXPORT_SYMBOL(tcp_md5_do_lookup);
cfb6eeb4
YH
961
962struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
963 struct sock *addr_sk)
964{
a915da9b
ED
965 union tcp_md5_addr *addr;
966
967 addr = (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr;
968 return tcp_md5_do_lookup(sk, addr, AF_INET);
cfb6eeb4 969}
cfb6eeb4
YH
970EXPORT_SYMBOL(tcp_v4_md5_lookup);
971
f5b99bcd
AB
972static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
973 struct request_sock *req)
cfb6eeb4 974{
a915da9b
ED
975 union tcp_md5_addr *addr;
976
634fb979 977 addr = (union tcp_md5_addr *)&inet_rsk(req)->ir_rmt_addr;
a915da9b 978 return tcp_md5_do_lookup(sk, addr, AF_INET);
cfb6eeb4
YH
979}
980
981/* This can be called on a newly created socket, from other files */
a915da9b
ED
982int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
983 int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
cfb6eeb4
YH
984{
985 /* Add Key to the list */
b0a713e9 986 struct tcp_md5sig_key *key;
cfb6eeb4 987 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 988 struct tcp_md5sig_info *md5sig;
cfb6eeb4 989
c0353c7b 990 key = tcp_md5_do_lookup(sk, addr, family);
cfb6eeb4
YH
991 if (key) {
992 /* Pre-existing entry - just update that one. */
a915da9b 993 memcpy(key->key, newkey, newkeylen);
b0a713e9 994 key->keylen = newkeylen;
a915da9b
ED
995 return 0;
996 }
260fcbeb 997
a8afca03
ED
998 md5sig = rcu_dereference_protected(tp->md5sig_info,
999 sock_owned_by_user(sk));
a915da9b
ED
1000 if (!md5sig) {
1001 md5sig = kmalloc(sizeof(*md5sig), gfp);
1002 if (!md5sig)
cfb6eeb4 1003 return -ENOMEM;
cfb6eeb4 1004
a915da9b
ED
1005 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1006 INIT_HLIST_HEAD(&md5sig->head);
a8afca03 1007 rcu_assign_pointer(tp->md5sig_info, md5sig);
a915da9b 1008 }
cfb6eeb4 1009
5f3d9cb2 1010 key = sock_kmalloc(sk, sizeof(*key), gfp);
a915da9b
ED
1011 if (!key)
1012 return -ENOMEM;
71cea17e 1013 if (!tcp_alloc_md5sig_pool()) {
5f3d9cb2 1014 sock_kfree_s(sk, key, sizeof(*key));
a915da9b 1015 return -ENOMEM;
cfb6eeb4 1016 }
a915da9b
ED
1017
1018 memcpy(key->key, newkey, newkeylen);
1019 key->keylen = newkeylen;
1020 key->family = family;
1021 memcpy(&key->addr, addr,
1022 (family == AF_INET6) ? sizeof(struct in6_addr) :
1023 sizeof(struct in_addr));
1024 hlist_add_head_rcu(&key->node, &md5sig->head);
cfb6eeb4
YH
1025 return 0;
1026}
a915da9b 1027EXPORT_SYMBOL(tcp_md5_do_add);
cfb6eeb4 1028
a915da9b 1029int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
cfb6eeb4 1030{
a915da9b
ED
1031 struct tcp_md5sig_key *key;
1032
c0353c7b 1033 key = tcp_md5_do_lookup(sk, addr, family);
a915da9b
ED
1034 if (!key)
1035 return -ENOENT;
1036 hlist_del_rcu(&key->node);
5f3d9cb2 1037 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
a915da9b 1038 kfree_rcu(key, rcu);
a915da9b 1039 return 0;
cfb6eeb4 1040}
a915da9b 1041EXPORT_SYMBOL(tcp_md5_do_del);
cfb6eeb4 1042
e0683e70 1043static void tcp_clear_md5_list(struct sock *sk)
cfb6eeb4
YH
1044{
1045 struct tcp_sock *tp = tcp_sk(sk);
a915da9b 1046 struct tcp_md5sig_key *key;
b67bfe0d 1047 struct hlist_node *n;
a8afca03 1048 struct tcp_md5sig_info *md5sig;
cfb6eeb4 1049
a8afca03
ED
1050 md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
1051
b67bfe0d 1052 hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
a915da9b 1053 hlist_del_rcu(&key->node);
5f3d9cb2 1054 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
a915da9b 1055 kfree_rcu(key, rcu);
cfb6eeb4
YH
1056 }
1057}
1058
7174259e
ACM
1059static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1060 int optlen)
cfb6eeb4
YH
1061{
1062 struct tcp_md5sig cmd;
1063 struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
cfb6eeb4
YH
1064
1065 if (optlen < sizeof(cmd))
1066 return -EINVAL;
1067
7174259e 1068 if (copy_from_user(&cmd, optval, sizeof(cmd)))
cfb6eeb4
YH
1069 return -EFAULT;
1070
1071 if (sin->sin_family != AF_INET)
1072 return -EINVAL;
1073
a8afca03 1074 if (!cmd.tcpm_key || !cmd.tcpm_keylen)
a915da9b
ED
1075 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1076 AF_INET);
cfb6eeb4
YH
1077
1078 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1079 return -EINVAL;
1080
a915da9b
ED
1081 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1082 AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1083 GFP_KERNEL);
cfb6eeb4
YH
1084}
1085
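
/* Editor's illustration (not part of this file): the userspace side of
 * tcp_v4_parse_md5_keys().  Installing an RFC 2385 key for one peer via
 * setsockopt(TCP_MD5SIG); the peer address and key are examples, and the
 * struct layout comes from <linux/tcp.h>.  A zero tcpm_keylen deletes the
 * key, mirroring the tcp_md5_do_del() branch above.
 */
#include <arpa/inet.h>
#include <linux/tcp.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

static int install_md5_key(int fd, const char *peer, const char *key)
{
	struct tcp_md5sig md5;
	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;

	memset(&md5, 0, sizeof(md5));
	sin->sin_family = AF_INET;
	inet_pton(AF_INET, peer, &sin->sin_addr);
	md5.tcpm_keylen = strlen(key);	/* must be <= TCP_MD5SIG_MAXKEYLEN */
	memcpy(md5.tcpm_key, key, md5.tcpm_keylen);

	return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
}
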
49a72dfb
AL
1086static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
1087 __be32 daddr, __be32 saddr, int nbytes)
cfb6eeb4 1088{
cfb6eeb4 1089 struct tcp4_pseudohdr *bp;
49a72dfb 1090 struct scatterlist sg;
cfb6eeb4
YH
1091
1092 bp = &hp->md5_blk.ip4;
cfb6eeb4
YH
1093
1094 /*
49a72dfb 1095 * 1. the TCP pseudo-header (in the order: source IP address,
cfb6eeb4
YH
1096 * destination IP address, zero-padded protocol number, and
1097 * segment length)
1098 */
1099 bp->saddr = saddr;
1100 bp->daddr = daddr;
1101 bp->pad = 0;
076fb722 1102 bp->protocol = IPPROTO_TCP;
49a72dfb 1103 bp->len = cpu_to_be16(nbytes);
c7da57a1 1104
49a72dfb
AL
1105 sg_init_one(&sg, bp, sizeof(*bp));
1106 return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
1107}
1108
a915da9b 1109static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
318cf7aa 1110 __be32 daddr, __be32 saddr, const struct tcphdr *th)
49a72dfb
AL
1111{
1112 struct tcp_md5sig_pool *hp;
1113 struct hash_desc *desc;
1114
1115 hp = tcp_get_md5sig_pool();
1116 if (!hp)
1117 goto clear_hash_noput;
1118 desc = &hp->md5_desc;
1119
1120 if (crypto_hash_init(desc))
1121 goto clear_hash;
1122 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
1123 goto clear_hash;
1124 if (tcp_md5_hash_header(hp, th))
1125 goto clear_hash;
1126 if (tcp_md5_hash_key(hp, key))
1127 goto clear_hash;
1128 if (crypto_hash_final(desc, md5_hash))
cfb6eeb4
YH
1129 goto clear_hash;
1130
cfb6eeb4 1131 tcp_put_md5sig_pool();
cfb6eeb4 1132 return 0;
49a72dfb 1133
cfb6eeb4
YH
1134clear_hash:
1135 tcp_put_md5sig_pool();
1136clear_hash_noput:
1137 memset(md5_hash, 0, 16);
49a72dfb 1138 return 1;
cfb6eeb4
YH
1139}
1140
49a72dfb 1141int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
318cf7aa
ED
1142 const struct sock *sk, const struct request_sock *req,
1143 const struct sk_buff *skb)
cfb6eeb4 1144{
49a72dfb
AL
1145 struct tcp_md5sig_pool *hp;
1146 struct hash_desc *desc;
318cf7aa 1147 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4
YH
1148 __be32 saddr, daddr;
1149
1150 if (sk) {
c720c7e8
ED
1151 saddr = inet_sk(sk)->inet_saddr;
1152 daddr = inet_sk(sk)->inet_daddr;
49a72dfb 1153 } else if (req) {
634fb979
ED
1154 saddr = inet_rsk(req)->ir_loc_addr;
1155 daddr = inet_rsk(req)->ir_rmt_addr;
cfb6eeb4 1156 } else {
49a72dfb
AL
1157 const struct iphdr *iph = ip_hdr(skb);
1158 saddr = iph->saddr;
1159 daddr = iph->daddr;
cfb6eeb4 1160 }
49a72dfb
AL
1161
1162 hp = tcp_get_md5sig_pool();
1163 if (!hp)
1164 goto clear_hash_noput;
1165 desc = &hp->md5_desc;
1166
1167 if (crypto_hash_init(desc))
1168 goto clear_hash;
1169
1170 if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
1171 goto clear_hash;
1172 if (tcp_md5_hash_header(hp, th))
1173 goto clear_hash;
1174 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1175 goto clear_hash;
1176 if (tcp_md5_hash_key(hp, key))
1177 goto clear_hash;
1178 if (crypto_hash_final(desc, md5_hash))
1179 goto clear_hash;
1180
1181 tcp_put_md5sig_pool();
1182 return 0;
1183
1184clear_hash:
1185 tcp_put_md5sig_pool();
1186clear_hash_noput:
1187 memset(md5_hash, 0, 16);
1188 return 1;
cfb6eeb4 1189}
49a72dfb 1190EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
cfb6eeb4 1191
a2a385d6 1192static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
cfb6eeb4
YH
1193{
1194 /*
1195 * This gets called for each TCP segment that arrives
1196 * so we want to be efficient.
1197 * We have 3 drop cases:
1198 * o No MD5 hash and one expected.
1199 * o MD5 hash and we're not expecting one.
1200 * o MD5 hash and it's wrong.
1201 */
cf533ea5 1202 const __u8 *hash_location = NULL;
cfb6eeb4 1203 struct tcp_md5sig_key *hash_expected;
eddc9ec5 1204 const struct iphdr *iph = ip_hdr(skb);
cf533ea5 1205 const struct tcphdr *th = tcp_hdr(skb);
cfb6eeb4 1206 int genhash;
cfb6eeb4
YH
1207 unsigned char newhash[16];
1208
a915da9b
ED
1209 hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1210 AF_INET);
7d5d5525 1211 hash_location = tcp_parse_md5sig_option(th);
cfb6eeb4 1212
cfb6eeb4
YH
1213 /* We've parsed the options - do we have a hash? */
1214 if (!hash_expected && !hash_location)
a2a385d6 1215 return false;
cfb6eeb4
YH
1216
1217 if (hash_expected && !hash_location) {
785957d3 1218 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
a2a385d6 1219 return true;
cfb6eeb4
YH
1220 }
1221
1222 if (!hash_expected && hash_location) {
785957d3 1223 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
a2a385d6 1224 return true;
cfb6eeb4
YH
1225 }
1226
1227 /* Okay, so this is hash_expected and hash_location -
1228 * so we need to calculate the checksum.
1229 */
49a72dfb
AL
1230 genhash = tcp_v4_md5_hash_skb(newhash,
1231 hash_expected,
1232 NULL, NULL, skb);
cfb6eeb4
YH
1233
1234 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
e87cc472
JP
1235 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1236 &iph->saddr, ntohs(th->source),
1237 &iph->daddr, ntohs(th->dest),
1238 genhash ? " tcp_v4_calc_md5_hash failed"
1239 : "");
a2a385d6 1240 return true;
cfb6eeb4 1241 }
a2a385d6 1242 return false;
cfb6eeb4
YH
1243}
1244
1245#endif
1246
72a3effa 1247struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1da177e4 1248 .family = PF_INET,
2e6599cb 1249 .obj_size = sizeof(struct tcp_request_sock),
72659ecc 1250 .rtx_syn_ack = tcp_v4_rtx_synack,
60236fdd
ACM
1251 .send_ack = tcp_v4_reqsk_send_ack,
1252 .destructor = tcp_v4_reqsk_destructor,
1da177e4 1253 .send_reset = tcp_v4_send_reset,
72659ecc 1254 .syn_ack_timeout = tcp_syn_ack_timeout,
1da177e4
LT
1255};
1256
cfb6eeb4 1257#ifdef CONFIG_TCP_MD5SIG
b2e4b3de 1258static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
cfb6eeb4 1259 .md5_lookup = tcp_v4_reqsk_md5_lookup,
e3afe7b7 1260 .calc_md5_hash = tcp_v4_md5_hash_skb,
cfb6eeb4 1261};
b6332e6c 1262#endif
cfb6eeb4 1263
1da177e4
LT
1264int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1265{
1266 struct tcp_options_received tmp_opt;
60236fdd 1267 struct request_sock *req;
e6b4d113 1268 struct inet_request_sock *ireq;
4957faad 1269 struct tcp_sock *tp = tcp_sk(sk);
e6b4d113 1270 struct dst_entry *dst = NULL;
eddc9ec5
ACM
1271 __be32 saddr = ip_hdr(skb)->saddr;
1272 __be32 daddr = ip_hdr(skb)->daddr;
1da177e4 1273 __u32 isn = TCP_SKB_CB(skb)->when;
843f4a55 1274 bool want_cookie = false, fastopen;
168a8f58
JC
1275 struct flowi4 fl4;
1276 struct tcp_fastopen_cookie foc = { .len = -1 };
843f4a55 1277 int err;
1da177e4
LT
1278
1279 /* Never answer SYNs sent to broadcast or multicast */
511c3f92 1280 if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1da177e4
LT
1281 goto drop;
1282
1283 /* TW buckets are converted to open requests without
1284 * limitation; they conserve resources and the peer is
1285 * evidently a real one.
1286 */
5ad37d5d
HFS
1287 if ((sysctl_tcp_syncookies == 2 ||
1288 inet_csk_reqsk_queue_is_full(sk)) && !isn) {
946cedcc
ED
1289 want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
1290 if (!want_cookie)
1291 goto drop;
1da177e4
LT
1292 }
1293
1294 /* Accept backlog is full. If we have already queued enough
1295 * warm entries in the syn queue, drop the request. It is better than
1296 * clogging the syn queue with openreqs with exponentially increasing
1297 * timeout.
1298 */
2aeef18d
NS
1299 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
1300 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1da177e4 1301 goto drop;
2aeef18d 1302 }
1da177e4 1303
ce4a7d0d 1304 req = inet_reqsk_alloc(&tcp_request_sock_ops);
1da177e4
LT
1305 if (!req)
1306 goto drop;
1307
cfb6eeb4
YH
1308#ifdef CONFIG_TCP_MD5SIG
1309 tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
1310#endif
1311
1da177e4 1312 tcp_clear_options(&tmp_opt);
bee7ca9e 1313 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
4957faad 1314 tmp_opt.user_mss = tp->rx_opt.user_mss;
1a2c6181 1315 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1da177e4 1316
4dfc2817 1317 if (want_cookie && !tmp_opt.saw_tstamp)
1da177e4 1318 tcp_clear_options(&tmp_opt);
1da177e4 1319
1da177e4 1320 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1da177e4
LT
1321 tcp_openreq_init(req, &tmp_opt, skb);
1322
bb5b7c11 1323 ireq = inet_rsk(req);
634fb979
ED
1324 ireq->ir_loc_addr = daddr;
1325 ireq->ir_rmt_addr = saddr;
bb5b7c11 1326 ireq->no_srccheck = inet_sk(sk)->transparent;
5dff747b 1327 ireq->opt = tcp_v4_save_options(skb);
bb5b7c11 1328
284904aa 1329 if (security_inet_conn_request(sk, skb, req))
bb5b7c11 1330 goto drop_and_free;
284904aa 1331
172d69e6 1332 if (!want_cookie || tmp_opt.tstamp_ok)
5d134f1c 1333 TCP_ECN_create_request(req, skb, sock_net(sk));
1da177e4
LT
1334
1335 if (want_cookie) {
1da177e4 1336 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
172d69e6 1337 req->cookie_ts = tmp_opt.tstamp_ok;
1da177e4 1338 } else if (!isn) {
1da177e4
LT
1339 /* VJ's idea. We save last timestamp seen
1340 * from the destination in peer table, when entering
1341 * state TIME-WAIT, and check against it before
1342 * accepting new connection request.
1343 *
1344 * If "isn" is not zero, this request hit alive
1345 * timewait bucket, so that all the necessary checks
1346 * are made in the function processing timewait state.
1347 */
1348 if (tmp_opt.saw_tstamp &&
295ff7ed 1349 tcp_death_row.sysctl_tw_recycle &&
ba3f7f04 1350 (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
81166dd6
DM
1351 fl4.daddr == saddr) {
1352 if (!tcp_peer_is_proven(req, dst, true)) {
de0744af 1353 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
7cd04fa7 1354 goto drop_and_release;
1da177e4
LT
1355 }
1356 }
1357 /* Kill the following clause, if you dislike this way. */
1358 else if (!sysctl_tcp_syncookies &&
463c84b9 1359 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
1da177e4 1360 (sysctl_max_syn_backlog >> 2)) &&
81166dd6 1361 !tcp_peer_is_proven(req, dst, false)) {
1da177e4
LT
1362 /* Without syncookies the last quarter of the
1363 * backlog is filled with destinations
1364 * proven to be alive.
1365 * It means that we continue to communicate
1366 * with destinations already remembered
1367 * at the moment of the synflood.
1368 */
afd46503 1369 LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
673d57e7 1370 &saddr, ntohs(tcp_hdr(skb)->source));
7cd04fa7 1371 goto drop_and_release;
1da177e4
LT
1372 }
1373
a94f723d 1374 isn = tcp_v4_init_sequence(skb);
1da177e4 1375 }
843f4a55 1376 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
168a8f58
JC
1377 goto drop_and_free;
1378
843f4a55
YC
1379 tcp_rsk(req)->snt_isn = isn;
1380 tcp_rsk(req)->snt_synack = tcp_time_stamp;
1381 tcp_openreq_init_rwin(req, sk, dst);
1382 fastopen = !want_cookie &&
1383 tcp_try_fastopen(sk, skb, req, &foc, dst);
1384 err = tcp_v4_send_synack(sk, dst, req,
1385 skb_get_queue_mapping(skb), &foc);
1386 if (!fastopen) {
168a8f58
JC
1387 if (err || want_cookie)
1388 goto drop_and_free;
1389
016818d0 1390 tcp_rsk(req)->snt_synack = tcp_time_stamp;
168a8f58 1391 tcp_rsk(req)->listener = NULL;
168a8f58 1392 inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
843f4a55 1393 }
1da177e4 1394
1da177e4
LT
1395 return 0;
1396
7cd04fa7
DL
1397drop_and_release:
1398 dst_release(dst);
1da177e4 1399drop_and_free:
60236fdd 1400 reqsk_free(req);
1da177e4 1401drop:
848bf15f 1402 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4
LT
1403 return 0;
1404}
4bc2f18b 1405EXPORT_SYMBOL(tcp_v4_conn_request);
1da177e4
LT
1406
1407
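
/* Editor's illustration (not part of this file): the userspace setup whose
 * incoming SYNs are handled by tcp_v4_conn_request().  The listen()
 * backlog bounds the accept queue tested by sk_acceptq_is_full() above;
 * port 8080 and backlog 16 are arbitrary examples.
 */
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in addr;
	int one = 1;
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return 1;
	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;
	addr.sin_addr.s_addr = htonl(INADDR_ANY);
	addr.sin_port = htons(8080);
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;
	/* connections completing beyond this backlog are what the
	 * LINUX_MIB_LISTENOVERFLOWS branch above counts and drops */
	if (listen(fd, 16) < 0)
		return 1;
	for (;;) {
		int c = accept(fd, NULL, NULL);

		if (c >= 0)
			close(c);
	}
}
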
1408/*
1409 * The three way handshake has completed - we got a valid synack -
1410 * now create the new socket.
1411 */
1412struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
60236fdd 1413 struct request_sock *req,
1da177e4
LT
1414 struct dst_entry *dst)
1415{
2e6599cb 1416 struct inet_request_sock *ireq;
1da177e4
LT
1417 struct inet_sock *newinet;
1418 struct tcp_sock *newtp;
1419 struct sock *newsk;
cfb6eeb4
YH
1420#ifdef CONFIG_TCP_MD5SIG
1421 struct tcp_md5sig_key *key;
1422#endif
f6d8bd05 1423 struct ip_options_rcu *inet_opt;
1da177e4
LT
1424
1425 if (sk_acceptq_is_full(sk))
1426 goto exit_overflow;
1427
1da177e4
LT
1428 newsk = tcp_create_openreq_child(sk, req, skb);
1429 if (!newsk)
093d2823 1430 goto exit_nonewsk;
1da177e4 1431
bcd76111 1432 newsk->sk_gso_type = SKB_GSO_TCPV4;
fae6ef87 1433 inet_sk_rx_dst_set(newsk, skb);
1da177e4
LT
1434
1435 newtp = tcp_sk(newsk);
1436 newinet = inet_sk(newsk);
2e6599cb 1437 ireq = inet_rsk(req);
634fb979
ED
1438 newinet->inet_daddr = ireq->ir_rmt_addr;
1439 newinet->inet_rcv_saddr = ireq->ir_loc_addr;
1440 newinet->inet_saddr = ireq->ir_loc_addr;
f6d8bd05
ED
1441 inet_opt = ireq->opt;
1442 rcu_assign_pointer(newinet->inet_opt, inet_opt);
2e6599cb 1443 ireq->opt = NULL;
463c84b9 1444 newinet->mc_index = inet_iif(skb);
eddc9ec5 1445 newinet->mc_ttl = ip_hdr(skb)->ttl;
4c507d28 1446 newinet->rcv_tos = ip_hdr(skb)->tos;
d83d8461 1447 inet_csk(newsk)->icsk_ext_hdr_len = 0;
f6d8bd05
ED
1448 if (inet_opt)
1449 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
c720c7e8 1450 newinet->inet_id = newtp->write_seq ^ jiffies;
1da177e4 1451
dfd25fff
ED
1452 if (!dst) {
1453 dst = inet_csk_route_child_sock(sk, newsk, req);
1454 if (!dst)
1455 goto put_and_exit;
1456 } else {
1457 /* syncookie case : see end of cookie_v4_check() */
1458 }
0e734419
DM
1459 sk_setup_caps(newsk, dst);
1460
1da177e4 1461 tcp_sync_mss(newsk, dst_mtu(dst));
0dbaee3b 1462 newtp->advmss = dst_metric_advmss(dst);
f5fff5dc
TQ
1463 if (tcp_sk(sk)->rx_opt.user_mss &&
1464 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1465 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1466
1da177e4
LT
1467 tcp_initialize_rcv_mss(newsk);
1468
cfb6eeb4
YH
1469#ifdef CONFIG_TCP_MD5SIG
1470 /* Copy over the MD5 key from the original socket */
a915da9b
ED
1471 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1472 AF_INET);
c720c7e8 1473 if (key != NULL) {
cfb6eeb4
YH
1474 /*
1475 * We're using one, so create a matching key
1476 * on the newsk structure. If we fail to get
1477 * memory, then we end up not copying the key
1478 * across. Shucks.
1479 */
a915da9b
ED
1480 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1481 AF_INET, key->key, key->keylen, GFP_ATOMIC);
a465419b 1482 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
cfb6eeb4
YH
1483 }
1484#endif
1485
0e734419
DM
1486 if (__inet_inherit_port(sk, newsk) < 0)
1487 goto put_and_exit;
9327f705 1488 __inet_hash_nolisten(newsk, NULL);
1da177e4
LT
1489
1490 return newsk;
1491
1492exit_overflow:
de0744af 1493 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
093d2823
BS
1494exit_nonewsk:
1495 dst_release(dst);
1da177e4 1496exit:
de0744af 1497 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1da177e4 1498 return NULL;
0e734419 1499put_and_exit:
e337e24d
CP
1500 inet_csk_prepare_forced_close(newsk);
1501 tcp_done(newsk);
0e734419 1502 goto exit;
1da177e4 1503}
4bc2f18b 1504EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1da177e4
LT
1505
1506static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1507{
aa8223c7 1508 struct tcphdr *th = tcp_hdr(skb);
eddc9ec5 1509 const struct iphdr *iph = ip_hdr(skb);
1da177e4 1510 struct sock *nsk;
60236fdd 1511 struct request_sock **prev;
1da177e4 1512 /* Find possible connection requests. */
463c84b9
ACM
1513 struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
1514 iph->saddr, iph->daddr);
1da177e4 1515 if (req)
8336886f 1516 return tcp_check_req(sk, skb, req, prev, false);
1da177e4 1517
3b1e0a65 1518 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
c67499c0 1519 th->source, iph->daddr, th->dest, inet_iif(skb));
1da177e4
LT
1520
1521 if (nsk) {
1522 if (nsk->sk_state != TCP_TIME_WAIT) {
1523 bh_lock_sock(nsk);
1524 return nsk;
1525 }
9469c7b4 1526 inet_twsk_put(inet_twsk(nsk));
1da177e4
LT
1527 return NULL;
1528 }
1529
1530#ifdef CONFIG_SYN_COOKIES
af9b4738 1531 if (!th->syn)
1da177e4
LT
1532 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1533#endif
1534 return sk;
1535}
1536
1da177e4 LT 1537 /* The socket must have its spinlock held when we get
1538 * here.
1539 *
1540 * We have a potential double-lock case here, so even when
1541 * doing backlog processing we use the BH locking scheme.
1542 * This is because we cannot sleep with the original spinlock
1543 * held.
1544 */
1545int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1546{
cfb6eeb4
YH
1547 struct sock *rsk;
1548#ifdef CONFIG_TCP_MD5SIG
1549 /*
1550 * We really want to reject the packet as early as possible
1551 * if:
1552 * o We're expecting an MD5'd packet and there is no MD5 tcp option
1553 * o There is an MD5 option and we're not expecting one
1554 */
7174259e 1555 if (tcp_v4_inbound_md5_hash(sk, skb))
cfb6eeb4
YH
1556 goto discard;
1557#endif
1558
1da177e4 1559 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
404e0a8b
ED
1560 struct dst_entry *dst = sk->sk_rx_dst;
1561
bdeab991 1562 sock_rps_save_rxhash(sk, skb);
404e0a8b 1563 if (dst) {
505fbcf0
ED
1564 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1565 dst->ops->check(dst, 0) == NULL) {
92101b3b
DM
1566 dst_release(dst);
1567 sk->sk_rx_dst = NULL;
1568 }
1569 }
c995ae22 1570 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1da177e4
LT
1571 return 0;
1572 }
1573
ab6a5bb6 1574 if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
1da177e4
LT
1575 goto csum_err;
1576
1577 if (sk->sk_state == TCP_LISTEN) {
1578 struct sock *nsk = tcp_v4_hnd_req(sk, skb);
1579 if (!nsk)
1580 goto discard;
1581
1582 if (nsk != sk) {
bdeab991 1583 sock_rps_save_rxhash(nsk, skb);
cfb6eeb4
YH
1584 if (tcp_child_process(sk, nsk, skb)) {
1585 rsk = nsk;
1da177e4 1586 goto reset;
cfb6eeb4 1587 }
1da177e4
LT
1588 return 0;
1589 }
ca55158c 1590 } else
bdeab991 1591 sock_rps_save_rxhash(sk, skb);
ca55158c 1592
aa8223c7 1593 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
cfb6eeb4 1594 rsk = sk;
1da177e4 1595 goto reset;
cfb6eeb4 1596 }
1da177e4
LT
1597 return 0;
1598
1599reset:
cfb6eeb4 1600 tcp_v4_send_reset(rsk, skb);
1da177e4
LT
1601discard:
1602 kfree_skb(skb);
1603 /* Be careful here. If this function gets more complicated and
1604 * gcc suffers from register pressure on the x86, sk (in %ebx)
1605 * might be destroyed here. This current version compiles correctly,
1606 * but you have been warned.
1607 */
1608 return 0;
1609
1610csum_err:
6a5dc9e5 1611 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
63231bdd 1612 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1da177e4
LT
1613 goto discard;
1614}
4bc2f18b 1615EXPORT_SYMBOL(tcp_v4_do_rcv);
1da177e4 1616
160eb5a6 1617void tcp_v4_early_demux(struct sk_buff *skb)
41063e9d 1618{
41063e9d
DM
1619 const struct iphdr *iph;
1620 const struct tcphdr *th;
1621 struct sock *sk;
41063e9d 1622
41063e9d 1623 if (skb->pkt_type != PACKET_HOST)
160eb5a6 1624 return;
41063e9d 1625
45f00f99 1626 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
160eb5a6 1627 return;
41063e9d
DM
1628
1629 iph = ip_hdr(skb);
45f00f99 1630 th = tcp_hdr(skb);
41063e9d
DM
1631
1632 if (th->doff < sizeof(struct tcphdr) / 4)
160eb5a6 1633 return;
41063e9d 1634
45f00f99 1635 sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
41063e9d 1636 iph->saddr, th->source,
7011d085 1637 iph->daddr, ntohs(th->dest),
9cb429d6 1638 skb->skb_iif);
41063e9d
DM
1639 if (sk) {
1640 skb->sk = sk;
1641 skb->destructor = sock_edemux;
1642 if (sk->sk_state != TCP_TIME_WAIT) {
1643 struct dst_entry *dst = sk->sk_rx_dst;
505fbcf0 1644
41063e9d
DM
1645 if (dst)
1646 dst = dst_check(dst, 0);
92101b3b 1647 if (dst &&
505fbcf0 1648 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
92101b3b 1649 skb_dst_set_noref(skb, dst);
41063e9d
DM
1650 }
1651 }
41063e9d
DM
1652}

/* Packet is added to VJ-style prequeue for processing in process
 * context, if a reader task is waiting. Apparently, this exciting
 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
 * failed somewhere. Latency? Burstiness? Well, at least now we will
 * see why it failed. 8)8) --ANK
 */
bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (sysctl_tcp_low_latency || !tp->ucopy.task)
		return false;

	if (skb->len <= tcp_hdrlen(skb) &&
	    skb_queue_len(&tp->ucopy.prequeue) == 0)
		return false;

	skb_dst_force(skb);
	__skb_queue_tail(&tp->ucopy.prequeue, skb);
	tp->ucopy.memory += skb->truesize;
	if (tp->ucopy.memory > sk->sk_rcvbuf) {
		struct sk_buff *skb1;

		BUG_ON(sock_owned_by_user(sk));

		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
			sk_backlog_rcv(sk, skb1);
			NET_INC_STATS_BH(sock_net(sk),
					 LINUX_MIB_TCPPREQUEUEDROPPED);
		}

		tp->ucopy.memory = 0;
	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
		wake_up_interruptible_sync_poll(sk_sleep(sk),
						POLLIN | POLLRDNORM | POLLRDBAND);
		if (!inet_csk_ack_scheduled(sk))
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  (3 * tcp_rto_min(sk)) / 4,
						  TCP_RTO_MAX);
	}
	return true;
}
EXPORT_SYMBOL(tcp_prequeue);
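
/*
 * Editorial note (not part of the original source): the prequeue is
 * bypassed entirely when sysctl_tcp_low_latency is set (the
 * net.ipv4.tcp_low_latency sysctl) or when no reader is blocked in
 * recvmsg() (tp->ucopy.task == NULL); segments then go through
 * tcp_v4_do_rcv() immediately instead of waiting for process context.
 */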

/*
 * From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided the th->doff==0 case is eliminated above.
	 * So, we defer the checks. */

	if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
		goto csum_error;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;
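	/*
	 * Editorial note (not part of the original source): SYN and FIN
	 * each occupy one unit of sequence space, which is why end_seq
	 * adds th->syn and th->fin on top of the payload length. Worked
	 * example: a segment with seq = 1000, 100 bytes of payload and
	 * FIN set yields
	 *
	 *	end_seq = 1000 + 0 (syn) + 1 (fin) + 100 = 1101
	 */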

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	sk_mark_napi_id(sk, skb);
	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = net_dma_find_channel();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb,
					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
csum_error:
		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2)) {
		inet_twsk_put(inet_twsk(sk));
		goto bad_packet;
	}
	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->saddr, th->source,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
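
/*
 * Editorial note (not part of the original source): the TCP_TW_SYN case
 * above implements connection reuse out of TIME_WAIT. A new SYN that
 * tcp_timewait_state_process() finds acceptable kills the timewait
 * socket and is re-dispatched to a matching listener via "goto process",
 * so the old four-tuple can be reused without waiting for the timewait
 * timer to expire.
 */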

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	 = sizeof(struct tcp_timewait_sock),
	.twsk_unique	 = tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	dst_hold(dst);
	sk->sk_rx_dst = dst;
	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
EXPORT_SYMBOL(inet_sk_rx_dst_set);
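
/*
 * Editorial note (not part of the original source): this helper is
 * installed as .sk_rx_dst_set in ipv4_specific just below; the cached
 * dst and ifindex it records are what the established fast path in
 * tcp_v4_do_rcv() and tcp_v4_early_demux() validate before reuse.
 */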

const struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
	.bind_conflict	   = inet_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};
EXPORT_SYMBOL(ipv4_specific);

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv4_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

	return 0;
}

void tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	tcp_write_queue_purge(sk);

	/* Cleans up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any */
	if (tp->md5sig_info) {
		tcp_clear_md5_list(sk);
		kfree_rcu(tp->md5sig_info, rcu);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Cleans up our sk_async_wait_queue */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean the prequeue; it really must be empty by now. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);

	BUG_ON(tp->fastopen_rsk != NULL);

	/* If socket is aborted during connect operation */
	tcp_free_fastopen_req(tp);

	sk_sockets_allocated_dec(sk);
	sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

/*
 * Get the next listener socket following cur. If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero, the very first socket in the hash table is
 * returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid		= sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state	= TCP_SEQ_STATE_OPENREQ;
			st->sbucket	= 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}
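
/*
 * Editorial note (not part of the original source): /proc/net/tcp is
 * walked as a three-stage state machine: TCP_SEQ_STATE_LISTENING over
 * the listener buckets, TCP_SEQ_STATE_OPENREQ as a detour through each
 * listener's SYN table of pending request_socks, and finally
 * TCP_SEQ_STATE_ESTABLISHED over the established/timewait hash.
 */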

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}
2068
05dbc7b5 2069static inline bool empty_bucket(const struct tcp_iter_state *st)
6eac5604 2070{
05dbc7b5 2071 return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
6eac5604
AK
2072}

/*
 * Get the first established socket starting from the bucket given in
 * st->bucket. If st->bucket is zero, the very first socket in the hash
 * is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		spin_unlock_bh(lock);
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			return sk;
	}

	spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	++st->bucket;
	return established_get_first(seq);
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc	  = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc	  = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc	  = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}
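
/*
 * Editorial example (not part of the original source): the seq_file
 * contract that tcp_seq_start/next/stop and tcp4_seq_show implement.
 * ->start() positions the iterator (or returns SEQ_START_TOKEN for a
 * header line), ->next() advances it, ->show() formats one record, and
 * ->stop() releases whatever ->start() acquired. A minimal sketch that
 * iterates three integers; all "demo_*" names are hypothetical:
 */
#if 0
#include <linux/seq_file.h>

static void *demo_start(struct seq_file *seq, loff_t *pos)
{
	return *pos < 3 ? pos : NULL;		/* visit positions 0..2 */
}

static void *demo_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return *pos < 3 ? pos : NULL;
}

static void demo_stop(struct seq_file *seq, void *v)
{
	/* nothing to unlock in this sketch */
}

static int demo_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "item %lld\n", *(loff_t *)v);
	return 0;
}

static const struct seq_operations demo_seq_ops = {
	.start = demo_start,
	.next  = demo_next,
	.stop  = demo_stop,
	.show  = demo_show,
};
#endif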

int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
	struct tcp_iter_state *s;
	int err;

	err = seq_open_net(inode, file, &afinfo->seq_ops,
			   sizeof(struct tcp_iter_state));
	if (err < 0)
		return err;

	s = ((struct seq_file *)file->private_data)->private;
	s->family	= afinfo->family;
	s->last_pos	= 0;
	return 0;
}
EXPORT_SYMBOL(tcp_seq_open);

int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	afinfo->seq_ops.start	= tcp_seq_start;
	afinfo->seq_ops.next	= tcp_seq_next;
	afinfo->seq_ops.stop	= tcp_seq_stop;

	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
			     afinfo->seq_fops, afinfo);
	if (!p)
		rc = -ENOMEM;
	return rc;
}
EXPORT_SYMBOL(tcp_proc_register);

void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
{
	remove_proc_entry(afinfo->name, net->proc_net);
}
EXPORT_SYMBOL(tcp_proc_unregister);

static void get_openreq4(const struct sock *sk, const struct request_sock *req,
			 struct seq_file *f, int i, kuid_t uid)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	long delta = req->expires - jiffies;

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
		i,
		ireq->ir_loc_addr,
		ntohs(inet_sk(sk)->inet_sport),
		ireq->ir_rmt_addr,
		ntohs(ireq->ir_rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_delta_to_clock_t(delta),
		req->num_timeout,
		from_kuid_munged(seq_user_ns(f), uid),
		0,  /* non standard timer */
		0,  /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req);
}

static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
{
	int timer_active;
	unsigned long timer_expires;
	const struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	const struct inet_sock *inet = inet_sk(sk);
	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
	__be32 dest = inet->inet_daddr;
	__be32 src = inet->inet_rcv_saddr;
	__u16 destp = ntohs(inet->inet_dport);
	__u16 srcp = ntohs(inet->inet_sport);
	int rx_queue;

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sk->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sk->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	if (sk->sk_state == TCP_LISTEN)
		rx_queue = sk->sk_ack_backlog;
	else
		/*
		 * Because we don't lock the socket, we might find a
		 * transient negative value.
		 */
		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
		i, src, srcp, dest, destp, sk->sk_state,
		tp->write_seq - tp->snd_una,
		rx_queue,
		timer_active,
		jiffies_delta_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
		icsk->icsk_probes_out,
		sock_i_ino(sk),
		atomic_read(&sk->sk_refcnt), sk,
		jiffies_to_clock_t(icsk->icsk_rto),
		jiffies_to_clock_t(icsk->icsk_ack.ato),
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		sk->sk_state == TCP_LISTEN ?
		    (fastopenq ? fastopenq->max_qlen : 0) :
		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
}
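
/*
 * Editorial note (not part of the original source): the timer_active
 * column appears to encode which timer is pending: 0 none, 1 retransmit
 * (or early retransmit/loss probe), 2 the sk_timer (e.g. keepalive),
 * 4 the zero-window probe timer; get_timewait4_sock() below prints 3
 * for TIME_WAIT sockets.
 */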

static void get_timewait4_sock(const struct inet_timewait_sock *tw,
			       struct seq_file *f, int i)
{
	__be32 dest, src;
	__u16 destp, srcp;
	s32 delta = tw->tw_ttd - inet_tw_time_stamp();

	dest  = tw->tw_daddr;
	src   = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	seq_setwidth(seq, TMPSZ - 1);
	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (sk->sk_state == TCP_TIME_WAIT)
			get_timewait4_sock(v, seq, st->num);
		else
			get_tcp4_sock(v, seq, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid);
		break;
	}
out:
	seq_pad(seq, '\n');
	return 0;
}
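
/*
 * Editorial example (not part of the original source): a minimal
 * userspace reader for the /proc/net/tcp records produced above.
 * Addresses and ports are printed in hex (%08X:%04X) and the state is
 * the numeric sk_state. Error handling is deliberately thin; this is a
 * sketch, not a hardened parser.
 */
#if 0
#include <stdio.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/net/tcp", "r");

	if (!f)
		return 1;
	if (fgets(line, sizeof(line), f))	/* skip the header line */
		while (fgets(line, sizeof(line), f)) {
			unsigned laddr, lport, raddr, rport, state;

			if (sscanf(line, "%*d: %8x:%4x %8x:%4x %2x",
				   &laddr, &lport, &raddr, &rport,
				   &state) == 5)
				printf("%08X:%04X -> %08X:%04X st %02X\n",
				       laddr, lport, raddr, rport, state);
		}
	fclose(f);
	return 0;
}
#endif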

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.release_cb		= tcp_release_cb,
	.mtu_reduced		= tcp_v4_mtu_reduced,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_MEMCG_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);
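
/*
 * Editorial note (not part of the original source): tcp_prot itself is
 * registered elsewhere; inet_init() in af_inet.c calls
 * proto_register(&tcp_prot, 1) and then hooks the protocol into the
 * socket layer via inet_register_protosw(). A condensed sketch of that
 * call:
 */
#if 0
	rc = proto_register(&tcp_prot, 1);	/* 1 => allocate slabs */
	if (rc)
		goto out;
#endif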

static int __net_init tcp_sk_init(struct net *net)
{
	net->ipv4.sysctl_tcp_ecn = 2;
	return 0;
}

static void __net_exit tcp_sk_exit(struct net *net)
{
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}