/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#include <linux/bottom_half.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>
#include <net/secure_seq.h>
#include <net/tcp_memcontrol.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
						   __be32 addr);
static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#else
static inline
struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	return NULL;
}
#endif

struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
{
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
}
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);
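
/*
 * Note on the reuse rule above: TIME-WAIT port-pair reuse is gated by the
 * tcp_tw_reuse sysctl (typically enabled with
 * "sysctl -w net.ipv4.tcp_tw_reuse=1"), and even then a bucket is only
 * reused once its last timestamp is more than one second old. The new
 * write_seq is placed 64K past tw_snd_nxt so the old and new sequence
 * spaces cannot overlap.
 */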

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr) {
		struct inet_peer *peer = rt_get_peer(rt, fl4->daddr);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the socket
	 * lock, select a source port, enter ourselves into the hash tables
	 * and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
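
/*
 * Rough shape of a successful tcp_v4_connect(), summarizing the code
 * above rather than adding a code path: route the destination, move to
 * SYN-SENT, pick a source port via inet_hash_connect(), rebind the route
 * to the final port pair with ip_route_newports(), choose a secure
 * initial sequence number, and finally emit the SYN from tcp_connect().
 */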

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}
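
/*
 * Worked example (illustrative numbers only): a router on the path
 * returns ICMP_FRAG_NEEDED with an MTU of 1400 while icsk_pmtu_cookie is
 * 1500. tcp_sync_mss(sk, 1400) shrinks the MSS (roughly 1400 minus 40
 * bytes of IPv4+TCP headers, i.e. about 1360 without options) and
 * tcp_simple_retransmit() resends the dropped segment immediately instead
 * of waiting for the retransmit timer to fire.
 */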
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, e.g., if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows us to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
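
/*
 * Illustrative RTO-revert arithmetic for the draft-zimmermann-tcp-lcd
 * path above (numbers invented for the example): with a base RTO of 200ms
 * and icsk_backoff = 3 the socket was waiting up to 1600ms. A
 * host-unreachable ICMP matching snd_una drops backoff to 2 (RTO 800ms);
 * if 500ms have already elapsed since the head of the write queue was
 * stamped, the retransmit timer is re-armed for the remaining 300ms, and
 * if nothing remains we retransmit immediately.
 */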
static void __tcp_v4_send_check(struct sk_buff *skb,
				__be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}
/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
{
	const struct inet_sock *inet = inet_sk(sk);

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_send_check);
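
/*
 * With CHECKSUM_PARTIAL the hardware finishes the sum: the kernel stores
 * only the inverted pseudo-header checksum in th->check and records where
 * the NIC must start summing (csum_start) and where to write the result
 * (csum_offset). Otherwise the full checksum is computed in software from
 * the accumulated skb->csum.
 */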

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	const struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So we build the reply based only on the parameters
 *		that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->saddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}
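
/*
 * The seq/ack choice above follows RFC 793: if the offending segment
 * carried an ACK, the RST claims that acknowledged sequence number as its
 * own (rep.th.seq = th->ack_seq); otherwise the RST acknowledges
 * everything the segment occupied instead. For example, a stray SYN with
 * seq 1000 and no payload draws a RST with seq 0, ACK set and
 * ack_seq 1001 (1000 plus 1 for the SYN flag).
 */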
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
		      &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
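
/*
 * The timestamp option above packs into a single well-known word:
 * (TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) |
 * TCPOLEN_TIMESTAMP is (1 << 24) | (1 << 16) | (8 << 8) | 10, i.e.
 * 0x0101080a on the wire, followed by the two 32-bit timestamp values.
 */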
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}
static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent,
			0,
			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      struct request_values *rvp)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, rvp);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

	dst_release(dst);
	return err;
}

static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v4_send_synack(sk, NULL, req, rvp);
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

/*
 * Return 1 if a syncookie should be sent
 */
int tcp_syn_flood_action(struct sock *sk,
			 const struct sk_buff *skb,
			 const char *proto)
{
	const char *msg = "Dropping request";
	int want_cookie = 0;
	struct listen_sock *lopt;

#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = 1;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s. "
			"Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);
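
/*
 * This fires only once the SYN queue is full. Whether the listener keeps
 * accepting under flood is governed by the tcp_syncookies sysctl
 * ("sysctl -w net.ipv4.tcp_syncookies=1"): with it enabled, the
 * connection state is encoded into the SYN-ACK's sequence number instead
 * of being queued, so the request can be dropped without losing it.
 */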

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options_rcu *tcp_v4_save_options(struct sock *sk,
						  struct sk_buff *skb)
{
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;

	if (opt && opt->optlen) {
		int opt_size = sizeof(*dopt) + opt->optlen;

		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(&dopt->opt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address. */
static struct tcp_md5sig_key *
			tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return &tp->md5sig_info->keys4[i].base;
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		struct tcp_md5sig_info *md5sig;

		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info),
						  GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		}

		md5sig = tp->md5sig_info;
		if (md5sig->entries4 == 0 &&
		    tcp_alloc_md5sig_pool(sk) == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}

		if (md5sig->alloced4 == md5sig->entries4) {
			keys = kmalloc((sizeof(*keys) *
					(md5sig->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				if (md5sig->entries4 == 0)
					tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (md5sig->entries4)
				memcpy(keys, md5sig->keys4,
				       sizeof(*keys) * md5sig->entries4);

			/* Free old key list, and reference new one */
			kfree(md5sig->keys4);
			md5sig->keys4 = keys;
			md5sig->alloced4++;
		}
		md5sig->entries4++;
		md5sig->keys4[md5sig->entries4 - 1].addr	= addr;
		md5sig->keys4[md5sig->entries4 - 1].base.key	= newkey;
		md5sig->keys4[md5sig->entries4 - 1].base.keylen	= newkeylen;
	}
	return 0;
}
EXPORT_SYMBOL(tcp_v4_md5_do_add);

int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].base.key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
				tp->md5sig_info->alloced4 = 0;
				tcp_free_md5sig_pool();
			} else if (tp->md5sig_info->entries4 != i) {
				/* Need to do some manipulation */
				memmove(&tp->md5sig_info->keys4[i],
					&tp->md5sig_info->keys4[i+1],
					(tp->md5sig_info->entries4 - i) *
					 sizeof(struct tcp4_md5sig_key));
			}
			return 0;
		}
	}
	return -ENOENT;
}
EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the set of keys,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(*p), sk->sk_allocation);
		if (!p)
			return -EINVAL;

		tp->md5sig_info = p;
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
	if (!newkey)
		return -ENOMEM;
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}
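
/*
 * Userspace reaches this handler through the TCP_MD5SIG socket option.
 * A minimal sketch (the peer address 192.0.2.1 and the key are
 * placeholders, not values from this file):
 *
 *	struct tcp_md5sig sig = { .tcpm_keylen = 6 };
 *	struct sockaddr_in *a = (struct sockaddr_in *)&sig.tcpm_addr;
 *
 *	a->sin_family = AF_INET;
 *	inet_pton(AF_INET, "192.0.2.1", &a->sin_addr);
 *	memcpy(sig.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &sig, sizeof(sig));
 *
 * Passing a zero tcpm_keylen deletes the key for that address.
 */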

static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
	} else if (req) {
		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
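
/*
 * Both hashing helpers feed MD5 the inputs RFC 2385 prescribes, in order:
 * the pseudo-header (saddr, daddr, zero pad, protocol, segment length),
 * the TCP header with its checksum field treated as zero, the payload
 * (tcp_md5_hash_skb_data, skipped when hashing a bare header), and
 * finally the connection key itself. A receiver that disagrees on any of
 * these bytes, or on the key, computes a different 16-byte digest and
 * drops the segment.
 */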

static int tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
			       &iph->saddr, ntohs(th->source),
			       &iph->daddr, ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	int want_cookie = 0;

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
		if (!want_cookie)
			goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet_reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_release;

		/* Secret recipe starts with IP addresses */
		*mess++ ^= (__force u32)daddr;
		*mess++ ^= (__force u32)saddr;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

		want_cookie = 0;	/* not our kind of cookie */
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_release;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->no_srccheck = inet_sk(sk)->transparent;
	ireq->opt = tcp_v4_save_options(sk, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (want_cookie) {
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
		req->cookie_ts = tmp_opt.tstamp_ok;
	} else if (!isn) {
		struct inet_peer *peer = NULL;
		struct flowi4 fl4;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
		    fl4.daddr == saddr &&
		    (peer = rt_get_peer((struct rtable *)dst, fl4.daddr)) != NULL) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI4/%u\n",
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;
	tcp_rsk(req)->snt_synack = tcp_time_stamp;

	if (tcp_v4_send_synack(sk, dst, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}
EXPORT_SYMBOL(tcp_v4_conn_request);

/*
 * The three way handshake has completed - we got a valid synack -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct ip_options_rcu *inet_opt;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

	newsk->sk_gso_type = SKB_GSO_TCPV4;

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->inet_daddr   = ireq->rmt_addr;
	newinet->inet_rcv_saddr = ireq->loc_addr;
	newinet->inet_saddr   = ireq->loc_addr;
	inet_opt	      = ireq->opt;
	rcu_assign_pointer(newinet->inet_opt, inet_opt);
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
	newinet->inet_id = newtp->write_seq ^ jiffies;

	if (!dst && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
		goto put_and_exit;

	sk_setup_caps(newsk, dst);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);
	if (tcp_rsk(req)->snt_synack)
		tcp_valid_rtt_meas(newsk,
		    tcp_time_stamp - tcp_rsk(req)->snt_synack);
	newtp->total_retrans = req->retrans;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v4_md5_do_lookup(sk, newinet->inet_daddr);
	if (key != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
					  newkey, key->keylen);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0)
		goto put_and_exit;
	__inet_hash_nolisten(newsk, NULL);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	tcp_clear_xmit_timers(newsk);
	tcp_cleanup_congestion_control(newsk);
	bh_unlock_sock(newsk);
	sock_put(newsk);
	goto exit;
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
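
/*
 * Receive-side analogue of the send-path checksum handling earlier in
 * this file: with CHECKSUM_COMPLETE the device already summed the packet,
 * so one pseudo-header fold decides validity. Tiny segments (<= 76 bytes)
 * are verified immediately in software; anything larger keeps the partial
 * pseudo-header sum in skb->csum to be completed later, when the data has
 * to be walked for the copy anyway.
 */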

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb);
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}
EXPORT_SYMBOL(tcp_v4_do_rcv);
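
/*
 * Fast path vs. slow path in tcp_v4_do_rcv(): an ESTABLISHED socket goes
 * straight to tcp_rcv_established(); a LISTEN socket first resolves the
 * segment to a request or child socket via tcp_v4_hnd_req(); every other
 * state funnels through tcp_rcv_state_process(). A nonzero return from
 * any of these names the socket to reset against, hence the rsk
 * bookkeeping above.
 */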
1da177e4
LT
1640
1641/*
1642 * From tcp_input.c
1643 */
1644
int tcp_v4_rcv(struct sk_buff *skb)
{
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided the case of th->doff == 0 is eliminated.
	 * So, we defer the checks. */
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
							&tcp_hashinfo,
							iph->daddr, th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

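/*
 * Dispatch summary for tcp_v4_rcv() above: when the socket is not held
 * by a user context, the segment is processed immediately (or parked on
 * the prequeue for a sleeping reader to process); when the user holds
 * the socket lock, the segment goes onto the backlog and is replayed
 * through tcp_v4_do_rcv() when the lock is released. sk_add_backlog()
 * can fail if the backlog would overrun the receive buffer limits, in
 * which case the segment is dropped and LINUX_MIB_TCPBACKLOGDROP bumped.
 */
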
3f419d2d 1789struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
1da177e4 1790{
3f419d2d 1791 struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
1da177e4 1792 struct inet_sock *inet = inet_sk(sk);
3f419d2d 1793 struct inet_peer *peer;
1da177e4 1794
c5216cc7
DM
1795 if (!rt ||
1796 inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
b534ecf1 1797 peer = inet_getpeer_v4(inet->inet_daddr, 1);
3f419d2d 1798 *release_it = true;
1da177e4
LT
1799 } else {
1800 if (!rt->peer)
a48eff12 1801 rt_bind_peer(rt, inet->inet_daddr, 1);
1da177e4 1802 peer = rt->peer;
3f419d2d 1803 *release_it = false;
1da177e4
LT
1804 }
1805
3f419d2d 1806 return peer;
1da177e4 1807}
3f419d2d 1808EXPORT_SYMBOL(tcp_v4_get_peer);
1da177e4 1809
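/*
 * tcp_v4_get_peer() prefers the inet_peer already bound to the cached
 * route, avoiding a fresh hash lookup; only when there is no usable
 * route, or the route no longer matches the socket's destination, does
 * it fall back to inet_getpeer_v4(). *release_it tells the caller
 * whether it now owns a reference that must be dropped (inet_putpeer()).
 */
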
ccb7c410 1810void *tcp_v4_tw_get_peer(struct sock *sk)
1da177e4 1811{
cf533ea5 1812 const struct inet_timewait_sock *tw = inet_twsk(sk);
1da177e4 1813
ccb7c410 1814 return inet_getpeer_v4(tw->tw_daddr, 1);
1da177e4 1815}
ccb7c410
DM
1816EXPORT_SYMBOL(tcp_v4_tw_get_peer);
1817
static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
	.twsk_getpeer	= tcp_v4_tw_get_peer,
};

3b401a81 1825const struct inet_connection_sock_af_ops ipv4_specific = {
543d9cfe
ACM
1826 .queue_xmit = ip_queue_xmit,
1827 .send_check = tcp_v4_send_check,
1828 .rebuild_header = inet_sk_rebuild_header,
1829 .conn_request = tcp_v4_conn_request,
1830 .syn_recv_sock = tcp_v4_syn_recv_sock,
3f419d2d 1831 .get_peer = tcp_v4_get_peer,
543d9cfe
ACM
1832 .net_header_len = sizeof(struct iphdr),
1833 .setsockopt = ip_setsockopt,
1834 .getsockopt = ip_getsockopt,
1835 .addr2sockaddr = inet_csk_addr2sockaddr,
1836 .sockaddr_len = sizeof(struct sockaddr_in),
ab1e0a13 1837 .bind_conflict = inet_csk_bind_conflict,
3fdadf7d 1838#ifdef CONFIG_COMPAT
543d9cfe
ACM
1839 .compat_setsockopt = compat_ip_setsockopt,
1840 .compat_getsockopt = compat_ip_getsockopt,
3fdadf7d 1841#endif
1da177e4 1842};
4bc2f18b 1843EXPORT_SYMBOL(ipv4_specific);
1da177e4 1844
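/*
 * ipv4_specific is the address-family operations vector hung off
 * icsk->icsk_af_ops in tcp_v4_init_sock() below; the core TCP engine
 * only ever calls through this indirection, which is what lets the
 * IPv6 side reuse the same engine with its own vector.
 */
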
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v4_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 * sk_alloc(), so they need not be done here.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them. -DaveM
	 */
	tp->snd_cwnd = TCP_INIT_CWND;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}
	/* Presumed zeroed, in order of appearance:
	 * cookie_in_always, cookie_out_never,
	 * s_data_constant, s_data_in, s_data_out
	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	local_bh_disable();
	sock_update_memcg(sk);
	sk_sockets_allocated_inc(sk);
	local_bh_enable();

	return 0;
}

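/*
 * Initial values above: the RTO starts at TCP_TIMEOUT_INIT and cwnd at
 * TCP_INIT_CWND (10 segments in kernels of this vintage), while
 * snd_ssthresh is left "infinite" so slow start runs until loss or
 * receiver limits end it.
 */
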
7d06b2e0 1920void tcp_v4_destroy_sock(struct sock *sk)
1da177e4
LT
1921{
1922 struct tcp_sock *tp = tcp_sk(sk);
1923
1924 tcp_clear_xmit_timers(sk);
1925
6687e988 1926 tcp_cleanup_congestion_control(sk);
317a76f9 1927
1da177e4 1928 /* Cleanup up the write buffer. */
fe067e8a 1929 tcp_write_queue_purge(sk);
1da177e4
LT
1930
1931 /* Cleans up our, hopefully empty, out_of_order_queue. */
e905a9ed 1932 __skb_queue_purge(&tp->out_of_order_queue);
1da177e4 1933
cfb6eeb4
YH
1934#ifdef CONFIG_TCP_MD5SIG
1935 /* Clean up the MD5 key list, if any */
1936 if (tp->md5sig_info) {
1937 tcp_v4_clear_md5_list(sk);
1938 kfree(tp->md5sig_info);
1939 tp->md5sig_info = NULL;
1940 }
1941#endif
1942
1a2449a8
CL
1943#ifdef CONFIG_NET_DMA
1944 /* Cleans up our sk_async_wait_queue */
e905a9ed 1945 __skb_queue_purge(&sk->sk_async_wait_queue);
1a2449a8
CL
1946#endif
1947
1da177e4
LT
1948 /* Clean prequeue, it must be empty really */
1949 __skb_queue_purge(&tp->ucopy.prequeue);
1950
1951 /* Clean up a referenced TCP bind bucket. */
463c84b9 1952 if (inet_csk(sk)->icsk_bind_hash)
ab1e0a13 1953 inet_put_port(sk);
1da177e4
LT
1954
1955 /*
1956 * If sendmsg cached page exists, toss it.
1957 */
1958 if (sk->sk_sndmsg_page) {
1959 __free_page(sk->sk_sndmsg_page);
1960 sk->sk_sndmsg_page = NULL;
1961 }
1962
435cf559
WAS
1963 /* TCP Cookie Transactions */
1964 if (tp->cookie_values != NULL) {
1965 kref_put(&tp->cookie_values->kref,
1966 tcp_cookie_values_release);
1967 tp->cookie_values = NULL;
1968 }
1969
180d8cd9 1970 sk_sockets_allocated_dec(sk);
d1a4c0b3 1971 sock_release_memcg(sk);
1da177e4 1972}
1da177e4
LT
1973EXPORT_SYMBOL(tcp_v4_destroy_sock);
1974
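/*
 * tcp_v4_destroy_sock() is exported because the IPv6 side reuses it
 * verbatim; everything it tears down (timers, queues, MD5 keys, the
 * bind bucket) is address-family independent.
 */
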
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */

static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
{
	return hlist_nulls_empty(head) ? NULL :
		list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return !is_a_nulls(tw->tw_node.next) ?
		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

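/*
 * The helpers above walk "nulls" lists: RCU-friendly hash chains whose
 * terminator encodes the bucket number instead of being plain NULL, so a
 * lockless reader that was moved to another chain mid-walk can detect it
 * and restart (this pairs with SLAB_DESTROY_BY_RCU on tcp_prot below).
 */
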
/*
 * Get the next listener socket following cur. If cur is NULL, get the
 * first socket starting from the bucket given in st->bucket; when
 * st->bucket is zero, the very first socket in the hash table is
 * returned.
 */
static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_nulls_node *node;
	struct sock *sk = cur;
	struct inet_listen_hashbucket *ilb;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	if (!sk) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		st->offset = 0;
		goto get_sk;
	}
	ilb = &tcp_hashinfo.listening_hash[st->bucket];
	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_nulls_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_nulls_next(sk);
	}
get_sk:
	sk_nulls_for_each_from(sk, node) {
		if (!net_eq(sock_net(sk), net))
			continue;
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	spin_unlock_bh(&ilb->lock);
	st->offset = 0;
	if (++st->bucket < INET_LHTABLE_SIZE) {
		ilb = &tcp_hashinfo.listening_hash[st->bucket];
		spin_lock_bh(&ilb->lock);
		sk = sk_nulls_head(&ilb->head);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

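/*
 * listening_get_next() is effectively a resumable state machine:
 * st->state flips between TCP_SEQ_STATE_LISTENING (walking listener
 * sockets) and TCP_SEQ_STATE_OPENREQ (walking a listener's SYN table),
 * while st->bucket, st->sbucket and st->offset record exactly where to
 * pick up again on the next read() of the seq_file.
 */
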
static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	st->offset = 0;
	rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static inline int empty_bucket(struct tcp_iter_state *st)
{
	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
}

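/*
 * empty_bucket() deliberately peeks at both chains without the bucket
 * lock: on a sparsely populated hash this lets the iterator skip the
 * vast majority of buckets without touching a spinlock, and a racy
 * miss merely omits a socket added concurrently, which /proc readers
 * already have to tolerate.
 */
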
/*
 * Get first established socket starting from bucket given in st->bucket.
 * If st->bucket is zero, the very first socket in the hash is returned.
 */
static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);
	void *rc = NULL;

	st->offset = 0;
	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
		struct sock *sk;
		struct hlist_nulls_node *node;
		struct inet_timewait_sock *tw;
		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);

		/* Lockless fast path for the common case of empty buckets */
		if (empty_bucket(st))
			continue;

		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family ||
			    !net_eq(sock_net(sk), net)) {
				continue;
			}
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket].twchain) {
			if (tw->tw_family != st->family ||
			    !net_eq(twsk_net(tw), net)) {
				continue;
			}
			rc = tw;
			goto out;
		}
		spin_unlock_bh(lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_nulls_node *node;
	struct tcp_iter_state *st = seq->private;
	struct net *net = seq_file_net(seq);

	++st->num;
	++st->offset;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
			tw = tw_next(tw);
		}
		if (tw) {
			cur = tw;
			goto out;
		}
		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* Look for the next non-empty bucket */
		st->offset = 0;
		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
				empty_bucket(st))
			;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			return NULL;

		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
	} else
		sk = sk_nulls_next(sk);

	sk_nulls_for_each_from(sk, node) {
		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	st->bucket = 0;
	rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seek_last_pos(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	int offset = st->offset;
	int orig_num = st->num;
	void *rc = NULL;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		if (st->bucket >= INET_LHTABLE_SIZE)
			break;
		st->state = TCP_SEQ_STATE_LISTENING;
		rc = listening_get_next(seq, NULL);
		while (offset-- && rc)
			rc = listening_get_next(seq, rc);
		if (rc)
			break;
		st->bucket = 0;
		/* Fallthrough */
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		if (st->bucket > tcp_hashinfo.ehash_mask)
			break;
		rc = established_get_first(seq);
		while (offset-- && rc)
			rc = established_get_next(seq, rc);
	}

	st->num = orig_num;

	return rc;
}

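/*
 * tcp_seek_last_pos() exists so that sequential reads of a huge
 * /proc/net/tcp do not restart the walk from bucket zero on every
 * chunk: tcp_seq_start() below compares *pos against st->last_pos and,
 * on a match, resumes from the saved bucket/offset instead of
 * re-counting entries from scratch.
 */
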
static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc;

	if (*pos && *pos == st->last_pos) {
		rc = tcp_seek_last_pos(seq);
		if (rc)
			goto out;
	}

	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	st->bucket = 0;
	st->offset = 0;
	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

out:
	st->last_pos = *pos;
	return rc;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			st->bucket = 0;
			st->offset = 0;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	st->last_pos = *pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		break;
	}
}

73cb88ec 2340int tcp_seq_open(struct inode *inode, struct file *file)
1da177e4
LT
2341{
2342 struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
1da177e4 2343 struct tcp_iter_state *s;
52d6f3f1 2344 int err;
1da177e4 2345
52d6f3f1
DL
2346 err = seq_open_net(inode, file, &afinfo->seq_ops,
2347 sizeof(struct tcp_iter_state));
2348 if (err < 0)
2349 return err;
f40c8174 2350
52d6f3f1 2351 s = ((struct seq_file *)file->private_data)->private;
1da177e4 2352 s->family = afinfo->family;
a8b690f9 2353 s->last_pos = 0;
f40c8174
DL
2354 return 0;
2355}
73cb88ec 2356EXPORT_SYMBOL(tcp_seq_open);
f40c8174 2357
6f8b13bc 2358int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4
LT
2359{
2360 int rc = 0;
2361 struct proc_dir_entry *p;
2362
9427c4b3
DL
2363 afinfo->seq_ops.start = tcp_seq_start;
2364 afinfo->seq_ops.next = tcp_seq_next;
2365 afinfo->seq_ops.stop = tcp_seq_stop;
2366
84841c3c 2367 p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
73cb88ec 2368 afinfo->seq_fops, afinfo);
84841c3c 2369 if (!p)
1da177e4
LT
2370 rc = -ENOMEM;
2371 return rc;
2372}
4bc2f18b 2373EXPORT_SYMBOL(tcp_proc_register);
1da177e4 2374
6f8b13bc 2375void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
1da177e4 2376{
6f8b13bc 2377 proc_net_remove(net, afinfo->name);
1da177e4 2378}
4bc2f18b 2379EXPORT_SYMBOL(tcp_proc_unregister);
1da177e4 2380
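/*
 * tcp_proc_register() is the glue a protocol instance uses to publish
 * itself: it fills in the start/next/stop iterator ops and creates the
 * /proc/net/<name> entry whose ->open handler (tcp_seq_open) stashes
 * the afinfo in the iterator state. tcp4_seq_afinfo below is the IPv4
 * user.
 */
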
cf533ea5 2381static void get_openreq4(const struct sock *sk, const struct request_sock *req,
5e659e4c 2382 struct seq_file *f, int i, int uid, int *len)
1da177e4 2383{
2e6599cb 2384 const struct inet_request_sock *ireq = inet_rsk(req);
1da177e4
LT
2385 int ttd = req->expires - jiffies;
2386
5e659e4c 2387 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2388 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
1da177e4 2389 i,
2e6599cb 2390 ireq->loc_addr,
c720c7e8 2391 ntohs(inet_sk(sk)->inet_sport),
2e6599cb
ACM
2392 ireq->rmt_addr,
2393 ntohs(ireq->rmt_port),
1da177e4
LT
2394 TCP_SYN_RECV,
2395 0, 0, /* could print option size, but that is af dependent. */
2396 1, /* timers active (only the expire timer) */
2397 jiffies_to_clock_t(ttd),
2398 req->retrans,
2399 uid,
2400 0, /* non standard timer */
2401 0, /* open_requests have no inode */
2402 atomic_read(&sk->sk_refcnt),
5e659e4c
PE
2403 req,
2404 len);
1da177e4
LT
2405}
2406
5e659e4c 2407static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
1da177e4
LT
2408{
2409 int timer_active;
2410 unsigned long timer_expires;
cf533ea5 2411 const struct tcp_sock *tp = tcp_sk(sk);
cf4c6bf8 2412 const struct inet_connection_sock *icsk = inet_csk(sk);
cf533ea5 2413 const struct inet_sock *inet = inet_sk(sk);
c720c7e8
ED
2414 __be32 dest = inet->inet_daddr;
2415 __be32 src = inet->inet_rcv_saddr;
2416 __u16 destp = ntohs(inet->inet_dport);
2417 __u16 srcp = ntohs(inet->inet_sport);
49d09007 2418 int rx_queue;
1da177e4 2419
463c84b9 2420 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1da177e4 2421 timer_active = 1;
463c84b9
ACM
2422 timer_expires = icsk->icsk_timeout;
2423 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1da177e4 2424 timer_active = 4;
463c84b9 2425 timer_expires = icsk->icsk_timeout;
cf4c6bf8 2426 } else if (timer_pending(&sk->sk_timer)) {
1da177e4 2427 timer_active = 2;
cf4c6bf8 2428 timer_expires = sk->sk_timer.expires;
1da177e4
LT
2429 } else {
2430 timer_active = 0;
2431 timer_expires = jiffies;
2432 }
2433
49d09007
ED
2434 if (sk->sk_state == TCP_LISTEN)
2435 rx_queue = sk->sk_ack_backlog;
2436 else
2437 /*
2438 * because we dont lock socket, we might find a transient negative value
2439 */
2440 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2441
5e659e4c 2442 seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
71338aa7 2443 "%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
cf4c6bf8 2444 i, src, srcp, dest, destp, sk->sk_state,
47da8ee6 2445 tp->write_seq - tp->snd_una,
49d09007 2446 rx_queue,
1da177e4
LT
2447 timer_active,
2448 jiffies_to_clock_t(timer_expires - jiffies),
463c84b9 2449 icsk->icsk_retransmits,
cf4c6bf8 2450 sock_i_uid(sk),
6687e988 2451 icsk->icsk_probes_out,
cf4c6bf8
IJ
2452 sock_i_ino(sk),
2453 atomic_read(&sk->sk_refcnt), sk,
7be87351
SH
2454 jiffies_to_clock_t(icsk->icsk_rto),
2455 jiffies_to_clock_t(icsk->icsk_ack.ato),
463c84b9 2456 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
1da177e4 2457 tp->snd_cwnd,
0b6a05c1 2458 tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh,
5e659e4c 2459 len);
1da177e4
LT
2460}
2461
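/*
 * The format string above is what a line of /proc/net/tcp looks like.
 * A purely illustrative (made up) example, with addresses printed as
 * raw hex of the __be32 values:
 *
 *   0: 0100007F:0016 00000000:0000 0A 00000000:00000000 00:00000000
 *      00000000  0 0 12345 1 ffff88003d3af700 100 0 0 10 -1
 *
 * i.e. 127.0.0.1:22 in state 0A (TCP_LISTEN) on a little-endian box;
 * the trailing fields are refcnt, sk pointer, rto, ato, quick/pingpong,
 * cwnd and ssthresh.
 */
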
cf533ea5 2462static void get_timewait4_sock(const struct inet_timewait_sock *tw,
5e659e4c 2463 struct seq_file *f, int i, int *len)
1da177e4 2464{
23f33c2d 2465 __be32 dest, src;
1da177e4
LT
2466 __u16 destp, srcp;
2467 int ttd = tw->tw_ttd - jiffies;
2468
2469 if (ttd < 0)
2470 ttd = 0;
2471
2472 dest = tw->tw_daddr;
2473 src = tw->tw_rcv_saddr;
2474 destp = ntohs(tw->tw_dport);
2475 srcp = ntohs(tw->tw_sport);
2476
5e659e4c 2477 seq_printf(f, "%4d: %08X:%04X %08X:%04X"
71338aa7 2478 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
1da177e4
LT
2479 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2480 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
5e659e4c 2481 atomic_read(&tw->tw_refcnt), tw, len);
1da177e4
LT
2482}
2483
#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	int len;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, seq, st->num, &len);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, seq, st->num, &len);
		break;
	}
	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
out:
	return 0;
}

static const struct file_operations tcp_afinfo_seq_fops = {
	.owner   = THIS_MODULE,
	.open    = tcp_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net
};

static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.name		= "tcp",
	.family		= AF_INET,
	.seq_fops	= &tcp_afinfo_seq_fops,
	.seq_ops	= {
		.show		= tcp4_seq_show,
	},
};

static int __net_init tcp4_proc_init_net(struct net *net)
{
	return tcp_proc_register(net, &tcp4_seq_afinfo);
}

static void __net_exit tcp4_proc_exit_net(struct net *net)
{
	tcp_proc_unregister(net, &tcp4_seq_afinfo);
}

static struct pernet_operations tcp4_net_ops = {
	.init = tcp4_proc_init_net,
	.exit = tcp4_proc_exit_net,
};

int __init tcp4_proc_init(void)
{
	return register_pernet_subsys(&tcp4_net_ops);
}

void tcp4_proc_exit(void)
{
	unregister_pernet_subsys(&tcp4_net_ops);
}
#endif /* CONFIG_PROC_FS */

struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
{
	const struct iphdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}

int tcp4_gro_complete(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
				  iph->saddr, iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;

	return tcp_gro_complete(skb);
}

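/*
 * Checksum notes for the two GRO hooks above: on receive, a
 * CHECKSUM_COMPLETE value is verified against the pseudo-header and the
 * packet is only aggregated if it passes (anything unverified sets
 * ->flush and bypasses GRO); on complete, th->check is re-seeded with
 * the ones'-complement pseudo-header sum over the merged length so the
 * aggregate can later be resegmented and checksummed as SKB_GSO_TCPV4.
 */
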
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= inet_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
	.init_cgroup		= tcp_init_cgroup,
	.destroy_cgroup		= tcp_destroy_cgroup,
	.proto_cgroup		= tcp_proto_cgroup,
#endif
};
EXPORT_SYMBOL(tcp_prot);

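/*
 * tcp_prot is the proto instance the AF_INET socket layer binds to
 * SOCK_STREAM/IPPROTO_TCP sockets (registered during inet_init()
 * elsewhere); any hook left unset falls back to the generic sock/inet
 * behaviour.
 */
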
static int __net_init tcp_sk_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
				    PF_INET, SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcp_sk_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
}

static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
}

static struct pernet_operations __net_initdata tcp_sk_ops = {
	.init	    = tcp_sk_init,
	.exit	    = tcp_sk_exit,
	.exit_batch = tcp_sk_exit_batch,
};

void __init tcp_v4_init(void)
{
	inet_hashinfo_init(&tcp_hashinfo);
	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");
}