/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 * Version:	$Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *	David S. Miller		:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *	David S. Miller		:	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *	Andi Kleen		:	Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *	Andi Kleen		:	Implemented fast path mtu discovery.
 *					Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *	Mike McLagan		:	Routing by source
 *	Juan Jose Ciarlante	:	ip_dynaddr bits
 *	Andi Kleen		:	various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>

#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/tcp.h>
#include <net/transp_v6.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/netdma.h>

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;

/* Check TCP sequence numbers in ICMP packets. */
#define ICMP_MIN_LENGTH 8

/* Socket used for sending RSTs */
static struct socket *tcp_socket;

void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr);
static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				   __be32 saddr, __be32 daddr, struct tcphdr *th,
				   int protocol, int tcplen);
#endif

struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
	.lhash_lock  = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock),
	.lhash_users = ATOMIC_INIT(0),
	.lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait),
};

static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet_csk_bind_conflict);
}

static void tcp_v4_hash(struct sock *sk)
{
	inet_hash(&tcp_hashinfo, sk);
}

void tcp_unhash(struct sock *sk)
{
	inet_unhash(&tcp_hashinfo, sk);
}

static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
	return secure_tcp_sequence_number(skb->nh.iph->daddr,
					  skb->nh.iph->saddr,
					  skb->h.th->dest,
					  skb->h.th->source);
}

int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
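		/* Step the new ISN well past tw_snd_nxt (one maximal
		 * 64K window plus 2) so the new incarnation's sequence
		 * space cannot overlap segments the peer may still
		 * hold from the old connection.
		 */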
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}

EXPORT_SYMBOL_GPL(tcp_twsk_unique);

/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	__be32 daddr, nexthop;
	int tmp;
	int err;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt && inet->opt->srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_TCP,
			       inet->sport, usin->sin_port, sk);
	if (tmp < 0)
		return tmp;

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet->opt || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (!inet->saddr)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq		   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) {
		struct inet_peer *peer = rt_get_peer(rt);

		/* VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state TIME-WAIT
		 * and initialize rx_opt.ts_recent from it, when trying new connection.
		 */

		if (peer && peer->tcp_ts_stamp + TCP_PAWS_MSL >= xtime.tv_sec) {
			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
			tp->rx_opt.ts_recent = peer->tcp_ts;
		}
	}

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet->opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;

	tp->rx_opt.mss_clamp = 536;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and, without releasing the
	 * socket lock, select a source port, enter ourselves into the
	 * hash tables and complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	err = ip_route_newports(&rt, IPPROTO_TCP, inet->sport, inet->dport, sk);
	if (err)
		goto failure;

	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->u.dst);

	if (!tp->write_seq)
		tp->write_seq = secure_tcp_sequence_number(inet->saddr,
							   inet->daddr,
							   inet->sport,
							   usin->sin_port);

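	/* Seed the IP ID counter from the initial sequence number;
	 * mixing in jiffies makes the starting point harder to
	 * predict across connections.
	 */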
	inet->id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/* This unhashes the socket and releases the local port, if necessary. */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	return err;
}

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);

	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
	 * sent out by Linux are always < 576 bytes, so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == TCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet-too-big packets
	 * are sent back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not be able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
	struct tcp_sock *tp;
	struct inet_sock *inet;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct sock *sk;
	__u32 seq;
	int err;

	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
			 th->source, inet_iif(skb));
	if (!sk) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
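	/* Trust the ICMP only if the sequence number it quotes falls
	 * within the window of data we currently have in flight;
	 * anything else is stale or forged.
	 */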
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		BUG_TRAP(!req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, e.g., if SYNs crossed.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows only PROTO_UNREACH and PORT_UNREACH
	 * to be considered hard errors (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters, even these two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcphdr *th = skb->h.th;

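	/* With CHECKSUM_PARTIAL the device finishes the checksum, so
	 * only the pseudo-header sum is stored here and skb->csum
	 * records where the checksum field lives; otherwise compute
	 * the full checksum in software.
	 */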
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(th, len, inet->saddr, inet->daddr,
					 csum_partial((char *)th,
						      th->doff << 2,
						      skb->csum));
	}
}

int tcp_v4_gso_send_check(struct sk_buff *skb)
{
	struct iphdr *iph;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	iph = skb->nh.iph;
	th = skb->h.th;

	th->check = 0;
	th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
	skb->csum = offsetof(struct tcphdr, check);
	skb->ip_summed = CHECKSUM_PARTIAL;
	return 0;
}

/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So we build the reply based only on the parameters
 *		that arrived with the segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

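	/* Per RFC 793: if the offending segment carried an ACK, the
	 * reset borrows its sequence number from that ACK; otherwise
	 * send seq 0 and ACK everything the segment occupied.
	 */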
	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof arg);
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	key = sk ? tcp_v4_md5_do_lookup(sk, skb->nh.iph->daddr) : NULL;
	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
					key,
					skb->nh.iph->daddr,
					skb->nh.iph->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif

	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /*XXX*/
				      sizeof(struct tcphdr), IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
			    struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th;
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_key tw_key;
#endif

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof arg);

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
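	/* When echoing a timestamp, use the standard NOP-NOP-TIMESTAMP
	 * encoding so the option block stays 32-bit aligned.
	 */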
	if (ts) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tcp_time_stamp);
		rep.opt[2] = htonl(ts);
		arg.iov[0].iov_len = TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	/*
	 * The SKB holds an incoming packet, but may not have a valid ->sk
	 * pointer. This is especially the case when we're dealing with a
	 * TIME_WAIT ack, because the sk structure is long gone, and only
	 * the tcp_timewait_sock remains. So the md5 key is stashed in that
	 * structure, and we use it in preference.  I believe that (twsk ||
	 * skb->sk) holds true, but we program defensively.
	 */
	if (!twsk && skb->sk) {
		key = tcp_v4_md5_do_lookup(skb->sk, skb->nh.iph->daddr);
	} else if (twsk && twsk->tw_md5_keylen) {
		tw_key.key = twsk->tw_md5_key;
		tw_key.keylen = twsk->tw_md5_keylen;
		key = &tw_key;
	} else {
		key = NULL;
	}

	if (key) {
		int offset = (ts) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
					key,
					skb->nh.iph->daddr,
					skb->nh.iph->saddr,
					&rep.th, IPPROTO_TCP,
					arg.iov[0].iov_len);
	}
#endif

	arg.csum = csum_tcpudp_nofold(skb->nh.iph->daddr,
				      skb->nh.iph->saddr, /*XXX*/
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;

	ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
}

static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
			tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
			req->ts_recent);
}

/*
 *	Send a SYN-ACK in response to a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 */
static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto out;

	skb = tcp_make_synack(sk, dst, req);

	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v4_check(th, skb->len,
					 ireq->loc_addr,
					 ireq->rmt_addr,
					 csum_partial((char *)th, skb->len,
						      skb->csum));

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
	}

out:
	dst_release(dst);
	return err;
}

/*
 *	IPv4 request_sock destructor.
 */
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->opt);
}

#ifdef CONFIG_SYN_COOKIES
static void syn_flood_warning(struct sk_buff *skb)
{
	static unsigned long warntime;

	if (time_after(jiffies, (warntime + HZ * 60))) {
		warntime = jiffies;
		printk(KERN_INFO
		       "possible SYN flooding on port %d. Sending cookies.\n",
		       ntohs(skb->h.th->dest));
	}
}
#endif

/*
 * Save and compile IPv4 options into the request_sock if needed.
 */
static struct ip_options *tcp_v4_save_options(struct sock *sk,
					      struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options *dopt = NULL;

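	/* ip_options_echo() builds the mirror image of the incoming
	 * option block (e.g. a reversed source route) so that replies
	 * retrace the path the SYN took.
	 */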
	if (opt && opt->optlen) {
		int opt_size = optlength(opt);
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
			if (ip_options_echo(dopt, skb)) {
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (!tp->md5sig_info || !tp->md5sig_info->entries4)
		return NULL;
	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr)
			return (struct tcp_md5sig_key *)&tp->md5sig_info->keys4[i];
	}
	return NULL;
}

struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr);
}

EXPORT_SYMBOL(tcp_v4_md5_lookup);

struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
					       struct request_sock *req)
{
	return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr);
}

/* This can be called on a newly created socket, from other files */
int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
		      u8 *newkey, u8 newkeylen)
{
	/* Add Key to the list */
	struct tcp4_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp4_md5sig_key *keys;

	key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr);
	if (key) {
		/* Pre-existing entry - just update that one. */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
		}
		if (tcp_alloc_md5sig_pool() == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
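		/* The per-socket key array is grown one slot at a
		 * time: allocate a larger copy, move the old entries
		 * across, then swap it in and free the old array.
		 */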
		if (tp->md5sig_info->alloced4 == tp->md5sig_info->entries4) {
			keys = kmalloc((sizeof(struct tcp4_md5sig_key) *
				       (tp->md5sig_info->entries4 + 1)), GFP_ATOMIC);
			if (!keys) {
				kfree(newkey);
				tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (tp->md5sig_info->entries4)
				memcpy(keys, tp->md5sig_info->keys4,
				       (sizeof(struct tcp4_md5sig_key) *
					tp->md5sig_info->entries4));

			/* Free old key list, and reference new one */
			kfree(tp->md5sig_info->keys4);
			tp->md5sig_info->keys4 = keys;
			tp->md5sig_info->alloced4++;
		}
		tp->md5sig_info->entries4++;
		tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].addr   = addr;
		tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].key    = newkey;
		tp->md5sig_info->keys4[tp->md5sig_info->entries4 - 1].keylen = newkeylen;
	}
	return 0;
}

EXPORT_SYMBOL(tcp_v4_md5_do_add);

static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, u8 newkeylen)
{
	return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}

int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries4; i++) {
		if (tp->md5sig_info->keys4[i].addr == addr) {
			/* Free the key */
			kfree(tp->md5sig_info->keys4[i].key);
			tp->md5sig_info->entries4--;

			if (tp->md5sig_info->entries4 == 0) {
				kfree(tp->md5sig_info->keys4);
				tp->md5sig_info->keys4 = NULL;
			} else {
				/* Close the hole left in the key array */
				if (tp->md5sig_info->entries4 != i)
					memcpy(&tp->md5sig_info->keys4[i],
					       &tp->md5sig_info->keys4[i + 1],
					       (tp->md5sig_info->entries4 - i)
						* sizeof(struct tcp4_md5sig_key));
			}
			tcp_free_md5sig_pool();
			return 0;
		}
	}
	return -ENOENT;
}

EXPORT_SYMBOL(tcp_v4_md5_do_del);

static void tcp_v4_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* Free each key, then the key array itself,
	 * the crypto element, and then decrement our
	 * hold on the last resort crypto.
	 */
	if (tp->md5sig_info->entries4) {
		int i;
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}
	if (tp->md5sig_info->keys4) {
		kfree(tp->md5sig_info->keys4);
		tp->md5sig_info->keys4 = NULL;
		tp->md5sig_info->alloced4 = 0;
	}
}

static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
		if (!p)
			return -ENOMEM;

		tp->md5sig_info = p;
	}

	newkey = kmalloc(cmd.tcpm_keylen, GFP_KERNEL);
	if (!newkey)
		return -ENOMEM;
	memcpy(newkey, cmd.tcpm_key, cmd.tcpm_keylen);
	return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr,
				 newkey, cmd.tcpm_keylen);
}

static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
				   __be32 saddr, __be32 daddr,
				   struct tcphdr *th, int protocol,
				   int tcplen)
{
	struct scatterlist sg[4];
	__u16 data_len;
	int block = 0;
#ifdef CONFIG_TCP_MD5SIG_DEBUG
	int i;
#endif
	__u16 old_checksum;
	struct tcp_md5sig_pool *hp;
	struct tcp4_pseudohdr *bp;
	struct hash_desc *desc;
	int err;
	unsigned int nbytes = 0;

	/*
	 * Okay, so RFC2385 is turned on for this connection,
	 * so we need to generate the MD5 hash for the packet now.
	 */

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;

	bp = &hp->md5_blk.ip4;
	desc = &hp->md5_desc;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
	bp->saddr = saddr;
	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = protocol;
	bp->len = htons(tcplen);
	sg_set_buf(&sg[block++], bp, sizeof(*bp));
	nbytes += sizeof(*bp);

#ifdef CONFIG_TCP_MD5SIG_DEBUG
	printk("Calculating hash for: ");
	for (i = 0; i < sizeof(*bp); i++)
		printk("%02x ", (unsigned int)((unsigned char *)bp)[i]);
	printk(" ");
#endif

	/* 2. the TCP header, excluding options, and assuming a
	 * checksum of zero.
	 */
	old_checksum = th->check;
	th->check = 0;
	sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
	nbytes += sizeof(struct tcphdr);
#ifdef CONFIG_TCP_MD5SIG_DEBUG
	for (i = 0; i < sizeof(struct tcphdr); i++)
		printk(" %02x", (unsigned int)((unsigned char *)th)[i]);
#endif
	/* 3. the TCP segment data (if any) */
	data_len = tcplen - (th->doff << 2);
	if (data_len > 0) {
		unsigned char *data = (unsigned char *)th + (th->doff << 2);
		sg_set_buf(&sg[block++], data, data_len);
		nbytes += data_len;
	}

	/* 4. an independently-specified key or password, known to both
	 * TCPs and presumably connection-specific
	 */
	sg_set_buf(&sg[block++], key->key, key->keylen);
	nbytes += key->keylen;

#ifdef CONFIG_TCP_MD5SIG_DEBUG
	printk(" and password: ");
	for (i = 0; i < key->keylen; i++)
		printk("%02x ", (unsigned int)key->key[i]);
#endif

	/* Now store the hash into the packet */
	err = crypto_hash_init(desc);
	if (err)
		goto clear_hash;
	err = crypto_hash_update(desc, sg, nbytes);
	if (err)
		goto clear_hash;
	err = crypto_hash_final(desc, md5_hash);
	if (err)
		goto clear_hash;

	/* Reset header, and free up the crypto */
	tcp_put_md5sig_pool();
	th->check = old_checksum;

out:
#ifdef CONFIG_TCP_MD5SIG_DEBUG
	printk(" result:");
	for (i = 0; i < 16; i++)
		printk(" %02x", (unsigned int)(((u8 *)md5_hash)[i]));
	printk("\n");
#endif
	return 0;
clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	goto out;
}

int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
			 struct sock *sk,
			 struct dst_entry *dst,
			 struct request_sock *req,
			 struct tcphdr *th, int protocol,
			 int tcplen)
{
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->saddr;
		daddr = inet_sk(sk)->daddr;
	} else {
		struct rtable *rt = (struct rtable *)dst;
		BUG_ON(!rt);
		saddr = rt->rt_src;
		daddr = rt->rt_dst;
	}
	return tcp_v4_do_calc_md5_hash(md5_hash, key,
				       saddr, daddr,
				       th, protocol, tcplen);
}

EXPORT_SYMBOL(tcp_v4_calc_md5_hash);

static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	/*
	 * This gets called for each TCP segment that arrives
	 * so we want to be efficient.
	 * We have 3 drop cases:
	 * o No MD5 hash and one expected.
	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
	 */
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	struct iphdr *iph = skb->nh.iph;
	struct tcphdr *th = skb->h.th;
	int length = (th->doff << 2) - sizeof(struct tcphdr);
	int genhash;
	unsigned char *ptr;
	unsigned char newhash[16];

	hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);

	/*
	 * If the TCP option length is less than the TCP_MD5SIG
	 * option length, then we can shortcut
	 */
	if (length < TCPOLEN_MD5SIG) {
		if (hash_expected)
			return 1;
		else
			return 0;
	}

	/* Okay, we can't shortcut - we have to grub through the options */
	ptr = (unsigned char *)(th + 1);
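	/* Standard TCP option walk: EOL terminates the list, NOP is a
	 * single pad byte, and every other option carries a length
	 * byte that counts the kind and length octets themselves.
	 */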
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			goto done_opts;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2)
				goto done_opts;
			if (opsize > length)
				goto done_opts;

			if (opcode == TCPOPT_MD5SIG) {
				hash_location = ptr;
				goto done_opts;
			}
		}
		ptr += opsize - 2;
		length -= opsize;
	}
done_opts:
	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash expected but NOT found "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest));
		}
		return 1;
	}

	if (!hash_expected && hash_location) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash NOT expected but found "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest));
		}
		return 1;
	}

	/* Okay, so this is hash_expected and hash_location -
	 * so we need to calculate the checksum.
	 */
	genhash = tcp_v4_do_calc_md5_hash(newhash,
					  hash_expected,
					  iph->saddr, iph->daddr,
					  th, sk->sk_protocol,
					  skb->len);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash failed for "
			       "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n",
			       NIPQUAD(iph->saddr), ntohs(th->source),
			       NIPQUAD(iph->daddr), ntohs(th->dest),
			       genhash ? " tcp_v4_calc_md5_hash failed" : "");
#ifdef CONFIG_TCP_MD5SIG_DEBUG
			do {
				int i;
				printk("Received: ");
				for (i = 0; i < 16; i++)
					printk("%02x ", 0xff & (int)hash_location[i]);
				printk("\n");
				printk("Calculated: ");
				for (i = 0; i < 16; i++)
					printk("%02x ", 0xff & (int)newhash[i]);
				printk("\n");
			} while (0);
#endif
		}
		return 1;
	}
	return 0;
}

#endif

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_send_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
};

struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
#endif
};

static struct timewait_sock_ops tcp_timewait_sock_ops = {
	.twsk_obj_size	 = sizeof(struct tcp_timewait_sock),
	.twsk_unique	 = tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct tcp_options_received tmp_opt;
	struct request_sock *req;
	__be32 saddr = skb->nh.iph->saddr;
	__be32 daddr = skb->nh.iph->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif

	/* Never answer SYNs sent to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/* TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies) {
			want_cookie = 1;
		} else
#endif
		goto drop;
	}

	/* Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp_request_sock_ops);
	if (!req)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = 536;
	tmp_opt.user_mss  = tcp_sk(sk)->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	if (want_cookie) {
		tcp_clear_options(&tmp_opt);
		tmp_opt.saw_tstamp = 0;
	}

	if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
		/* Some OSes (unknown ones, but I see them on web server, which
		 * contains information interesting only for windows'
		 * users) do not send their stamp in SYN. It is easy case.
		 * We simply do not advertise TS support.
		 */
		tmp_opt.saw_tstamp = 0;
		tmp_opt.tstamp_ok  = 0;
	}
	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;

	tcp_openreq_init(req, &tmp_opt, skb);

	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	ireq->opt = tcp_v4_save_options(sk, skb);
	if (!want_cookie)
		TCP_ECN_create_request(req, skb->h.th);

	if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
		syn_flood_warning(skb);
#endif
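		/* SYN cookies encode the connection parameters in the
		 * ISN itself, so no request state needs to survive
		 * until the final ACK arrives.
		 */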
		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
	} else if (!isn) {
		struct inet_peer *peer = NULL;

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet_csk_route_req(sk, req)) != NULL &&
		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
		    peer->v4daddr == saddr) {
			if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
				dst_release(dst);
				goto drop_and_free;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open "
				       "request from %u.%u.%u.%u/%u\n",
				       NIPQUAD(saddr),
				       ntohs(skb->h.th->source));
			dst_release(dst);
			goto drop_and_free;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v4_send_synack(sk, req, dst))
		goto drop_and_free;

	if (want_cookie) {
		reqsk_free(req);
	} else {
		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	}
	return 0;

drop_and_free:
	reqsk_free(req);
drop:
	return 0;
}


/*
 * The three way handshake has completed - we got a valid ACK -
 * now create the new socket.
 */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit;

	newsk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(newsk, dst);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
	newinet->daddr	      = ireq->rmt_addr;
	newinet->rcv_saddr    = ireq->loc_addr;
	newinet->saddr	      = ireq->loc_addr;
	newinet->opt	      = ireq->opt;
	ireq->opt	      = NULL;
	newinet->mc_index     = inet_iif(skb);
	newinet->mc_ttl	      = skb->nh.iph->ttl;
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newinet->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
	newinet->id = newtp->write_seq ^ jiffies;

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) {
		/*
		 * We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmalloc(key->keylen, GFP_ATOMIC);
		if (newkey) {
			memcpy(newkey, key->key, key->keylen);
			/* Register the key under the peer's address
			 * (the listener's daddr is unset).
			 */
			tcp_v4_md5_do_add(newsk, newinet->daddr,
					  newkey, key->keylen);
		}
	}
#endif

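	/* Hash the child into the established table and let it share
	 * the listener's bound local port.
	 */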
	__inet_hash(&tcp_hashinfo, newsk, 0);
	__inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}

static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th;
	struct iphdr *iph = skb->nh.iph;
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
						       iph->saddr, iph->daddr);
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
				      th->source, skb->nh.iph->daddr,
				      th->dest, inet_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}

static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr,
				  skb->nh.iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, skb->nh.iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

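	/* For short segments it is presumably cheaper to verify the
	 * checksum right away than to defer it until the data is
	 * copied.
	 */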
	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and there is no MD5 TCP option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < (skb->h.th->doff << 2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb)) {
				rsk = nsk;
				goto reset;
			}
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;
}

/*
 *	From tcp_input.c
 */

int tcp_v4_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/* Count it even if it's bad */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr) / 4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
	 * So, we defer the checks. */
	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v4_checksum_init(skb)))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when	 = 0;
	TCP_SKB_CB(skb)->flags	 = skb->nh.iph->tos;
	TCP_SKB_CB(skb)->sacked	 = 0;

	sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
			   skb->nh.iph->daddr, th->dest,
			   inet_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;
	nf_reset(skb);

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
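	/* If a user context holds the socket lock, park the segment
	 * on the backlog for the lock owner to process on release;
	 * otherwise try the prequeue fast path before falling back to
	 * full receive processing.
	 */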
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = get_softnet_dma();
		if (tp->ucopy.dma_chan)
			ret = tcp_v4_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v4_do_rcv(sk, skb);
		}
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);

	return ret;

no_tcp_socket:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v4_send_reset(NULL, skb);
	}

discard_it:
	/* Discard frame. */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}
	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN: {
		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
							skb->nh.iph->daddr,
							th->dest,
							inet_iif(skb));
		if (sk2) {
			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
			inet_twsk_put(inet_twsk(sk));
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v4_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

/* VJ's idea. Save last timestamp seen from this destination
 * and hold it at least for normal timewait interval to use for duplicate
 * segment detection in subsequent connections, before they enter synchronized
 * state.
 */

int tcp_v4_remember_stamp(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_get(sk);
	struct inet_peer *peer = NULL;
	int release_it = 0;

	if (!rt || rt->rt_dst != inet->daddr) {
		peer = inet_getpeer(inet->daddr, 1);
		release_it = 1;
	} else {
		if (!rt->peer)
			rt_bind_peer(rt, 1);
		peer = rt->peer;
	}

	if (peer) {
		if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) {
			peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp;
			peer->tcp_ts = tp->rx_opt.ts_recent;
		}
		if (release_it)
			inet_putpeer(peer);
		return 1;
	}

	return 0;
}

int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
{
	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);

	if (peer) {
		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);

		if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 ||
		    (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec &&
		     peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) {
			peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp;
			peer->tcp_ts	   = tcptw->tw_ts_recent;
		}
		inet_putpeer(peer);
		return 1;
	}

	return 0;
}

struct inet_connection_sock_af_ops ipv4_specific = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v4_conn_request,
	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
	.remember_stamp	   = tcp_v4_remember_stamp,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ip_setsockopt,
	.getsockopt	   = ip_getsockopt,
	.addr2sockaddr	   = inet_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ip_setsockopt,
	.compat_getsockopt = compat_ip_getsockopt,
#endif
};

struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
#ifdef CONFIG_TCP_MD5SIG
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_calc_md5_hash,
	.md5_add	= tcp_v4_md5_add_func,
	.md5_parse	= tcp_v4_parse_md5_keys,
#endif
};

1da177e4
LT
1866/* NOTE: A lot of things set to zero explicitly by call to
1867 * sk_alloc() so need not be done here.
1868 */
1869static int tcp_v4_init_sock(struct sock *sk)
1870{
6687e988 1871 struct inet_connection_sock *icsk = inet_csk(sk);
1da177e4
LT
1872 struct tcp_sock *tp = tcp_sk(sk);
1873
1874 skb_queue_head_init(&tp->out_of_order_queue);
1875 tcp_init_xmit_timers(sk);
1876 tcp_prequeue_init(tp);
1877
	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;	/* Infinity */
	tp->snd_cwnd_clamp = ~0;
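	/* 536 bytes is the RFC 1122 default MSS (576-byte datagram minus
	 * 40 bytes of headers); it is raised once the path MTU is known.
	 */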
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;

	sk->sk_state = TCP_CLOSE;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	icsk->icsk_af_ops = &ipv4_specific;
	icsk->icsk_sync_mss = tcp_sync_mss;
#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv4_specific;
#endif

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}

int tcp_v4_destroy_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tcp_clear_xmit_timers(sk);

	tcp_cleanup_congestion_control(sk);

	/* Clean up the write buffer. */
	sk_stream_writequeue_purge(sk);

	/* Clean up our, hopefully empty, out_of_order_queue. */
	__skb_queue_purge(&tp->out_of_order_queue);

#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list, if any. */
	if (tp->md5sig_info) {
		tcp_v4_clear_md5_list(sk);
		kfree(tp->md5sig_info);
		tp->md5sig_info = NULL;
	}
#endif

#ifdef CONFIG_NET_DMA
	/* Clean up our sk_async_wait_queue. */
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	/* Clean up the prequeue; it really should be empty by now. */
	__skb_queue_purge(&tp->ucopy.prequeue);

	/* Clean up a referenced TCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(&tcp_hashinfo, sk);

	/* If a sendmsg cached page exists, toss it. */
	if (sk->sk_sndmsg_page) {
		__free_page(sk->sk_sndmsg_page);
		sk->sk_sndmsg_page = NULL;
	}

	atomic_dec(&tcp_sockets_allocated);

	return 0;
}

EXPORT_SYMBOL(tcp_v4_destroy_sock);

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCP sock list dumping. */
1969
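/*
 * The iterator below walks the listening hash first (descending into
 * each listener's queue of open requests), then the established hash,
 * and finally the TIME_WAIT half of each ehash bucket.  Its position
 * is carried across reads in struct tcp_iter_state.
 */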
static inline struct inet_timewait_sock *tw_head(struct hlist_head *head)
{
	return hlist_empty(head) ? NULL :
		list_entry(head->first, struct inet_timewait_sock, tw_node);
}

static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
{
	return tw->tw_node.next ?
		hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
}

static void *listening_get_next(struct seq_file *seq, void *cur)
{
	struct inet_connection_sock *icsk;
	struct hlist_node *node;
	struct sock *sk = cur;
	struct tcp_iter_state *st = seq->private;

	if (!sk) {
		st->bucket = 0;
		sk = sk_head(&tcp_hashinfo.listening_hash[0]);
		goto get_sk;
	}

	++st->num;

	if (st->state == TCP_SEQ_STATE_OPENREQ) {
		struct request_sock *req = cur;

		icsk = inet_csk(st->syn_wait_sk);
		req = req->dl_next;
		while (1) {
			while (req) {
				if (req->rsk_ops->family == st->family) {
					cur = req;
					goto out;
				}
				req = req->dl_next;
			}
			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
				break;
get_req:
			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
		}
		sk = sk_next(st->syn_wait_sk);
		st->state = TCP_SEQ_STATE_LISTENING;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	} else {
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue))
			goto start_req;
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		sk = sk_next(sk);
	}
get_sk:
	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family) {
			cur = sk;
			goto out;
		}
		icsk = inet_csk(sk);
		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
			st->uid = sock_i_uid(sk);
			st->syn_wait_sk = sk;
			st->state = TCP_SEQ_STATE_OPENREQ;
			st->sbucket = 0;
			goto get_req;
		}
		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
	}
	if (++st->bucket < INET_LHTABLE_SIZE) {
		sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]);
		goto get_sk;
	}
	cur = NULL;
out:
	return cur;
}

static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
{
	void *rc = listening_get_next(seq, NULL);

	while (rc && *pos) {
		rc = listening_get_next(seq, rc);
		--*pos;
	}
	return rc;
}

static void *established_get_first(struct seq_file *seq)
{
	struct tcp_iter_state *st = seq->private;
	void *rc = NULL;

	for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		struct sock *sk;
		struct hlist_node *node;
		struct inet_timewait_sock *tw;

		/* We can reschedule _before_ having picked the target: */
		cond_resched_softirq();

		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
			if (sk->sk_family != st->family)
				continue;
			rc = sk;
			goto out;
		}
		st->state = TCP_SEQ_STATE_TIME_WAIT;
		inet_twsk_for_each(tw, node,
				   &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) {
			if (tw->tw_family != st->family)
				continue;
			rc = tw;
			goto out;
		}
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;
	}
out:
	return rc;
}

static void *established_get_next(struct seq_file *seq, void *cur)
{
	struct sock *sk = cur;
	struct inet_timewait_sock *tw;
	struct hlist_node *node;
	struct tcp_iter_state *st = seq->private;

	++st->num;

	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
		tw = cur;
		tw = tw_next(tw);
get_tw:
		while (tw && tw->tw_family != st->family)
			tw = tw_next(tw);
		if (tw) {
			cur = tw;
			goto out;
		}
		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		st->state = TCP_SEQ_STATE_ESTABLISHED;

		/* We can reschedule between buckets: */
		cond_resched_softirq();

		if (++st->bucket < tcp_hashinfo.ehash_size) {
			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
		} else {
			cur = NULL;
			goto out;
		}
	} else
		sk = sk_next(sk);

	sk_for_each_from(sk, node) {
		if (sk->sk_family == st->family)
			goto found;
	}

	st->state = TCP_SEQ_STATE_TIME_WAIT;
	tw = tw_head(&tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain);
	goto get_tw;
found:
	cur = sk;
out:
	return cur;
}

static void *established_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc = established_get_first(seq);

	while (rc && pos) {
		rc = established_get_next(seq, rc);
		--pos;
	}
	return rc;
}

static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
{
	void *rc;
	struct tcp_iter_state *st = seq->private;

	inet_listen_lock(&tcp_hashinfo);
	st->state = TCP_SEQ_STATE_LISTENING;
	rc = listening_get_idx(seq, &pos);

	if (!rc) {
		inet_listen_unlock(&tcp_hashinfo);
		local_bh_disable();
		st->state = TCP_SEQ_STATE_ESTABLISHED;
		rc = established_get_idx(seq, pos);
	}

	return rc;
}

static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct tcp_iter_state *st = seq->private;
	st->state = TCP_SEQ_STATE_LISTENING;
	st->num = 0;
	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	void *rc = NULL;
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		rc = tcp_get_idx(seq, 0);
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
	case TCP_SEQ_STATE_LISTENING:
		rc = listening_get_next(seq, v);
		if (!rc) {
			inet_listen_unlock(&tcp_hashinfo);
			local_bh_disable();
			st->state = TCP_SEQ_STATE_ESTABLISHED;
			rc = established_get_first(seq);
		}
		break;
	case TCP_SEQ_STATE_ESTABLISHED:
	case TCP_SEQ_STATE_TIME_WAIT:
		rc = established_get_next(seq, v);
		break;
	}
out:
	++*pos;
	return rc;
}

static void tcp_seq_stop(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_OPENREQ:
		if (v) {
			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
		}
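		/* fall through: the listen lock is held in this state too */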
	case TCP_SEQ_STATE_LISTENING:
		if (v != SEQ_START_TOKEN)
			inet_listen_unlock(&tcp_hashinfo);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
	case TCP_SEQ_STATE_ESTABLISHED:
		if (v)
			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
		local_bh_enable();
		break;
	}
}

static int tcp_seq_open(struct inode *inode, struct file *file)
{
	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
	struct seq_file *seq;
	struct tcp_iter_state *s;
	int rc;

	if (unlikely(afinfo == NULL))
		return -EINVAL;

	s = kzalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;
	s->family = afinfo->family;
	s->seq_ops.start = tcp_seq_start;
	s->seq_ops.next = tcp_seq_next;
	s->seq_ops.show = afinfo->seq_show;
	s->seq_ops.stop = tcp_seq_stop;

	rc = seq_open(file, &s->seq_ops);
	if (rc)
		goto out_kfree;
	seq = file->private_data;
	seq->private = s;
out:
	return rc;
out_kfree:
	kfree(s);
	goto out;
}

int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
{
	int rc = 0;
	struct proc_dir_entry *p;

	if (!afinfo)
		return -EINVAL;
	afinfo->seq_fops->owner = afinfo->owner;
	afinfo->seq_fops->open = tcp_seq_open;
	afinfo->seq_fops->read = seq_read;
	afinfo->seq_fops->llseek = seq_lseek;
	afinfo->seq_fops->release = seq_release_private;

	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
	if (p)
		p->data = afinfo;
	else
		rc = -ENOMEM;
	return rc;
}

void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
{
	if (!afinfo)
		return;
	proc_net_remove(afinfo->name);
	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
}
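
/*
 * A minimal usage sketch (hypothetical caller): an address family fills
 * in a struct tcp_seq_afinfo and registers it to get its own seq_file
 * entry under /proc/net:
 *
 *	static struct file_operations tcpx_seq_fops;
 *	static struct tcp_seq_afinfo tcpx_seq_afinfo = {
 *		.owner	  = THIS_MODULE,
 *		.name	  = "tcpx",
 *		.family	  = AF_INET,
 *		.seq_show = tcpx_seq_show,
 *		.seq_fops = &tcpx_seq_fops,
 *	};
 *
 *	tcp_proc_register(&tcpx_seq_afinfo);
 *
 * tcp4_seq_afinfo below is the in-tree instance of this pattern.
 */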
2303
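/* Format one /proc/net/tcp row for a connection still in SYN_RECV
 * (an open request on a listener's SYN queue).
 */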
static void get_openreq4(struct sock *sk, struct request_sock *req,
			 char *tmpbuf, int i, int uid)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	int ttd = req->expires - jiffies;

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
		i,
		ireq->loc_addr,
		ntohs(inet_sk(sk)->sport),
		ireq->rmt_addr,
		ntohs(ireq->rmt_port),
		TCP_SYN_RECV,
		0, 0, /* could print option size, but that is af dependent. */
		1,    /* timers active (only the expire timer) */
		jiffies_to_clock_t(ttd),
		req->retrans,
		uid,
		0,  /* non standard timer */
		0,  /* open_requests have no inode */
		atomic_read(&sk->sk_refcnt),
		req);
}
2328
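/* Format one /proc/net/tcp row for a full socket.  The "tr" column
 * encodes the pending timer: 1 retransmit, 4 zero-window probe,
 * 2 keepalive (sk_timer), 0 none; TIME_WAIT rows use 3.
 */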
static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i)
{
	int timer_active;
	unsigned long timer_expires;
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct inet_sock *inet = inet_sk(sp);
	__be32 dest = inet->daddr;
	__be32 src = inet->rcv_saddr;
	__u16 destp = ntohs(inet->dport);
	__u16 srcp = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active = 2;
		timer_expires = sp->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
			"%08X %5d %8d %lu %d %p %u %u %u %u %d",
		i, src, srcp, dest, destp, sp->sk_state,
		tp->write_seq - tp->snd_una,
		sp->sk_state == TCP_LISTEN ? sp->sk_ack_backlog :
					     (tp->rcv_nxt - tp->copied_seq),
		timer_active,
		jiffies_to_clock_t(timer_expires - jiffies),
		icsk->icsk_retransmits,
		sock_i_uid(sp),
		icsk->icsk_probes_out,
		sock_i_ino(sp),
		atomic_read(&sp->sk_refcnt), sp,
		icsk->icsk_rto,
		icsk->icsk_ack.ato,
		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		tp->snd_cwnd,
		tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh);
}

static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i)
{
	__be32 dest, src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = tw->tw_daddr;
	src = tw->tw_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
		3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		atomic_read(&tw->tw_refcnt), tw);
}

#define TMPSZ 150

static int tcp4_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	char tmpbuf[TMPSZ + 1];

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-*s\n", TMPSZ - 1,
			   "  sl  local_address rem_address   st tx_queue "
			   "rx_queue tr tm->when retrnsmt   uid  timeout "
			   "inode");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp4_sock(v, tmpbuf, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait4_sock(v, tmpbuf, st->num);
		break;
	}
	seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf);
out:
	return 0;
}

static struct file_operations tcp4_seq_fops;
static struct tcp_seq_afinfo tcp4_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp",
	.family		= AF_INET,
	.seq_show	= tcp4_seq_show,
	.seq_fops	= &tcp4_seq_fops,
};

int __init tcp4_proc_init(void)
{
	return tcp_proc_register(&tcp4_seq_afinfo);
}

void tcp4_proc_exit(void)
{
	tcp_proc_unregister(&tcp4_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
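
/* Hook table tying the TCP state machine into the generic AF_INET
 * socket layer; registered via proto_register() at boot.
 */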
struct proto tcp_prot = {
	.name			= "TCP",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v4_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v4_init_sock,
	.destroy		= tcp_v4_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v4_do_rcv,
	.hash			= tcp_v4_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v4_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.orphan_count		= &tcp_orphan_count,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp_sock),
	.twsk_prot		= &tcp_timewait_sock_ops,
	.rsk_prot		= &tcp_request_sock_ops,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

void __init tcp_v4_init(struct net_proto_family *ops)
{
	if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW,
				     IPPROTO_TCP) < 0)
		panic("Failed to create the TCP control socket.\n");
}

EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_tcp_low_latency);