]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/netfilter/nf_conntrack_proto_tcp.c
net: fix a race in sock_queue_err_skb()
[mirror_ubuntu-bionic-kernel.git] / net / netfilter / nf_conntrack_proto_tcp.c
CommitLineData
9fb9cbb1
YK
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
9fb9cbb1
YK
7 */
8
9fb9cbb1 9#include <linux/types.h>
9fb9cbb1 10#include <linux/timer.h>
9fb9cbb1
YK
11#include <linux/module.h>
12#include <linux/in.h>
13#include <linux/tcp.h>
14#include <linux/spinlock.h>
15#include <linux/skbuff.h>
16#include <linux/ipv6.h>
17#include <net/ip6_checksum.h>
534f81a5 18#include <asm/unaligned.h>
9fb9cbb1
YK
19
20#include <net/tcp.h>
21
22#include <linux/netfilter.h>
23#include <linux/netfilter_ipv4.h>
24#include <linux/netfilter_ipv6.h>
25#include <net/netfilter/nf_conntrack.h>
605dcad6 26#include <net/netfilter/nf_conntrack_l4proto.h>
f6180121 27#include <net/netfilter/nf_conntrack_ecache.h>
f01ffbd6 28#include <net/netfilter/nf_log.h>
9d2493f8
CP
29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
9fb9cbb1 31
601e68e1
YH
32/* "Be conservative in what you do,
33 be liberal in what you accept from others."
9fb9cbb1 34 If it's non-zero, we mark only out of window RST segments as INVALID. */
3aef0fd9 35static int nf_ct_tcp_be_liberal __read_mostly = 0;
9fb9cbb1 36
a09113c2 37/* If it is set to zero, we disable picking up already established
9fb9cbb1 38 connections. */
3aef0fd9 39static int nf_ct_tcp_loose __read_mostly = 1;
9fb9cbb1 40
601e68e1
YH
41/* Max number of the retransmitted packets without receiving an (acceptable)
42 ACK from the destination. If this number is reached, a shorter timer
9fb9cbb1 43 will be started. */
3aef0fd9 44static int nf_ct_tcp_max_retrans __read_mostly = 3;
9fb9cbb1
YK
45
46 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
47 closely. They're more complex. --RR */
48
82f568fc 49static const char *const tcp_conntrack_names[] = {
9fb9cbb1
YK
50 "NONE",
51 "SYN_SENT",
52 "SYN_RECV",
53 "ESTABLISHED",
54 "FIN_WAIT",
55 "CLOSE_WAIT",
56 "LAST_ACK",
57 "TIME_WAIT",
58 "CLOSE",
874ab923 59 "SYN_SENT2",
9fb9cbb1 60};
601e68e1 61
9fb9cbb1
YK
/*
 * Postfix unit multipliers for the timeout table below. Each macro expands
 * to "* <factor>", so "2 MINS" becomes "2 * 60 * HZ". The expansions are
 * deliberately unparenthesised and only make sense directly after a number.
 */
62#define SECS * HZ
63#define MINS * 60 SECS
64#define HOURS * 60 MINS
65#define DAYS * 24 HOURS
66
33ee4464 67static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
2d646286
PM
68 [TCP_CONNTRACK_SYN_SENT] = 2 MINS,
69 [TCP_CONNTRACK_SYN_RECV] = 60 SECS,
70 [TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
71 [TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
72 [TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
73 [TCP_CONNTRACK_LAST_ACK] = 30 SECS,
74 [TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
75 [TCP_CONNTRACK_CLOSE] = 10 SECS,
874ab923 76 [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
33ee4464
PNA
77/* RFC1122 says the R2 limit should be at least 100 seconds.
78 Linux uses 15 packets as limit, which corresponds
79 to ~13-30min depending on RTO. */
80 [TCP_CONNTRACK_RETRANS] = 5 MINS,
81 [TCP_CONNTRACK_UNACK] = 5 MINS,
2d646286 82};
601e68e1 83
9fb9cbb1
YK
/* Short aliases for the conntrack states, used to keep tcp_conntracks[] readable. */
#define sNO	TCP_CONNTRACK_NONE
#define sSS	TCP_CONNTRACK_SYN_SENT
#define sSR	TCP_CONNTRACK_SYN_RECV
#define sES	TCP_CONNTRACK_ESTABLISHED
#define sFW	TCP_CONNTRACK_FIN_WAIT
#define sCW	TCP_CONNTRACK_CLOSE_WAIT
#define sLA	TCP_CONNTRACK_LAST_ACK
#define sTW	TCP_CONNTRACK_TIME_WAIT
#define sCL	TCP_CONNTRACK_CLOSE
#define sS2	TCP_CONNTRACK_SYN_SENT2
/* Pseudo-states: sIV marks a packet as invalid, sIG as "ignored". */
#define sIV	TCP_CONNTRACK_MAX
#define sIG	TCP_CONNTRACK_IGNORE
96
97/* What TCP flags are set from RST/SYN/FIN/ACK. */
/*
 * Classification of a segment by its flags; the value is the second
 * index into tcp_conntracks[][][], so the numbering must stay dense.
 */
enum tcp_bit_set {
	TCP_SYN_SET	= 0,	/* SYN without ACK */
	TCP_SYNACK_SET	= 1,	/* SYN together with ACK */
	TCP_FIN_SET	= 2,	/* FIN */
	TCP_ACK_SET	= 3,	/* plain ACK */
	TCP_RST_SET	= 4,	/* RST */
	TCP_NONE_SET	= 5,	/* none of the above */
};
601e68e1 106
9fb9cbb1
YK
107/*
108 * The TCP state transition table needs a few words...
109 *
110 * We are the man in the middle. All the packets go through us
111 * but might get lost in transit to the destination.
601e68e1 112 * It is assumed that the destinations can't receive segments
9fb9cbb1
YK
113 * we haven't seen.
114 *
115 * The checked segment is in window, but our windows are *not*
116 * equivalent with the ones of the sender/receiver. We always
117 * try to guess the state of the current sender.
118 *
119 * The meaning of the states are:
120 *
121 * NONE: initial state
601e68e1 122 * SYN_SENT: SYN-only packet seen
874ab923 123 * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
9fb9cbb1
YK
124 * SYN_RECV: SYN-ACK packet seen
125 * ESTABLISHED: ACK packet seen
126 * FIN_WAIT: FIN packet seen
601e68e1 127 * CLOSE_WAIT: ACK seen (after FIN)
9fb9cbb1
YK
128 * LAST_ACK: FIN seen (after FIN)
129 * TIME_WAIT: last ACK seen
b2155e7f 130 * CLOSE: closed connection (RST)
9fb9cbb1 131 *
9fb9cbb1 132 * Packets marked as IGNORED (sIG):
601e68e1
YH
133 * if they may be either invalid or valid
134 * and the receiver may send back a connection
9fb9cbb1
YK
135 * closing RST or a SYN/ACK.
136 *
137 * Packets marked as INVALID (sIV):
874ab923 138 * if we regard them as truly invalid packets
9fb9cbb1 139 */
/*
 * State transition table, looked up as tcp_conntracks[dir][index][old_state]:
 *   dir       - first index; the blocks below are labelled ORIGINAL and REPLY
 *   index     - enum tcp_bit_set classification of the segment (row)
 *   old_state - current conntrack state (column, see the sNO..sS2 headers)
 * The cell is the new state, or sIV (invalid) / sIG (ignore).
 */
a5e73c29 140static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
9fb9cbb1
YK
141 {
142/* ORIGINAL */
874ab923
JK
143/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
144/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
9fb9cbb1
YK
145/*
146 * sNO -> sSS Initialize a new connection
147 * sSS -> sSS Retransmitted SYN
874ab923
JK
148 * sS2 -> sS2 Late retransmitted SYN
149 * sSR -> sIG
9fb9cbb1 150 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
601e68e1 151 * are errors. Receiver will reply with RST
9fb9cbb1
YK
152 * and close the connection.
153 * Or we are not in sync and hold a dead connection.
154 * sFW -> sIG
155 * sCW -> sIG
156 * sLA -> sIG
157 * sTW -> sSS Reopened connection (RFC 1122).
158 * sCL -> sSS
159 */
874ab923
JK
160/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
161/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
9fb9cbb1 162/*
874ab923
JK
163 * sNO -> sIV Too late and no reason to do anything
164 * sSS -> sIV Client can't send SYN and then SYN/ACK
165 * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
166 * sSR -> sIG
167 * sES -> sIG Error: SYNs in window outside the SYN_SENT state
168 * are errors. Receiver will reply with RST
169 * and close the connection.
170 * Or we are not in sync and hold a dead connection.
171 * sFW -> sIG
172 * sCW -> sIG
173 * sLA -> sIG
174 * sTW -> sIG
175 * sCL -> sIG
9fb9cbb1 176 */
874ab923 177/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
9fb9cbb1
YK
178/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
179/*
180 * sNO -> sIV Too late and no reason to do anything...
181 * sSS -> sIV Client might not send FIN in this state:
182 * we enforce waiting for a SYN/ACK reply first.
874ab923 183 * sS2 -> sIV
9fb9cbb1
YK
184 * sSR -> sFW Close started.
185 * sES -> sFW
186 * sFW -> sLA FIN seen in both directions, waiting for
601e68e1 187 * the last ACK.
9fb9cbb1
YK
188 * Might be a retransmitted FIN as well...
189 * sCW -> sLA
190 * sLA -> sLA Retransmitted FIN. Remain in the same state.
191 * sTW -> sTW
192 * sCL -> sCL
193 */
874ab923 194/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
9fb9cbb1
YK
195/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
196/*
197 * sNO -> sES Assumed.
198 * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
874ab923 199 * sS2 -> sIV
9fb9cbb1
YK
200 * sSR -> sES Established state is reached.
201 * sES -> sES :-)
202 * sFW -> sCW Normal close request answered by ACK.
203 * sCW -> sCW
204 * sLA -> sTW Last ACK detected.
205 * sTW -> sTW Retransmitted last ACK. Remain in the same state.
206 * sCL -> sCL
207 */
874ab923
JK
208/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
209/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
9fb9cbb1
YK
210/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
211 },
212 {
213/* REPLY */
874ab923
JK
214/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
215/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
9fb9cbb1
YK
216/*
217 * sNO -> sIV Never reached.
874ab923
JK
218 * sSS -> sS2 Simultaneous open
219 * sS2 -> sS2 Retransmitted simultaneous SYN
220 * sSR -> sIV Invalid SYN packets sent by the server
221 * sES -> sIV
9fb9cbb1
YK
222 * sFW -> sIV
223 * sCW -> sIV
224 * sLA -> sIV
225 * sTW -> sIV Reopened connection, but server may not do it.
226 * sCL -> sIV
227 */
874ab923 228/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
8a80c79a 229/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
9fb9cbb1
YK
230/*
231 * sSS -> sSR Standard open.
874ab923 232 * sS2 -> sSR Simultaneous open
8a80c79a 233 * sSR -> sIG Retransmitted SYN/ACK, ignore it.
9fb9cbb1
YK
234 * sES -> sIG Late retransmitted SYN/ACK?
235 * sFW -> sIG Might be SYN/ACK answering ignored SYN
236 * sCW -> sIG
237 * sLA -> sIG
238 * sTW -> sIG
239 * sCL -> sIG
240 */
874ab923 241/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
9fb9cbb1
YK
242/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
243/*
244 * sSS -> sIV Server might not send FIN in this state.
874ab923 245 * sS2 -> sIV
9fb9cbb1
YK
246 * sSR -> sFW Close started.
247 * sES -> sFW
248 * sFW -> sLA FIN seen in both directions.
249 * sCW -> sLA
250 * sLA -> sLA Retransmitted FIN.
251 * sTW -> sTW
252 * sCL -> sCL
253 */
874ab923
JK
254/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
255/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
9fb9cbb1 256/*
73f30602 257 * sSS -> sIG Might be a half-open connection.
874ab923 258 * sS2 -> sIG
9fb9cbb1
YK
259 * sSR -> sSR Might answer late resent SYN.
260 * sES -> sES :-)
261 * sFW -> sCW Normal close request answered by ACK.
262 * sCW -> sCW
263 * sLA -> sTW Last ACK detected.
264 * sTW -> sTW Retransmitted last ACK.
265 * sCL -> sCL
266 */
874ab923
JK
267/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
268/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
9fb9cbb1 269/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
601e68e1 270 }
9fb9cbb1
YK
271};
272
09f263cd
JE
273static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
274 struct nf_conntrack_tuple *tuple)
9fb9cbb1 275{
82f568fc
JE
276 const struct tcphdr *hp;
277 struct tcphdr _hdr;
9fb9cbb1
YK
278
279 /* Actually only need first 8 bytes. */
280 hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
281 if (hp == NULL)
09f263cd 282 return false;
9fb9cbb1
YK
283
284 tuple->src.u.tcp.port = hp->source;
285 tuple->dst.u.tcp.port = hp->dest;
286
09f263cd 287 return true;
9fb9cbb1
YK
288}
289
09f263cd
JE
290static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
291 const struct nf_conntrack_tuple *orig)
9fb9cbb1
YK
292{
293 tuple->src.u.tcp.port = orig->dst.u.tcp.port;
294 tuple->dst.u.tcp.port = orig->src.u.tcp.port;
09f263cd 295 return true;
9fb9cbb1
YK
296}
297
298/* Print out the per-protocol part of the tuple. */
299static int tcp_print_tuple(struct seq_file *s,
300 const struct nf_conntrack_tuple *tuple)
301{
302 return seq_printf(s, "sport=%hu dport=%hu ",
303 ntohs(tuple->src.u.tcp.port),
304 ntohs(tuple->dst.u.tcp.port));
305}
306
307/* Print out the private part of the conntrack. */
440f0d58 308static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
9fb9cbb1
YK
309{
310 enum tcp_conntrack state;
311
440f0d58 312 spin_lock_bh(&ct->lock);
c88130bc 313 state = ct->proto.tcp.state;
440f0d58 314 spin_unlock_bh(&ct->lock);
9fb9cbb1
YK
315
316 return seq_printf(s, "%s ", tcp_conntrack_names[state]);
317}
318
319static unsigned int get_conntrack_index(const struct tcphdr *tcph)
320{
321 if (tcph->rst) return TCP_RST_SET;
322 else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
323 else if (tcph->fin) return TCP_FIN_SET;
324 else if (tcph->ack) return TCP_ACK_SET;
325 else return TCP_NONE_SET;
326}
327
328/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
329 in IP Filter' by Guido van Rooij.
601e68e1 330
631dd1a8
JM
331 http://www.sane.nl/events/sane2000/papers.html
332 http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
601e68e1 333
9fb9cbb1
YK
334 The boundaries and the conditions are changed according to RFC793:
335 the packet must intersect the window (i.e. segments may be
336 after the right or before the left edge) and thus receivers may ACK
337 segments after the right edge of the window.
338
601e68e1 339 td_maxend = max(sack + max(win,1)) seen in reply packets
9fb9cbb1
YK
340 td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
341 td_maxwin += seq + len - sender.td_maxend
342 if seq + len > sender.td_maxend
343 td_end = max(seq + len) seen in sent packets
601e68e1 344
9fb9cbb1
YK
345 I. Upper bound for valid data: seq <= sender.td_maxend
346 II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
84ebe1cd
JK
347 III. Upper bound for valid (s)ack: sack <= receiver.td_end
348 IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
9fb9cbb1 349
84ebe1cd
JK
350 where sack is the highest right edge of sack block found in the packet
351 or ack in the case of packet without SACK option.
9fb9cbb1 352
84ebe1cd 353 The upper bound limit for a valid (s)ack is not ignored -
601e68e1 354 we don't have to deal with fragments.
9fb9cbb1
YK
355*/
356
357static inline __u32 segment_seq_plus_len(__u32 seq,
358 size_t len,
359 unsigned int dataoff,
82f568fc 360 const struct tcphdr *tcph)
9fb9cbb1
YK
361{
362 /* XXX Should I use payload length field in IP/IPv6 header ?
363 * - YK */
364 return (seq + len - dataoff - tcph->doff*4
365 + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
366}
601e68e1 367
9fb9cbb1
YK
368/* Fixme: what about big packets? */
/*
 * MAXACKWINDOW(sender) evaluates to the larger of sender->td_maxwin and
 * the fixed floor MAXACKWINCONST. The argument is expanded twice, so it
 * must not have side effects.
 */
369#define MAXACKWINCONST 66000
370#define MAXACKWINDOW(sender) \
371 ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
372 : MAXACKWINCONST)
601e68e1 373
9fb9cbb1
YK
374/*
375 * Simplified tcp_parse_options routine from tcp_input.c
376 */
377static void tcp_options(const struct sk_buff *skb,
378 unsigned int dataoff,
82f568fc 379 const struct tcphdr *tcph,
9fb9cbb1
YK
380 struct ip_ct_tcp_state *state)
381{
382 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
82f568fc 383 const unsigned char *ptr;
9fb9cbb1
YK
384 int length = (tcph->doff*4) - sizeof(struct tcphdr);
385
386 if (!length)
387 return;
388
389 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
390 length, buff);
391 BUG_ON(ptr == NULL);
392
601e68e1 393 state->td_scale =
9fb9cbb1
YK
394 state->flags = 0;
395
396 while (length > 0) {
397 int opcode=*ptr++;
398 int opsize;
399
400 switch (opcode) {
401 case TCPOPT_EOL:
402 return;
403 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
404 length--;
405 continue;
406 default:
407 opsize=*ptr++;
408 if (opsize < 2) /* "silly options" */
409 return;
410 if (opsize > length)
4a5cc84a 411 return; /* don't parse partial options */
9fb9cbb1 412
601e68e1 413 if (opcode == TCPOPT_SACK_PERM
9fb9cbb1
YK
414 && opsize == TCPOLEN_SACK_PERM)
415 state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
416 else if (opcode == TCPOPT_WINDOW
417 && opsize == TCPOLEN_WINDOW) {
418 state->td_scale = *(u_int8_t *)ptr;
419
420 if (state->td_scale > 14) {
421 /* See RFC1323 */
422 state->td_scale = 14;
423 }
424 state->flags |=
425 IP_CT_TCP_FLAG_WINDOW_SCALE;
426 }
427 ptr += opsize - 2;
428 length -= opsize;
429 }
430 }
431}
432
433static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
82f568fc 434 const struct tcphdr *tcph, __u32 *sack)
9fb9cbb1 435{
601e68e1 436 unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
82f568fc 437 const unsigned char *ptr;
9fb9cbb1
YK
438 int length = (tcph->doff*4) - sizeof(struct tcphdr);
439 __u32 tmp;
440
441 if (!length)
442 return;
443
444 ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
445 length, buff);
446 BUG_ON(ptr == NULL);
447
448 /* Fast path for timestamp-only option */
bb9fc373 449 if (length == TCPOLEN_TSTAMP_ALIGNED
8f05ce91
YH
450 && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
451 | (TCPOPT_NOP << 16)
452 | (TCPOPT_TIMESTAMP << 8)
453 | TCPOLEN_TIMESTAMP))
9fb9cbb1
YK
454 return;
455
456 while (length > 0) {
457 int opcode = *ptr++;
458 int opsize, i;
459
460 switch (opcode) {
461 case TCPOPT_EOL:
462 return;
463 case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
464 length--;
465 continue;
466 default:
467 opsize = *ptr++;
468 if (opsize < 2) /* "silly options" */
469 return;
470 if (opsize > length)
4a5cc84a 471 return; /* don't parse partial options */
9fb9cbb1 472
601e68e1
YH
473 if (opcode == TCPOPT_SACK
474 && opsize >= (TCPOLEN_SACK_BASE
475 + TCPOLEN_SACK_PERBLOCK)
476 && !((opsize - TCPOLEN_SACK_BASE)
477 % TCPOLEN_SACK_PERBLOCK)) {
478 for (i = 0;
479 i < (opsize - TCPOLEN_SACK_BASE);
480 i += TCPOLEN_SACK_PERBLOCK) {
534f81a5 481 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
9fb9cbb1
YK
482
483 if (after(tmp, *sack))
484 *sack = tmp;
485 }
486 return;
487 }
488 ptr += opsize - 2;
489 length -= opsize;
490 }
491 }
492}
493
f9dd09c7
JK
#ifdef CONFIG_NF_NAT_NEEDED
/*
 * Ask the NAT hook, if one is registered in nf_ct_nat_offset, for the
 * sequence number offset to apply for @seq in direction @dir.
 * Returns 0 when no hook is set.
 */
static inline s16 nat_offset(const struct nf_conn *ct,
			     enum ip_conntrack_dir dir,
			     u32 seq)
{
	typeof(nf_ct_nat_offset) hook = rcu_dereference(nf_ct_nat_offset);

	if (hook == NULL)
		return 0;

	return hook(ct, dir, seq);
}
/* The hook is only consulted for IPv4; every other family yields 0. */
#define NAT_OFFSET(pf, ct, dir, seq) \
	(pf == NFPROTO_IPV4 ? nat_offset(ct, dir, seq) : 0)
#else
/* Without NAT support there is never an offset. */
#define NAT_OFFSET(pf, ct, dir, seq)	0
#endif
508
09f263cd
JE
/*
 * tcp_in_window - validate a segment against the tracked TCP windows.
 *
 * @ct:      conntrack entry; used for the tuple, the netns and the NAT offset
 * @state:   TCP tracking state, updated in place
 * @dir:     packet direction; sender is state->seen[dir], receiver is
 *           state->seen[!dir]
 * @index:   enum tcp_bit_set classification; retransmission counting is
 *           done only for TCP_ACK_SET
 * @skb:     packet
 * @dataoff: offset of the TCP header inside @skb
 * @tcph:    TCP header
 * @pf:      protocol family, passed to NAT_OFFSET() and nf_log_packet()
 *
 * Returns true when the segment satisfies bounds I-IV described in the
 * comment above segment_seq_plus_len(), when it is a SYN without ACK from a
 * not yet initialised sender (simultaneous open), or when liberal mode is
 * enabled; false otherwise. A rejected segment is logged if LOG_INVALID()
 * allows it.
 */
509static bool tcp_in_window(const struct nf_conn *ct,
510 struct ip_ct_tcp *state,
511 enum ip_conntrack_dir dir,
512 unsigned int index,
513 const struct sk_buff *skb,
514 unsigned int dataoff,
515 const struct tcphdr *tcph,
76108cea 516 u_int8_t pf)
9fb9cbb1 517{
c2a2c7e0 518 struct net *net = nf_ct_net(ct);
9fb9cbb1
YK
519 struct ip_ct_tcp_state *sender = &state->seen[dir];
520 struct ip_ct_tcp_state *receiver = &state->seen[!dir];
82f568fc 521 const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
9fb9cbb1 522 __u32 seq, ack, sack, end, win, swin;
f9dd09c7 523 s16 receiver_offset;
09f263cd 524 bool res;
9fb9cbb1
YK
525
526 /*
527 * Get the required data from the packet.
528 */
529 seq = ntohl(tcph->seq);
530 ack = sack = ntohl(tcph->ack_seq);
531 win = ntohs(tcph->window);
532 end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
533
/* SACK blocks are only parsed if the receiver side recorded SACK-permitted. */
534 if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
535 tcp_sack(skb, dataoff, tcph, &sack);
536
f9dd09c7
JK
537 /* Take into account NAT sequence number mangling */
538 receiver_offset = NAT_OFFSET(pf, ct, !dir, ack - 1);
539 ack -= receiver_offset;
540 sack -= receiver_offset;
541
0d53778e
PM
542 pr_debug("tcp_in_window: START\n");
543 pr_debug("tcp_in_window: ");
3c9fba65 544 nf_ct_dump_tuple(tuple);
f9dd09c7
JK
545 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
546 seq, ack, receiver_offset, sack, receiver_offset, win, end);
0d53778e
PM
547 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
548 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
549 sender->td_end, sender->td_maxend, sender->td_maxwin,
550 sender->td_scale,
551 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
552 receiver->td_scale);
9fb9cbb1 553
/* td_maxwin == 0 is the marker for "no data recorded for this direction yet". */
874ab923 554 if (sender->td_maxwin == 0) {
9fb9cbb1
YK
555 /*
556 * Initialize sender data.
557 */
874ab923 558 if (tcph->syn) {
9fb9cbb1 559 /*
874ab923
JK
560 * SYN-ACK in reply to a SYN
561 * or SYN from reply direction in simultaneous open.
9fb9cbb1 562 */
601e68e1 563 sender->td_end =
9fb9cbb1
YK
564 sender->td_maxend = end;
565 sender->td_maxwin = (win == 0 ? 1 : win);
566
567 tcp_options(skb, dataoff, tcph, sender);
601e68e1 568 /*
9fb9cbb1
YK
569 * RFC 1323:
570 * Both sides must send the Window Scale option
571 * to enable window scaling in either direction.
572 */
573 if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
574 && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
601e68e1 575 sender->td_scale =
9fb9cbb1 576 receiver->td_scale = 0;
874ab923
JK
577 if (!tcph->ack)
578 /* Simultaneous open */
579 return true;
9fb9cbb1
YK
580 } else {
581 /*
582 * We are in the middle of a connection,
583 * its history is lost for us.
584 * Let's try to use the data from the packet.
601e68e1 585 */
9fb9cbb1 586 sender->td_end = end;
fac42a9a 587 win <<= sender->td_scale;
9fb9cbb1
YK
588 sender->td_maxwin = (win == 0 ? 1 : win);
589 sender->td_maxend = end + sender->td_maxwin;
fac42a9a
PNA
590 /*
591 * We haven't seen traffic in the other direction yet
592 * but we have to tweak window tracking to pass III
593 * and IV until that happens.
594 */
595 if (receiver->td_maxwin == 0)
596 receiver->td_end = receiver->td_maxend = sack;
9fb9cbb1
YK
597 }
598 } else if (((state->state == TCP_CONNTRACK_SYN_SENT
599 && dir == IP_CT_DIR_ORIGINAL)
600 || (state->state == TCP_CONNTRACK_SYN_RECV
601 && dir == IP_CT_DIR_REPLY))
602 && after(end, sender->td_end)) {
603 /*
604 * RFC 793: "if a TCP is reinitialized ... then it need
601e68e1 605 * not wait at all; it must only be sure to use sequence
9fb9cbb1
YK
606 * numbers larger than those recently used."
607 */
608 sender->td_end =
609 sender->td_maxend = end;
610 sender->td_maxwin = (win == 0 ? 1 : win);
611
612 tcp_options(skb, dataoff, tcph, sender);
613 }
614
615 if (!(tcph->ack)) {
616 /*
617 * If there is no ACK, just pretend it was set and OK.
618 */
619 ack = sack = receiver->td_end;
601e68e1
YH
620 } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
621 (TCP_FLAG_ACK|TCP_FLAG_RST))
9fb9cbb1
YK
622 && (ack == 0)) {
623 /*
624 * Broken TCP stacks, that set ACK in RST packets as well
625 * with zero ack value.
626 */
627 ack = sack = receiver->td_end;
628 }
629
630 if (seq == end
631 && (!tcph->rst
632 || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
633 /*
634 * Packets contains no data: we assume it is valid
635 * and check the ack value only.
636 * However RST segments are always validated by their
637 * SEQ number, except when seq == 0 (reset sent answering
638 * SYN.
639 */
640 seq = end = sender->td_end;
641
0d53778e 642 pr_debug("tcp_in_window: ");
3c9fba65 643 nf_ct_dump_tuple(tuple);
f9dd09c7
JK
644 pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
645 seq, ack, receiver_offset, sack, receiver_offset, win, end);
0d53778e
PM
646 pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
647 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
648 sender->td_end, sender->td_maxend, sender->td_maxwin,
649 sender->td_scale,
650 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
651 receiver->td_scale);
652
653 pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
654 before(seq, sender->td_maxend + 1),
655 after(end, sender->td_end - receiver->td_maxwin - 1),
656 before(sack, receiver->td_end + 1),
84ebe1cd 657 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
9fb9cbb1 658
/* The segment is in window only if all four bounds I-IV hold (same order as the debug line above). */
a09113c2
PM
659 if (before(seq, sender->td_maxend + 1) &&
660 after(end, sender->td_end - receiver->td_maxwin - 1) &&
661 before(sack, receiver->td_end + 1) &&
84ebe1cd 662 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
601e68e1 663 /*
9fb9cbb1
YK
664 * Take into account window scaling (RFC 1323).
665 */
666 if (!tcph->syn)
667 win <<= sender->td_scale;
668
669 /*
670 * Update sender data.
671 */
672 swin = win + (sack - ack);
673 if (sender->td_maxwin < swin)
674 sender->td_maxwin = swin;
ae375044 675 if (after(end, sender->td_end)) {
9fb9cbb1 676 sender->td_end = end;
ae375044
PM
677 sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
678 }
/* Track the highest ACK value sent by this direction. */
bfcaa502
JK
679 if (tcph->ack) {
680 if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
681 sender->td_maxack = ack;
682 sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
683 } else if (after(ack, sender->td_maxack))
684 sender->td_maxack = ack;
685 }
686
9fb9cbb1
YK
687 /*
688 * Update receiver data.
689 */
fac42a9a 690 if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
9fb9cbb1
YK
691 receiver->td_maxwin += end - sender->td_maxend;
692 if (after(sack + win, receiver->td_maxend - 1)) {
693 receiver->td_maxend = sack + win;
694 if (win == 0)
695 receiver->td_maxend++;
696 }
ae375044
PM
697 if (ack == receiver->td_end)
698 receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
9fb9cbb1 699
601e68e1 700 /*
9fb9cbb1
YK
701 * Check retransmissions.
702 */
/* An ACK identical to the last recorded one (dir, seq, ack, end, win) counts as a retransmission. */
703 if (index == TCP_ACK_SET) {
704 if (state->last_dir == dir
705 && state->last_seq == seq
706 && state->last_ack == ack
c1fe3ca5
GH
707 && state->last_end == end
708 && state->last_win == win)
9fb9cbb1
YK
709 state->retrans++;
710 else {
711 state->last_dir = dir;
712 state->last_seq = seq;
713 state->last_ack = ack;
714 state->last_end = end;
c1fe3ca5 715 state->last_win = win;
9fb9cbb1
YK
716 state->retrans = 0;
717 }
718 }
09f263cd 719 res = true;
9fb9cbb1 720 } else {
/*
 * Out of window. Still accept when liberal mode is enabled for this
 * direction or globally; otherwise log which bound failed first.
 */
09f263cd 721 res = false;
a09113c2
PM
722 if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
723 nf_ct_tcp_be_liberal)
09f263cd 724 res = true;
c2a2c7e0 725 if (!res && LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
726 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
727 "nf_ct_tcp: %s ",
728 before(seq, sender->td_maxend + 1) ?
729 after(end, sender->td_end - receiver->td_maxwin - 1) ?
730 before(sack, receiver->td_end + 1) ?
f9dd09c7 731 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
9fb9cbb1
YK
732 : "ACK is under the lower bound (possible overly delayed ACK)"
733 : "ACK is over the upper bound (ACKed data not seen yet)"
734 : "SEQ is under the lower bound (already ACKed data retransmitted)"
735 : "SEQ is over the upper bound (over the window of the receiver)");
601e68e1
YH
736 }
737
09f263cd 738 pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
0d53778e
PM
739 "receiver end=%u maxend=%u maxwin=%u\n",
740 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
741 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
9fb9cbb1
YK
742
743 return res;
744}
745
5c8ce7c9 746/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
a3433f35
CG
747static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
748 TCPHDR_URG) + 1] =
9fb9cbb1 749{
a3433f35
CG
750 [TCPHDR_SYN] = 1,
751 [TCPHDR_SYN|TCPHDR_URG] = 1,
752 [TCPHDR_SYN|TCPHDR_ACK] = 1,
753 [TCPHDR_RST] = 1,
754 [TCPHDR_RST|TCPHDR_ACK] = 1,
755 [TCPHDR_FIN|TCPHDR_ACK] = 1,
756 [TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
757 [TCPHDR_ACK] = 1,
758 [TCPHDR_ACK|TCPHDR_URG] = 1,
9fb9cbb1
YK
759};
760
761/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
8fea97ec 762static int tcp_error(struct net *net, struct nf_conn *tmpl,
74c51a14 763 struct sk_buff *skb,
9fb9cbb1
YK
764 unsigned int dataoff,
765 enum ip_conntrack_info *ctinfo,
76108cea 766 u_int8_t pf,
96f6bf82 767 unsigned int hooknum)
9fb9cbb1 768{
82f568fc
JE
769 const struct tcphdr *th;
770 struct tcphdr _tcph;
9fb9cbb1
YK
771 unsigned int tcplen = skb->len - dataoff;
772 u_int8_t tcpflags;
773
774 /* Smaller that minimal TCP header? */
775 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
776 if (th == NULL) {
c2a2c7e0 777 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
778 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
779 "nf_ct_tcp: short packet ");
780 return -NF_ACCEPT;
601e68e1
YH
781 }
782
9fb9cbb1
YK
783 /* Not whole TCP header or malformed packet */
784 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
c2a2c7e0 785 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
786 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
787 "nf_ct_tcp: truncated/malformed packet ");
788 return -NF_ACCEPT;
789 }
601e68e1 790
9fb9cbb1
YK
791 /* Checksum invalid? Ignore.
792 * We skip checking packets on the outgoing path
84fa7933 793 * because the checksum is assumed to be correct.
9fb9cbb1
YK
794 */
795 /* FIXME: Source route IP option packets --RR */
c04d0552 796 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
96f6bf82 797 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
c2a2c7e0 798 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
799 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
800 "nf_ct_tcp: bad TCP checksum ");
801 return -NF_ACCEPT;
802 }
803
804 /* Check TCP flags. */
a3433f35 805 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
9fb9cbb1 806 if (!tcp_valid_flags[tcpflags]) {
c2a2c7e0 807 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
808 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
809 "nf_ct_tcp: invalid TCP flag combination ");
810 return -NF_ACCEPT;
811 }
812
813 return NF_ACCEPT;
814}
815
2c8503f5
PNA
816static unsigned int *tcp_get_timeouts(struct net *net)
817{
818 return tcp_timeouts;
819}
820
9fb9cbb1 821/* Returns verdict for packet, or -1 for invalid. */
c88130bc 822static int tcp_packet(struct nf_conn *ct,
9fb9cbb1
YK
823 const struct sk_buff *skb,
824 unsigned int dataoff,
825 enum ip_conntrack_info ctinfo,
76108cea 826 u_int8_t pf,
2c8503f5
PNA
827 unsigned int hooknum,
828 unsigned int *timeouts)
9fb9cbb1 829{
c2a2c7e0 830 struct net *net = nf_ct_net(ct);
0d53778e 831 struct nf_conntrack_tuple *tuple;
9fb9cbb1
YK
832 enum tcp_conntrack new_state, old_state;
833 enum ip_conntrack_dir dir;
82f568fc
JE
834 const struct tcphdr *th;
835 struct tcphdr _tcph;
9fb9cbb1
YK
836 unsigned long timeout;
837 unsigned int index;
838
839 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
840 BUG_ON(th == NULL);
841
440f0d58 842 spin_lock_bh(&ct->lock);
c88130bc 843 old_state = ct->proto.tcp.state;
9fb9cbb1
YK
844 dir = CTINFO2DIR(ctinfo);
845 index = get_conntrack_index(th);
846 new_state = tcp_conntracks[dir][index][old_state];
c88130bc 847 tuple = &ct->tuplehash[dir].tuple;
9fb9cbb1
YK
848
849 switch (new_state) {
17311393
JK
850 case TCP_CONNTRACK_SYN_SENT:
851 if (old_state < TCP_CONNTRACK_TIME_WAIT)
852 break;
b2155e7f
JK
853 /* RFC 1122: "When a connection is closed actively,
854 * it MUST linger in TIME-WAIT state for a time 2xMSL
855 * (Maximum Segment Lifetime). However, it MAY accept
856 * a new SYN from the remote TCP to reopen the connection
857 * directly from TIME-WAIT state, if..."
858 * We ignore the conditions because we are in the
859 * TIME-WAIT state anyway.
860 *
861 * Handle aborted connections: we and the server
862 * think there is an existing connection but the client
863 * aborts it and starts a new one.
864 */
865 if (((ct->proto.tcp.seen[dir].flags
866 | ct->proto.tcp.seen[!dir].flags)
867 & IP_CT_TCP_FLAG_CLOSE_INIT)
c88130bc
PM
868 || (ct->proto.tcp.last_dir == dir
869 && ct->proto.tcp.last_index == TCP_RST_SET)) {
bc34b841
JK
870 /* Attempt to reopen a closed/aborted connection.
871 * Delete this connection and look up again. */
440f0d58 872 spin_unlock_bh(&ct->lock);
2aec609f 873
6b69fe0c
PM
874 /* Only repeat if we can actually remove the timer.
875 * Destruction may already be in progress in process
876 * context and we must give it a chance to terminate.
877 */
2aec609f 878 if (nf_ct_kill(ct))
6b69fe0c 879 return -NF_REPEAT;
ec8d5409 880 return NF_DROP;
17311393
JK
881 }
882 /* Fall through */
9fb9cbb1 883 case TCP_CONNTRACK_IGNORE:
73f30602 884 /* Ignored packets:
b2155e7f
JK
885 *
886 * Our connection entry may be out of sync, so ignore
887 * packets which may signal the real connection between
888 * the client and the server.
73f30602
JK
889 *
890 * a) SYN in ORIGINAL
891 * b) SYN/ACK in REPLY
601e68e1 892 * c) ACK in reply direction after initial SYN in original.
b2155e7f
JK
893 *
894 * If the ignored packet is invalid, the receiver will send
895 * a RST we'll catch below.
73f30602 896 */
9fb9cbb1 897 if (index == TCP_SYNACK_SET
c88130bc
PM
898 && ct->proto.tcp.last_index == TCP_SYN_SET
899 && ct->proto.tcp.last_dir != dir
900 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
b2155e7f 901 /* b) This SYN/ACK acknowledges a SYN that we earlier
9fb9cbb1
YK
902 * ignored as invalid. This means that the client and
903 * the server are both in sync, while the firewall is
c4832c7b
PNA
904 * not. We get in sync from the previously annotated
905 * values.
9fb9cbb1 906 */
c4832c7b
PNA
907 old_state = TCP_CONNTRACK_SYN_SENT;
908 new_state = TCP_CONNTRACK_SYN_RECV;
909 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
910 ct->proto.tcp.last_end;
911 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
912 ct->proto.tcp.last_end;
913 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
914 ct->proto.tcp.last_win == 0 ?
915 1 : ct->proto.tcp.last_win;
916 ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
917 ct->proto.tcp.last_wscale;
918 ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
919 ct->proto.tcp.last_flags;
920 memset(&ct->proto.tcp.seen[dir], 0,
921 sizeof(struct ip_ct_tcp_state));
922 break;
9fb9cbb1 923 }
c88130bc
PM
924 ct->proto.tcp.last_index = index;
925 ct->proto.tcp.last_dir = dir;
926 ct->proto.tcp.last_seq = ntohl(th->seq);
927 ct->proto.tcp.last_end =
9fb9cbb1 928 segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
c4832c7b
PNA
929 ct->proto.tcp.last_win = ntohs(th->window);
930
931 /* a) This is a SYN in ORIGINAL. The client and the server
932 * may be in sync but we are not. In that case, we annotate
933 * the TCP options and let the packet go through. If it is a
934 * valid SYN packet, the server will reply with a SYN/ACK, and
935 * then we'll get in sync. Otherwise, the server ignores it. */
936 if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
937 struct ip_ct_tcp_state seen = {};
938
939 ct->proto.tcp.last_flags =
940 ct->proto.tcp.last_wscale = 0;
941 tcp_options(skb, dataoff, th, &seen);
942 if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
943 ct->proto.tcp.last_flags |=
944 IP_CT_TCP_FLAG_WINDOW_SCALE;
945 ct->proto.tcp.last_wscale = seen.td_scale;
946 }
947 if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
948 ct->proto.tcp.last_flags |=
949 IP_CT_TCP_FLAG_SACK_PERM;
950 }
951 }
440f0d58 952 spin_unlock_bh(&ct->lock);
c2a2c7e0 953 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1 954 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
b2155e7f 955 "nf_ct_tcp: invalid packet ignored ");
9fb9cbb1
YK
956 return NF_ACCEPT;
957 case TCP_CONNTRACK_MAX:
958 /* Invalid packet */
0d53778e
PM
959 pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
960 dir, get_conntrack_index(th), old_state);
440f0d58 961 spin_unlock_bh(&ct->lock);
c2a2c7e0 962 if (LOG_INVALID(net, IPPROTO_TCP))
9fb9cbb1
YK
963 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
964 "nf_ct_tcp: invalid state ");
965 return -NF_ACCEPT;
9fb9cbb1 966 case TCP_CONNTRACK_CLOSE:
bfcaa502
JK
967 if (index == TCP_RST_SET
968 && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
969 && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
970 /* Invalid RST */
334a47f6 971 spin_unlock_bh(&ct->lock);
bfcaa502
JK
972 if (LOG_INVALID(net, IPPROTO_TCP))
973 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
974 "nf_ct_tcp: invalid RST ");
975 return -NF_ACCEPT;
976 }
9fb9cbb1 977 if (index == TCP_RST_SET
c88130bc
PM
978 && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
979 && ct->proto.tcp.last_index == TCP_SYN_SET)
980 || (!test_bit(IPS_ASSURED_BIT, &ct->status)
981 && ct->proto.tcp.last_index == TCP_ACK_SET))
982 && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
93b1fae4 983 /* RST sent to invalid SYN or ACK we had let through
73f30602
JK
984 * at a) and c) above:
985 *
986 * a) SYN was in window then
987 * c) we hold a half-open connection.
988 *
989 * Delete our connection entry.
9fb9cbb1 990 * We skip window checking, because packet might ACK
73f30602 991 * segments we ignored. */
9fb9cbb1
YK
992 goto in_window;
993 }
93b1fae4 994 /* Just fall through */
9fb9cbb1
YK
995 default:
996 /* Keep compilers happy. */
997 break;
998 }
999
c88130bc 1000 if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
9fb9cbb1 1001 skb, dataoff, th, pf)) {
440f0d58 1002 spin_unlock_bh(&ct->lock);
9fb9cbb1
YK
1003 return -NF_ACCEPT;
1004 }
1005 in_window:
1006 /* From now on we have got in-window packets */
c88130bc
PM
1007 ct->proto.tcp.last_index = index;
1008 ct->proto.tcp.last_dir = dir;
9fb9cbb1 1009
0d53778e 1010 pr_debug("tcp_conntracks: ");
3c9fba65 1011 nf_ct_dump_tuple(tuple);
0d53778e
PM
1012 pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1013 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1014 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1015 old_state, new_state);
9fb9cbb1 1016
c88130bc 1017 ct->proto.tcp.state = new_state;
9fb9cbb1 1018 if (old_state != new_state
d0c1fd7a 1019 && new_state == TCP_CONNTRACK_FIN_WAIT)
c88130bc 1020 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
ae375044
PM
1021
1022 if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans &&
2c8503f5
PNA
1023 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1024 timeout = timeouts[TCP_CONNTRACK_RETRANS];
ae375044
PM
1025 else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1026 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
2c8503f5
PNA
1027 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1028 timeout = timeouts[TCP_CONNTRACK_UNACK];
ae375044 1029 else
2c8503f5 1030 timeout = timeouts[new_state];
440f0d58 1031 spin_unlock_bh(&ct->lock);
9fb9cbb1 1032
9fb9cbb1 1033 if (new_state != old_state)
a71996fc 1034 nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
9fb9cbb1 1035
c88130bc 1036 if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
9fb9cbb1
YK
1037 /* If only reply is a RST, we can consider ourselves not to
1038 have an established connection: this is a fairly common
1039 problem case, so we can delete the conntrack
1040 immediately. --RR */
1041 if (th->rst) {
718d4ad9 1042 nf_ct_kill_acct(ct, ctinfo, skb);
9fb9cbb1
YK
1043 return NF_ACCEPT;
1044 }
c88130bc 1045 } else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
9fb9cbb1
YK
1046 && (old_state == TCP_CONNTRACK_SYN_RECV
1047 || old_state == TCP_CONNTRACK_ESTABLISHED)
1048 && new_state == TCP_CONNTRACK_ESTABLISHED) {
601e68e1
YH
1049 /* Set ASSURED if we see valid ack in ESTABLISHED
1050 after SYN_RECV or a valid answer for a picked up
9fb9cbb1 1051 connection. */
c88130bc 1052 set_bit(IPS_ASSURED_BIT, &ct->status);
858b3133 1053 nf_conntrack_event_cache(IPCT_ASSURED, ct);
9fb9cbb1 1054 }
c88130bc 1055 nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
9fb9cbb1
YK
1056
1057 return NF_ACCEPT;
1058}
601e68e1 1059
9fb9cbb1 1060/* Called when a new connection for this protocol found. */
09f263cd 1061static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
2c8503f5 1062 unsigned int dataoff, unsigned int *timeouts)
9fb9cbb1
YK
1063{
1064 enum tcp_conntrack new_state;
82f568fc
JE
1065 const struct tcphdr *th;
1066 struct tcphdr _tcph;
1067 const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1068 const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
9fb9cbb1
YK
1069
1070 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1071 BUG_ON(th == NULL);
1072
1073 /* Don't need lock here: this conntrack not in circulation yet */
e5fc9e7a 1074 new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
9fb9cbb1
YK
1075
1076 /* Invalid: delete conntrack */
1077 if (new_state >= TCP_CONNTRACK_MAX) {
0d53778e 1078 pr_debug("nf_ct_tcp: invalid new deleting.\n");
09f263cd 1079 return false;
9fb9cbb1
YK
1080 }
1081
1082 if (new_state == TCP_CONNTRACK_SYN_SENT) {
e5fc9e7a 1083 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
9fb9cbb1 1084 /* SYN packet */
c88130bc 1085 ct->proto.tcp.seen[0].td_end =
9fb9cbb1
YK
1086 segment_seq_plus_len(ntohl(th->seq), skb->len,
1087 dataoff, th);
c88130bc
PM
1088 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1089 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1090 ct->proto.tcp.seen[0].td_maxwin = 1;
1091 ct->proto.tcp.seen[0].td_maxend =
1092 ct->proto.tcp.seen[0].td_end;
1093
1094 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
9fb9cbb1
YK
1095 } else if (nf_ct_tcp_loose == 0) {
1096 /* Don't try to pick up connections. */
09f263cd 1097 return false;
9fb9cbb1 1098 } else {
e5fc9e7a 1099 memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
9fb9cbb1
YK
1100 /*
1101 * We are in the middle of a connection,
1102 * its history is lost for us.
1103 * Let's try to use the data from the packet.
1104 */
c88130bc 1105 ct->proto.tcp.seen[0].td_end =
9fb9cbb1
YK
1106 segment_seq_plus_len(ntohl(th->seq), skb->len,
1107 dataoff, th);
c88130bc
PM
1108 ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1109 if (ct->proto.tcp.seen[0].td_maxwin == 0)
1110 ct->proto.tcp.seen[0].td_maxwin = 1;
1111 ct->proto.tcp.seen[0].td_maxend =
1112 ct->proto.tcp.seen[0].td_end +
1113 ct->proto.tcp.seen[0].td_maxwin;
9fb9cbb1 1114
a09113c2
PM
1115 /* We assume SACK and liberal window checking to handle
1116 * window scaling */
c88130bc
PM
1117 ct->proto.tcp.seen[0].flags =
1118 ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1119 IP_CT_TCP_FLAG_BE_LIBERAL;
9fb9cbb1 1120 }
601e68e1 1121
9fb9cbb1 1122 /* tcp_packet will set them */
c88130bc 1123 ct->proto.tcp.last_index = TCP_NONE_SET;
601e68e1 1124
0d53778e
PM
1125 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1126 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1127 sender->td_end, sender->td_maxend, sender->td_maxwin,
1128 sender->td_scale,
1129 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1130 receiver->td_scale);
09f263cd 1131 return true;
9fb9cbb1 1132}
c1d10adb 1133
c0cd1156 1134#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
c1d10adb
PNA
1135
1136#include <linux/netfilter/nfnetlink.h>
1137#include <linux/netfilter/nfnetlink_conntrack.h>
1138
fdf70832 1139static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
440f0d58 1140 struct nf_conn *ct)
c1d10adb 1141{
df6fb868 1142 struct nlattr *nest_parms;
c8e2078c 1143 struct nf_ct_tcp_flags tmp = {};
601e68e1 1144
440f0d58 1145 spin_lock_bh(&ct->lock);
df6fb868
PM
1146 nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1147 if (!nest_parms)
1148 goto nla_put_failure;
1149
77236b6e 1150 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state);
c8e2078c 1151
77236b6e
PM
1152 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1153 ct->proto.tcp.seen[0].td_scale);
c8e2078c 1154
77236b6e
PM
1155 NLA_PUT_U8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1156 ct->proto.tcp.seen[1].td_scale);
c8e2078c
PNA
1157
1158 tmp.flags = ct->proto.tcp.seen[0].flags;
df6fb868 1159 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
c8e2078c
PNA
1160 sizeof(struct nf_ct_tcp_flags), &tmp);
1161
1162 tmp.flags = ct->proto.tcp.seen[1].flags;
df6fb868 1163 NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
c8e2078c 1164 sizeof(struct nf_ct_tcp_flags), &tmp);
440f0d58 1165 spin_unlock_bh(&ct->lock);
c1d10adb 1166
df6fb868 1167 nla_nest_end(skb, nest_parms);
c1d10adb
PNA
1168
1169 return 0;
1170
df6fb868 1171nla_put_failure:
440f0d58 1172 spin_unlock_bh(&ct->lock);
c1d10adb
PNA
1173 return -1;
1174}
1175
f73e924c
PM
1176static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1177 [CTA_PROTOINFO_TCP_STATE] = { .type = NLA_U8 },
1178 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1179 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 },
1180 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) },
1181 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) },
c1d10adb
PNA
1182};
1183
fdf70832 1184static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
c1d10adb 1185{
2f0d2f10 1186 struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
df6fb868 1187 struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
f73e924c 1188 int err;
c1d10adb
PNA
1189
1190 /* updates could not contain anything about the private
1191 * protocol info, in that case skip the parsing */
2f0d2f10 1192 if (!pattr)
c1d10adb
PNA
1193 return 0;
1194
2f0d2f10 1195 err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
f73e924c
PM
1196 if (err < 0)
1197 return err;
c1d10adb 1198
5f7da4d2
PM
1199 if (tb[CTA_PROTOINFO_TCP_STATE] &&
1200 nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
c1d10adb
PNA
1201 return -EINVAL;
1202
440f0d58 1203 spin_lock_bh(&ct->lock);
5f7da4d2
PM
1204 if (tb[CTA_PROTOINFO_TCP_STATE])
1205 ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
c8e2078c 1206
df6fb868 1207 if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
c8e2078c 1208 struct nf_ct_tcp_flags *attr =
df6fb868 1209 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
c8e2078c
PNA
1210 ct->proto.tcp.seen[0].flags &= ~attr->mask;
1211 ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1212 }
1213
df6fb868 1214 if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
c8e2078c 1215 struct nf_ct_tcp_flags *attr =
df6fb868 1216 nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
c8e2078c
PNA
1217 ct->proto.tcp.seen[1].flags &= ~attr->mask;
1218 ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1219 }
1220
df6fb868
PM
1221 if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1222 tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
c8e2078c
PNA
1223 ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1224 ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
77236b6e
PM
1225 ct->proto.tcp.seen[0].td_scale =
1226 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1227 ct->proto.tcp.seen[1].td_scale =
1228 nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
c8e2078c 1229 }
440f0d58 1230 spin_unlock_bh(&ct->lock);
c1d10adb
PNA
1231
1232 return 0;
1233}
a400c30e
HE
1234
1235static int tcp_nlattr_size(void)
1236{
1237 return nla_total_size(0) /* CTA_PROTOINFO_TCP */
1238 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1239}
1240
1241static int tcp_nlattr_tuple_size(void)
1242{
1243 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1244}
c1d10adb 1245#endif
933a41e7 1246
50978462
PNA
1247#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1248
1249#include <linux/netfilter/nfnetlink.h>
1250#include <linux/netfilter/nfnetlink_cttimeout.h>
1251
1252static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], void *data)
1253{
1254 unsigned int *timeouts = data;
1255 int i;
1256
1257 /* set default TCP timeouts. */
1258 for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1259 timeouts[i] = tcp_timeouts[i];
1260
1261 if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1262 timeouts[TCP_CONNTRACK_SYN_SENT] =
1263 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1264 }
1265 if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1266 timeouts[TCP_CONNTRACK_SYN_RECV] =
1267 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1268 }
1269 if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1270 timeouts[TCP_CONNTRACK_ESTABLISHED] =
1271 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1272 }
1273 if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1274 timeouts[TCP_CONNTRACK_FIN_WAIT] =
1275 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1276 }
1277 if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1278 timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1279 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1280 }
1281 if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1282 timeouts[TCP_CONNTRACK_LAST_ACK] =
1283 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1284 }
1285 if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1286 timeouts[TCP_CONNTRACK_TIME_WAIT] =
1287 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1288 }
1289 if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1290 timeouts[TCP_CONNTRACK_CLOSE] =
1291 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1292 }
1293 if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1294 timeouts[TCP_CONNTRACK_SYN_SENT2] =
1295 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1296 }
1297 if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1298 timeouts[TCP_CONNTRACK_RETRANS] =
1299 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1300 }
1301 if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1302 timeouts[TCP_CONNTRACK_UNACK] =
1303 ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1304 }
1305 return 0;
1306}
1307
1308static int
1309tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1310{
1311 const unsigned int *timeouts = data;
1312
1313 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1314 htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ));
1315 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1316 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ));
1317 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1318 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ));
1319 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1320 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ));
1321 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1322 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ));
1323 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1324 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ));
1325 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1326 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ));
1327 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_CLOSE,
1328 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ));
1329 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1330 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ));
1331 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_RETRANS,
1332 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ));
1333 NLA_PUT_BE32(skb, CTA_TIMEOUT_TCP_UNACK,
1334 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ));
1335 return 0;
1336
1337nla_put_failure:
1338 return -ENOSPC;
1339}
1340
1341static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1342 [CTA_TIMEOUT_TCP_SYN_SENT] = { .type = NLA_U32 },
1343 [CTA_TIMEOUT_TCP_SYN_RECV] = { .type = NLA_U32 },
1344 [CTA_TIMEOUT_TCP_ESTABLISHED] = { .type = NLA_U32 },
1345 [CTA_TIMEOUT_TCP_FIN_WAIT] = { .type = NLA_U32 },
1346 [CTA_TIMEOUT_TCP_CLOSE_WAIT] = { .type = NLA_U32 },
1347 [CTA_TIMEOUT_TCP_LAST_ACK] = { .type = NLA_U32 },
1348 [CTA_TIMEOUT_TCP_TIME_WAIT] = { .type = NLA_U32 },
1349 [CTA_TIMEOUT_TCP_CLOSE] = { .type = NLA_U32 },
1350 [CTA_TIMEOUT_TCP_SYN_SENT2] = { .type = NLA_U32 },
1351};
1352#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1353
933a41e7
PM
1354#ifdef CONFIG_SYSCTL
1355static unsigned int tcp_sysctl_table_users;
1356static struct ctl_table_header *tcp_sysctl_header;
1357static struct ctl_table tcp_sysctl_table[] = {
1358 {
933a41e7 1359 .procname = "nf_conntrack_tcp_timeout_syn_sent",
2d646286 1360 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
933a41e7
PM
1361 .maxlen = sizeof(unsigned int),
1362 .mode = 0644,
6d9f239a 1363 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1364 },
1365 {
933a41e7 1366 .procname = "nf_conntrack_tcp_timeout_syn_recv",
2d646286 1367 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
933a41e7
PM
1368 .maxlen = sizeof(unsigned int),
1369 .mode = 0644,
6d9f239a 1370 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1371 },
1372 {
933a41e7 1373 .procname = "nf_conntrack_tcp_timeout_established",
2d646286 1374 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
933a41e7
PM
1375 .maxlen = sizeof(unsigned int),
1376 .mode = 0644,
6d9f239a 1377 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1378 },
1379 {
933a41e7 1380 .procname = "nf_conntrack_tcp_timeout_fin_wait",
2d646286 1381 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
933a41e7
PM
1382 .maxlen = sizeof(unsigned int),
1383 .mode = 0644,
6d9f239a 1384 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1385 },
1386 {
933a41e7 1387 .procname = "nf_conntrack_tcp_timeout_close_wait",
2d646286 1388 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
933a41e7
PM
1389 .maxlen = sizeof(unsigned int),
1390 .mode = 0644,
6d9f239a 1391 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1392 },
1393 {
933a41e7 1394 .procname = "nf_conntrack_tcp_timeout_last_ack",
2d646286 1395 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
933a41e7
PM
1396 .maxlen = sizeof(unsigned int),
1397 .mode = 0644,
6d9f239a 1398 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1399 },
1400 {
933a41e7 1401 .procname = "nf_conntrack_tcp_timeout_time_wait",
2d646286 1402 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
933a41e7
PM
1403 .maxlen = sizeof(unsigned int),
1404 .mode = 0644,
6d9f239a 1405 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1406 },
1407 {
933a41e7 1408 .procname = "nf_conntrack_tcp_timeout_close",
2d646286 1409 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
933a41e7
PM
1410 .maxlen = sizeof(unsigned int),
1411 .mode = 0644,
6d9f239a 1412 .proc_handler = proc_dointvec_jiffies,
933a41e7
PM
1413 },
1414 {
933a41e7 1415 .procname = "nf_conntrack_tcp_timeout_max_retrans",
33ee4464 1416 .data = &tcp_timeouts[TCP_CONNTRACK_RETRANS],
933a41e7
PM
1417 .maxlen = sizeof(unsigned int),
1418 .mode = 0644,
6d9f239a 1419 .proc_handler = proc_dointvec_jiffies,
933a41e7 1420 },
ae375044
PM
1421 {
1422 .procname = "nf_conntrack_tcp_timeout_unacknowledged",
33ee4464 1423 .data = &tcp_timeouts[TCP_CONNTRACK_UNACK],
ae375044
PM
1424 .maxlen = sizeof(unsigned int),
1425 .mode = 0644,
6d9f239a 1426 .proc_handler = proc_dointvec_jiffies,
ae375044 1427 },
933a41e7 1428 {
933a41e7
PM
1429 .procname = "nf_conntrack_tcp_loose",
1430 .data = &nf_ct_tcp_loose,
1431 .maxlen = sizeof(unsigned int),
1432 .mode = 0644,
6d9f239a 1433 .proc_handler = proc_dointvec,
933a41e7
PM
1434 },
1435 {
933a41e7
PM
1436 .procname = "nf_conntrack_tcp_be_liberal",
1437 .data = &nf_ct_tcp_be_liberal,
1438 .maxlen = sizeof(unsigned int),
1439 .mode = 0644,
6d9f239a 1440 .proc_handler = proc_dointvec,
933a41e7
PM
1441 },
1442 {
933a41e7
PM
1443 .procname = "nf_conntrack_tcp_max_retrans",
1444 .data = &nf_ct_tcp_max_retrans,
1445 .maxlen = sizeof(unsigned int),
1446 .mode = 0644,
6d9f239a 1447 .proc_handler = proc_dointvec,
933a41e7 1448 },
f8572d8f 1449 { }
933a41e7 1450};
a999e683
PM
1451
1452#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1453static struct ctl_table tcp_compat_sysctl_table[] = {
1454 {
a999e683 1455 .procname = "ip_conntrack_tcp_timeout_syn_sent",
2d646286 1456 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT],
a999e683
PM
1457 .maxlen = sizeof(unsigned int),
1458 .mode = 0644,
6d9f239a 1459 .proc_handler = proc_dointvec_jiffies,
a999e683 1460 },
874ab923
JK
1461 {
1462 .procname = "ip_conntrack_tcp_timeout_syn_sent2",
1463 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2],
1464 .maxlen = sizeof(unsigned int),
1465 .mode = 0644,
1466 .proc_handler = proc_dointvec_jiffies,
1467 },
a999e683 1468 {
a999e683 1469 .procname = "ip_conntrack_tcp_timeout_syn_recv",
2d646286 1470 .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
a999e683
PM
1471 .maxlen = sizeof(unsigned int),
1472 .mode = 0644,
6d9f239a 1473 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1474 },
1475 {
a999e683 1476 .procname = "ip_conntrack_tcp_timeout_established",
2d646286 1477 .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED],
a999e683
PM
1478 .maxlen = sizeof(unsigned int),
1479 .mode = 0644,
6d9f239a 1480 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1481 },
1482 {
a999e683 1483 .procname = "ip_conntrack_tcp_timeout_fin_wait",
2d646286 1484 .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT],
a999e683
PM
1485 .maxlen = sizeof(unsigned int),
1486 .mode = 0644,
6d9f239a 1487 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1488 },
1489 {
a999e683 1490 .procname = "ip_conntrack_tcp_timeout_close_wait",
2d646286 1491 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT],
a999e683
PM
1492 .maxlen = sizeof(unsigned int),
1493 .mode = 0644,
6d9f239a 1494 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1495 },
1496 {
a999e683 1497 .procname = "ip_conntrack_tcp_timeout_last_ack",
2d646286 1498 .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK],
a999e683
PM
1499 .maxlen = sizeof(unsigned int),
1500 .mode = 0644,
6d9f239a 1501 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1502 },
1503 {
a999e683 1504 .procname = "ip_conntrack_tcp_timeout_time_wait",
2d646286 1505 .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT],
a999e683
PM
1506 .maxlen = sizeof(unsigned int),
1507 .mode = 0644,
6d9f239a 1508 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1509 },
1510 {
a999e683 1511 .procname = "ip_conntrack_tcp_timeout_close",
2d646286 1512 .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE],
a999e683
PM
1513 .maxlen = sizeof(unsigned int),
1514 .mode = 0644,
6d9f239a 1515 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1516 },
1517 {
a999e683 1518 .procname = "ip_conntrack_tcp_timeout_max_retrans",
33ee4464 1519 .data = &tcp_timeouts[TCP_CONNTRACK_RETRANS],
a999e683
PM
1520 .maxlen = sizeof(unsigned int),
1521 .mode = 0644,
6d9f239a 1522 .proc_handler = proc_dointvec_jiffies,
a999e683
PM
1523 },
1524 {
a999e683
PM
1525 .procname = "ip_conntrack_tcp_loose",
1526 .data = &nf_ct_tcp_loose,
1527 .maxlen = sizeof(unsigned int),
1528 .mode = 0644,
6d9f239a 1529 .proc_handler = proc_dointvec,
a999e683
PM
1530 },
1531 {
a999e683
PM
1532 .procname = "ip_conntrack_tcp_be_liberal",
1533 .data = &nf_ct_tcp_be_liberal,
1534 .maxlen = sizeof(unsigned int),
1535 .mode = 0644,
6d9f239a 1536 .proc_handler = proc_dointvec,
a999e683
PM
1537 },
1538 {
a999e683
PM
1539 .procname = "ip_conntrack_tcp_max_retrans",
1540 .data = &nf_ct_tcp_max_retrans,
1541 .maxlen = sizeof(unsigned int),
1542 .mode = 0644,
6d9f239a 1543 .proc_handler = proc_dointvec,
a999e683 1544 },
f8572d8f 1545 { }
a999e683
PM
1546};
1547#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
933a41e7
PM
1548#endif /* CONFIG_SYSCTL */
1549
61075af5 1550struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
9fb9cbb1
YK
1551{
1552 .l3proto = PF_INET,
605dcad6 1553 .l4proto = IPPROTO_TCP,
9fb9cbb1
YK
1554 .name = "tcp",
1555 .pkt_to_tuple = tcp_pkt_to_tuple,
1556 .invert_tuple = tcp_invert_tuple,
1557 .print_tuple = tcp_print_tuple,
1558 .print_conntrack = tcp_print_conntrack,
1559 .packet = tcp_packet,
2c8503f5 1560 .get_timeouts = tcp_get_timeouts,
9fb9cbb1 1561 .new = tcp_new,
96f6bf82 1562 .error = tcp_error,
c0cd1156 1563#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
fdf70832 1564 .to_nlattr = tcp_to_nlattr,
a400c30e 1565 .nlattr_size = tcp_nlattr_size,
fdf70832
PM
1566 .from_nlattr = nlattr_to_tcp,
1567 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1568 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
a400c30e 1569 .nlattr_tuple_size = tcp_nlattr_tuple_size,
f73e924c 1570 .nla_policy = nf_ct_port_nla_policy,
c1d10adb 1571#endif
50978462
PNA
1572#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1573 .ctnl_timeout = {
1574 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1575 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1576 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1577 .obj_size = sizeof(unsigned int) *
1578 TCP_CONNTRACK_TIMEOUT_MAX,
1579 .nla_policy = tcp_timeout_nla_policy,
1580 },
1581#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
933a41e7
PM
1582#ifdef CONFIG_SYSCTL
1583 .ctl_table_users = &tcp_sysctl_table_users,
1584 .ctl_table_header = &tcp_sysctl_header,
1585 .ctl_table = tcp_sysctl_table,
a999e683
PM
1586#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1587 .ctl_compat_table = tcp_compat_sysctl_table,
1588#endif
933a41e7 1589#endif
9fb9cbb1 1590};
13b18339 1591EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
9fb9cbb1 1592
61075af5 1593struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
9fb9cbb1
YK
1594{
1595 .l3proto = PF_INET6,
605dcad6 1596 .l4proto = IPPROTO_TCP,
9fb9cbb1
YK
1597 .name = "tcp",
1598 .pkt_to_tuple = tcp_pkt_to_tuple,
1599 .invert_tuple = tcp_invert_tuple,
1600 .print_tuple = tcp_print_tuple,
1601 .print_conntrack = tcp_print_conntrack,
1602 .packet = tcp_packet,
2c8503f5 1603 .get_timeouts = tcp_get_timeouts,
9fb9cbb1 1604 .new = tcp_new,
96f6bf82 1605 .error = tcp_error,
c0cd1156 1606#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
fdf70832 1607 .to_nlattr = tcp_to_nlattr,
a400c30e 1608 .nlattr_size = tcp_nlattr_size,
fdf70832
PM
1609 .from_nlattr = nlattr_to_tcp,
1610 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1611 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
a400c30e 1612 .nlattr_tuple_size = tcp_nlattr_tuple_size,
f73e924c 1613 .nla_policy = nf_ct_port_nla_policy,
c1d10adb 1614#endif
50978462
PNA
1615#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1616 .ctnl_timeout = {
1617 .nlattr_to_obj = tcp_timeout_nlattr_to_obj,
1618 .obj_to_nlattr = tcp_timeout_obj_to_nlattr,
1619 .nlattr_max = CTA_TIMEOUT_TCP_MAX,
1620 .obj_size = sizeof(unsigned int) *
1621 TCP_CONNTRACK_TIMEOUT_MAX,
1622 .nla_policy = tcp_timeout_nla_policy,
1623 },
1624#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
933a41e7
PM
1625#ifdef CONFIG_SYSCTL
1626 .ctl_table_users = &tcp_sysctl_table_users,
1627 .ctl_table_header = &tcp_sysctl_header,
1628 .ctl_table = tcp_sysctl_table,
1629#endif
9fb9cbb1 1630};
13b18339 1631EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);