1 // SPDX-License-Identifier: GPL-2.0-only
2 /* (C) 1999-2001 Paul `Rusty' Russell
3  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
4  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
5  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6  */
7 
8 #include <linux/types.h>
9 #include <linux/timer.h>
10 #include <linux/module.h>
11 #include <linux/in.h>
12 #include <linux/tcp.h>
13 #include <linux/spinlock.h>
14 #include <linux/skbuff.h>
15 #include <linux/ipv6.h>
16 #include <net/ip6_checksum.h>
17 #include <asm/unaligned.h>
18 
19 #include <net/tcp.h>
20 
21 #include <linux/netfilter.h>
22 #include <linux/netfilter_ipv4.h>
23 #include <linux/netfilter_ipv6.h>
24 #include <net/netfilter/nf_conntrack.h>
25 #include <net/netfilter/nf_conntrack_l4proto.h>
26 #include <net/netfilter/nf_conntrack_ecache.h>
27 #include <net/netfilter/nf_conntrack_seqadj.h>
28 #include <net/netfilter/nf_conntrack_synproxy.h>
29 #include <net/netfilter/nf_conntrack_timeout.h>
30 #include <net/netfilter/nf_log.h>
31 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
32 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
33 
34   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
35      closely.  They're more complex. --RR */
36 
/* Human-readable names for the TCP conntrack states, indexed by
 * enum tcp_conntrack; used when dumping the state via procfs below.
 */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"SYN_SENT2",
};
49 
/* Timeout unit helpers: "2 MINS" expands to "2 * 60 * HZ" jiffies. */
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
54 
/* Default per-state timeouts, in jiffies. */
static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
/* RFC1122 says the R2 limit should be at least 100 seconds.
   Linux uses 15 packets as limit, which corresponds
   to ~13-30min depending on RTO. */
	[TCP_CONNTRACK_RETRANS]		= 5 MINS,
	[TCP_CONNTRACK_UNACK]		= 5 MINS,
};
71 
/* Short aliases for the conntrack states, used in the transition table
 * below to keep the rows readable. */
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sS2 TCP_CONNTRACK_SYN_SENT2
#define sIV TCP_CONNTRACK_MAX		/* truly invalid packet */
#define sIG TCP_CONNTRACK_IGNORE	/* possibly valid; don't track it */

/* What TCP flags are set from RST/SYN/FIN/ACK. */
enum tcp_bit_set {
	TCP_SYN_SET,
	TCP_SYNACK_SET,
	TCP_FIN_SET,
	TCP_ACK_SET,
	TCP_RST_SET,
	TCP_NONE_SET,
};
94 
95 /*
96  * The TCP state transition table needs a few words...
97  *
98  * We are the man in the middle. All the packets go through us
99  * but might get lost in transit to the destination.
100  * It is assumed that the destinations can't receive segments
101  * we haven't seen.
102  *
103  * The checked segment is in window, but our windows are *not*
104  * equivalent with the ones of the sender/receiver. We always
105  * try to guess the state of the current sender.
106  *
107  * The meaning of the states are:
108  *
109  * NONE:	initial state
110  * SYN_SENT:	SYN-only packet seen
111  * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
112  * SYN_RECV:	SYN-ACK packet seen
113  * ESTABLISHED:	ACK packet seen
114  * FIN_WAIT:	FIN packet seen
115  * CLOSE_WAIT:	ACK seen (after FIN)
116  * LAST_ACK:	FIN seen (after FIN)
117  * TIME_WAIT:	last ACK seen
118  * CLOSE:	closed connection (RST)
119  *
120  * Packets marked as IGNORED (sIG):
121  *	if they may be either invalid or valid
122  *	and the receiver may send back a connection
123  *	closing RST or a SYN/ACK.
124  *
125  * Packets marked as INVALID (sIV):
126  *	if we regard them as truly invalid packets
127  */
/* State transitions: tcp_conntracks[direction][flag index][current state]. */
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
	{
/* ORIGINAL */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
/*
 *	sNO -> sSS	Initialize a new connection
 *	sSS -> sSS	Retransmitted SYN
 *	sS2 -> sS2	Late retransmitted SYN
 *	sSR -> sIG
 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
 *			are errors. Receiver will reply with RST
 *			and close the connection.
 *			Or we are not in sync and hold a dead connection.
 *	sFW -> sIG
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sSS	Reopened connection (RFC 1122).
 *	sCL -> sSS
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
/*
 *	sNO -> sIV	Too late and no reason to do anything
 *	sSS -> sIV	Client can't send SYN and then SYN/ACK
 *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
 *	sSR -> sSR	Late retransmitted SYN/ACK in simultaneous open
 *	sES -> sIV	Invalid SYN/ACK packets sent by the client
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sIV
 *	sCL -> sIV
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sNO -> sIV	Too late and no reason to do anything...
 *	sSS -> sIV	Client might not send FIN in this state:
 *			we enforce waiting for a SYN/ACK reply first.
 *	sS2 -> sIV
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions, waiting for
 *			the last ACK.
 *			Might be a retransmitted FIN as well...
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *	sNO -> sES	Assumed.
 *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
 *	sS2 -> sIV
 *	sSR -> sES	Established state is reached.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
 *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	},
	{
/* REPLY */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
/*
 *	sNO -> sIV	Never reached.
 *	sSS -> sS2	Simultaneous open
 *	sS2 -> sS2	Retransmitted simultaneous SYN
 *	sSR -> sIV	Invalid SYN packets sent by the server
 *	sES -> sIV
 *	sFW -> sIV
 *	sCW -> sIV
 *	sLA -> sIV
 *	sTW -> sSS	Reopened connection, but server may have switched role
 *	sCL -> sIV
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
 *	sSS -> sSR	Standard open.
 *	sS2 -> sSR	Simultaneous open
 *	sSR -> sIG	Retransmitted SYN/ACK, ignore it.
 *	sES -> sIG	Late retransmitted SYN/ACK?
 *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
 *	sCW -> sIG
 *	sLA -> sIG
 *	sTW -> sIG
 *	sCL -> sIG
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *	sSS -> sIV	Server might not send FIN in this state.
 *	sS2 -> sIV
 *	sSR -> sFW	Close started.
 *	sES -> sFW
 *	sFW -> sLA	FIN seen in both directions.
 *	sCW -> sLA
 *	sLA -> sLA	Retransmitted FIN.
 *	sTW -> sTW
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
/*
 *	sSS -> sIG	Might be a half-open connection.
 *	sS2 -> sIG
 *	sSR -> sSR	Might answer late resent SYN.
 *	sES -> sES	:-)
 *	sFW -> sCW	Normal close request answered by ACK.
 *	sCW -> sCW
 *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
 *	sTW -> sTW	Retransmitted last ACK.
 *	sCL -> sCL
 */
/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	}
};
257 
#ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
	/* Nothing to report for offloaded connections. */
	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
		return;

	seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
#endif
268 
get_conntrack_index(const struct tcphdr * tcph)269 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
270 {
271 	if (tcph->rst) return TCP_RST_SET;
272 	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
273 	else if (tcph->fin) return TCP_FIN_SET;
274 	else if (tcph->ack) return TCP_ACK_SET;
275 	else return TCP_NONE_SET;
276 }
277 
278 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
279    in IP Filter' by Guido van Rooij.
280 
281    http://www.sane.nl/events/sane2000/papers.html
282    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
283 
284    The boundaries and the conditions are changed according to RFC793:
285    the packet must intersect the window (i.e. segments may be
286    after the right or before the left edge) and thus receivers may ACK
287    segments after the right edge of the window.
288 
289 	td_maxend = max(sack + max(win,1)) seen in reply packets
290 	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
291 	td_maxwin += seq + len - sender.td_maxend
292 			if seq + len > sender.td_maxend
293 	td_end    = max(seq + len) seen in sent packets
294 
295    I.   Upper bound for valid data:	seq <= sender.td_maxend
296    II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
297    III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
298    IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW
299 
300    where sack is the highest right edge of sack block found in the packet
301    or ack in the case of packet without SACK option.
302 
   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
305 */
306 
segment_seq_plus_len(__u32 seq,size_t len,unsigned int dataoff,const struct tcphdr * tcph)307 static inline __u32 segment_seq_plus_len(__u32 seq,
308 					 size_t len,
309 					 unsigned int dataoff,
310 					 const struct tcphdr *tcph)
311 {
312 	/* XXX Should I use payload length field in IP/IPv6 header ?
313 	 * - YK */
314 	return (seq + len - dataoff - tcph->doff*4
315 		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
316 }
317 
/* Fixme: what about big packets? */
#define MAXACKWINCONST			66000
/* Window used when validating (s)ack values in tcp_in_window(): the
 * largest window seen from the sender, but never less than
 * MAXACKWINCONST. */
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)
323 
/*
 * Simplified tcp_parse_options routine from tcp_input.c
 *
 * Extracts the window-scale and SACK-permitted options from the segment
 * into @state.  Parsing stops silently at the first malformed option.
 */
static void tcp_options(const struct sk_buff *skb,
			unsigned int dataoff,
			const struct tcphdr *tcph,
			struct ip_ct_tcp_state *state)
{
	/* Room for the options of a maximum-size (15-word) TCP header. */
	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
	const unsigned char *ptr;
	int length = (tcph->doff*4) - sizeof(struct tcphdr);

	if (!length)
		return;

	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
				 length, buff);
	if (!ptr)
		return;

	/* Re-parse from a clean slate; only the BE_LIBERAL flag survives. */
	state->td_scale = 0;
	state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;

	while (length > 0) {
		int opcode=*ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			if (length < 2)
				return;
			opsize=*ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				return;	/* don't parse partial options */

			if (opcode == TCPOPT_SACK_PERM
			    && opsize == TCPOLEN_SACK_PERM)
				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
			else if (opcode == TCPOPT_WINDOW
				 && opsize == TCPOLEN_WINDOW) {
				state->td_scale = *(u_int8_t *)ptr;

				/* Clamp to the maximum legal shift count. */
				if (state->td_scale > TCP_MAX_WSCALE)
					state->td_scale = TCP_MAX_WSCALE;

				state->flags |=
					IP_CT_TCP_FLAG_WINDOW_SCALE;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}
384 
/* Scan the TCP options for a SACK option and raise *sack to the highest
 * right edge found.  *sack is pre-loaded by the caller with the plain
 * ack value and is left untouched if no well-formed SACK option exists.
 */
static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
                     const struct tcphdr *tcph, __u32 *sack)
{
	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
	const unsigned char *ptr;
	int length = (tcph->doff*4) - sizeof(struct tcphdr);
	__u32 tmp;

	if (!length)
		return;

	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
				 length, buff);
	if (!ptr)
		return;

	/* Fast path for timestamp-only option */
	if (length == TCPOLEN_TSTAMP_ALIGNED
	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
				       | (TCPOPT_NOP << 16)
				       | (TCPOPT_TIMESTAMP << 8)
				       | TCPOLEN_TIMESTAMP))
		return;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize, i;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			if (length < 2)
				return;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				return;	/* don't parse partial options */

			if (opcode == TCPOPT_SACK
			    && opsize >= (TCPOLEN_SACK_BASE
					  + TCPOLEN_SACK_PERBLOCK)
			    && !((opsize - TCPOLEN_SACK_BASE)
				 % TCPOLEN_SACK_PERBLOCK)) {
				for (i = 0;
				     i < (opsize - TCPOLEN_SACK_BASE);
				     i += TCPOLEN_SACK_PERBLOCK) {
					/* Right edge = second u32 of the block. */
					tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);

					if (after(tmp, *sack))
						*sack = tmp;
				}
				return;
			}
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}
448 
/* (Re)initialise the window tracking of @sender from the SYN or SYN/ACK
 * it has just emitted: @end is the right edge of the segment's sequence
 * space, @win the raw advertised window.
 */
static void tcp_init_sender(struct ip_ct_tcp_state *sender,
			    struct ip_ct_tcp_state *receiver,
			    const struct sk_buff *skb,
			    unsigned int dataoff,
			    const struct tcphdr *tcph,
			    u32 end, u32 win)
{
	/* SYN-ACK in reply to a SYN
	 * or SYN from reply direction in simultaneous open.
	 */
	sender->td_end = end;
	sender->td_maxend = end;
	/* Track a zero window as one octet so the bounds stay usable. */
	sender->td_maxwin = win ? win : 1;

	tcp_options(skb, dataoff, tcph, sender);

	/* RFC 1323: window scaling only takes effect when both sides sent
	 * the Window Scale option; otherwise drop any scale factor seen.
	 */
	if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE) ||
	    !(receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
		sender->td_scale = 0;
		receiver->td_scale = 0;
	}
}
474 
/* Validate one segment against the windows tracked for both directions
 * (bounds I.-IV. from the comment block above) and update the tracking
 * state.  Returns true if the segment is acceptable; out-of-window
 * segments may still be accepted in liberal mode.
 */
static bool tcp_in_window(struct nf_conn *ct,
			  enum ip_conntrack_dir dir,
			  unsigned int index,
			  const struct sk_buff *skb,
			  unsigned int dataoff,
			  const struct tcphdr *tcph,
			  const struct nf_hook_state *hook_state)
{
	struct ip_ct_tcp *state = &ct->proto.tcp;
	struct net *net = nf_ct_net(ct);
	struct nf_tcp_net *tn = nf_tcp_pernet(net);
	struct ip_ct_tcp_state *sender = &state->seen[dir];
	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
	__u32 seq, ack, sack, end, win, swin;
	u16 win_raw;
	s32 receiver_offset;
	bool res, in_recv_win;

	/*
	 * Get the required data from the packet.
	 */
	seq = ntohl(tcph->seq);
	ack = sack = ntohl(tcph->ack_seq);
	win_raw = ntohs(tcph->window);
	win = win_raw;
	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);

	/* If SACK was negotiated, use the highest SACK right edge instead
	 * of the plain ack for the (s)ack bounds. */
	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
		tcp_sack(skb, dataoff, tcph, &sack);

	/* Take into account NAT sequence number mangling */
	receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
	ack -= receiver_offset;
	sack -= receiver_offset;

	if (sender->td_maxwin == 0) {
		/*
		 * Initialize sender data.
		 */
		if (tcph->syn) {
			tcp_init_sender(sender, receiver,
					skb, dataoff, tcph,
					end, win);
			if (!tcph->ack)
				/* Simultaneous open */
				return true;
		} else {
			/*
			 * We are in the middle of a connection,
			 * its history is lost for us.
			 * Let's try to use the data from the packet.
			 */
			sender->td_end = end;
			swin = win << sender->td_scale;
			sender->td_maxwin = (swin == 0 ? 1 : swin);
			sender->td_maxend = end + sender->td_maxwin;
			if (receiver->td_maxwin == 0) {
				/* We haven't seen traffic in the other
				 * direction yet but we have to tweak window
				 * tracking to pass III and IV until that
				 * happens.
				 */
				receiver->td_end = receiver->td_maxend = sack;
			} else if (sack == receiver->td_end + 1) {
				/* Likely a reply to a keepalive.
				 * Needed for III.
				 */
				receiver->td_end++;
			}

		}
	} else if (tcph->syn &&
		   after(end, sender->td_end) &&
		   (state->state == TCP_CONNTRACK_SYN_SENT ||
		    state->state == TCP_CONNTRACK_SYN_RECV)) {
		/*
		 * RFC 793: "if a TCP is reinitialized ... then it need
		 * not wait at all; it must only be sure to use sequence
		 * numbers larger than those recently used."
		 *
		 * Re-init state for this direction, just like for the first
		 * syn(-ack) reply, it might differ in seq, ack or tcp options.
		 */
		tcp_init_sender(sender, receiver,
				skb, dataoff, tcph,
				end, win);

		if (dir == IP_CT_DIR_REPLY && !tcph->ack)
			return true;
	}

	if (!(tcph->ack)) {
		/*
		 * If there is no ACK, just pretend it was set and OK.
		 */
		ack = sack = receiver->td_end;
	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
		    (TCP_FLAG_ACK|TCP_FLAG_RST))
		   && (ack == 0)) {
		/*
		 * Broken TCP stacks, that set ACK in RST packets as well
		 * with zero ack value.
		 */
		ack = sack = receiver->td_end;
	}

	if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
		/*
		 * RST sent answering SYN.
		 */
		seq = end = sender->td_end;

	/* Is the ending sequence in the receive window (if available)? */
	in_recv_win = !receiver->td_maxwin ||
		      after(end, sender->td_end - receiver->td_maxwin - 1);

	/* Bounds I. (seq), II. (in_recv_win), III. and IV. (sack). */
	if (before(seq, sender->td_maxend + 1) &&
	    in_recv_win &&
	    before(sack, receiver->td_end + 1) &&
	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
		/*
		 * Take into account window scaling (RFC 1323).
		 */
		if (!tcph->syn)
			win <<= sender->td_scale;

		/*
		 * Update sender data.
		 */
		swin = win + (sack - ack);
		if (sender->td_maxwin < swin)
			sender->td_maxwin = swin;
		if (after(end, sender->td_end)) {
			sender->td_end = end;
			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
		}
		if (tcph->ack) {
			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
				sender->td_maxack = ack;
				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
			} else if (after(ack, sender->td_maxack))
				sender->td_maxack = ack;
		}

		/*
		 * Update receiver data.
		 */
		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
			receiver->td_maxwin += end - sender->td_maxend;
		if (after(sack + win, receiver->td_maxend - 1)) {
			receiver->td_maxend = sack + win;
			if (win == 0)
				receiver->td_maxend++;
		}
		if (ack == receiver->td_end)
			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;

		/*
		 * Check retransmissions.
		 */
		if (index == TCP_ACK_SET) {
			if (state->last_dir == dir
			    && state->last_seq == seq
			    && state->last_ack == ack
			    && state->last_end == end
			    && state->last_win == win_raw)
				state->retrans++;
			else {
				state->last_dir = dir;
				state->last_seq = seq;
				state->last_ack = ack;
				state->last_end = end;
				state->last_win = win_raw;
				state->retrans = 0;
			}
		}
		res = true;
	} else {
		res = false;
		/* Out of window, but liberal mode accepts it anyway. */
		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
		    tn->tcp_be_liberal)
			res = true;
		if (!res) {
			bool seq_ok = before(seq, sender->td_maxend + 1);

			if (!seq_ok) {
				u32 overshot = end - sender->td_maxend + 1;
				bool ack_ok;

				ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1);

				if (in_recv_win &&
				    ack_ok &&
				    overshot <= receiver->td_maxwin &&
				    before(sack, receiver->td_end + 1)) {
					/* Work around TCPs that send more bytes than allowed by
					 * the receive window.
					 *
					 * If the (marked as invalid) packet is allowed to pass by
					 * the ruleset and the peer acks this data, then its possible
					 * all future packets will trigger 'ACK is over upper bound' check.
					 *
					 * Thus if only the sequence check fails then do update td_end so
					 * possible ACK for this data can update internal state.
					 */
					sender->td_end = end;
					sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;

					nf_ct_l4proto_log_invalid(skb, ct, hook_state,
								  "%u bytes more than expected", overshot);
					return res;
				}
			}

			nf_ct_l4proto_log_invalid(skb, ct, hook_state,
			"%s",
			before(seq, sender->td_maxend + 1) ?
			in_recv_win ?
			before(sack, receiver->td_end + 1) ?
			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
			: "ACK is under the lower bound (possible overly delayed ACK)"
			: "ACK is over the upper bound (ACKed data not seen yet)"
			: "SEQ is under the lower bound (already ACKed data retransmitted)"
			: "SEQ is over the upper bound (over the window of the receiver)");
		}
	}

	return res;
}
704 
/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
/* Indexed by the FIN/SYN/RST/ACK/URG bits of the header; a zero entry
 * marks a combination that tcp_error() rejects as invalid. */
static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
				 TCPHDR_URG) + 1] =
{
	[TCPHDR_SYN]				= 1,
	[TCPHDR_SYN|TCPHDR_URG]			= 1,
	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
	[TCPHDR_RST]				= 1,
	[TCPHDR_RST|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
	[TCPHDR_ACK]				= 1,
	[TCPHDR_ACK|TCPHDR_URG]			= 1,
};
719 
/* Helper: log an invalid TCP packet with a short reason string. */
static void tcp_error_log(const struct sk_buff *skb,
			  const struct nf_hook_state *state,
			  const char *msg)
{
	nf_l4proto_log_invalid(skb, state, IPPROTO_TCP, "%s", msg);
}
726 
727 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
tcp_error(const struct tcphdr * th,struct sk_buff * skb,unsigned int dataoff,const struct nf_hook_state * state)728 static bool tcp_error(const struct tcphdr *th,
729 		      struct sk_buff *skb,
730 		      unsigned int dataoff,
731 		      const struct nf_hook_state *state)
732 {
733 	unsigned int tcplen = skb->len - dataoff;
734 	u8 tcpflags;
735 
736 	/* Not whole TCP header or malformed packet */
737 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
738 		tcp_error_log(skb, state, "truncated packet");
739 		return true;
740 	}
741 
742 	/* Checksum invalid? Ignore.
743 	 * We skip checking packets on the outgoing path
744 	 * because the checksum is assumed to be correct.
745 	 */
746 	/* FIXME: Source route IP option packets --RR */
747 	if (state->net->ct.sysctl_checksum &&
748 	    state->hook == NF_INET_PRE_ROUTING &&
749 	    nf_checksum(skb, state->hook, dataoff, IPPROTO_TCP, state->pf)) {
750 		tcp_error_log(skb, state, "bad checksum");
751 		return true;
752 	}
753 
754 	/* Check TCP flags. */
755 	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
756 	if (!tcp_valid_flags[tcpflags]) {
757 		tcp_error_log(skb, state, "invalid tcp flag combination");
758 		return true;
759 	}
760 
761 	return false;
762 }
763 
tcp_new(struct nf_conn * ct,const struct sk_buff * skb,unsigned int dataoff,const struct tcphdr * th)764 static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
765 			     unsigned int dataoff,
766 			     const struct tcphdr *th)
767 {
768 	enum tcp_conntrack new_state;
769 	struct net *net = nf_ct_net(ct);
770 	const struct nf_tcp_net *tn = nf_tcp_pernet(net);
771 
772 	/* Don't need lock here: this conntrack not in circulation yet */
773 	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
774 
775 	/* Invalid: delete conntrack */
776 	if (new_state >= TCP_CONNTRACK_MAX) {
777 		pr_debug("nf_ct_tcp: invalid new deleting.\n");
778 		return false;
779 	}
780 
781 	if (new_state == TCP_CONNTRACK_SYN_SENT) {
782 		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
783 		/* SYN packet */
784 		ct->proto.tcp.seen[0].td_end =
785 			segment_seq_plus_len(ntohl(th->seq), skb->len,
786 					     dataoff, th);
787 		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
788 		if (ct->proto.tcp.seen[0].td_maxwin == 0)
789 			ct->proto.tcp.seen[0].td_maxwin = 1;
790 		ct->proto.tcp.seen[0].td_maxend =
791 			ct->proto.tcp.seen[0].td_end;
792 
793 		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
794 	} else if (tn->tcp_loose == 0) {
795 		/* Don't try to pick up connections. */
796 		return false;
797 	} else {
798 		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
799 		/*
800 		 * We are in the middle of a connection,
801 		 * its history is lost for us.
802 		 * Let's try to use the data from the packet.
803 		 */
804 		ct->proto.tcp.seen[0].td_end =
805 			segment_seq_plus_len(ntohl(th->seq), skb->len,
806 					     dataoff, th);
807 		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
808 		if (ct->proto.tcp.seen[0].td_maxwin == 0)
809 			ct->proto.tcp.seen[0].td_maxwin = 1;
810 		ct->proto.tcp.seen[0].td_maxend =
811 			ct->proto.tcp.seen[0].td_end +
812 			ct->proto.tcp.seen[0].td_maxwin;
813 
814 		/* We assume SACK and liberal window checking to handle
815 		 * window scaling */
816 		ct->proto.tcp.seen[0].flags =
817 		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
818 					      IP_CT_TCP_FLAG_BE_LIBERAL;
819 	}
820 
821 	/* tcp_packet will set them */
822 	ct->proto.tcp.last_index = TCP_NONE_SET;
823 	return true;
824 }
825 
tcp_can_early_drop(const struct nf_conn * ct)826 static bool tcp_can_early_drop(const struct nf_conn *ct)
827 {
828 	switch (ct->proto.tcp.state) {
829 	case TCP_CONNTRACK_FIN_WAIT:
830 	case TCP_CONNTRACK_LAST_ACK:
831 	case TCP_CONNTRACK_TIME_WAIT:
832 	case TCP_CONNTRACK_CLOSE:
833 	case TCP_CONNTRACK_CLOSE_WAIT:
834 		return true;
835 	default:
836 		break;
837 	}
838 
839 	return false;
840 }
841 
nf_ct_tcp_state_reset(struct ip_ct_tcp_state * state)842 static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
843 {
844 	state->td_end		= 0;
845 	state->td_maxend	= 0;
846 	state->td_maxwin	= 0;
847 	state->td_maxack	= 0;
848 	state->td_scale		= 0;
849 	state->flags		&= IP_CT_TCP_FLAG_BE_LIBERAL;
850 }
851 
852 /* Returns verdict for packet, or -1 for invalid. */
nf_conntrack_tcp_packet(struct nf_conn * ct,struct sk_buff * skb,unsigned int dataoff,enum ip_conntrack_info ctinfo,const struct nf_hook_state * state)853 int nf_conntrack_tcp_packet(struct nf_conn *ct,
854 			    struct sk_buff *skb,
855 			    unsigned int dataoff,
856 			    enum ip_conntrack_info ctinfo,
857 			    const struct nf_hook_state *state)
858 {
859 	struct net *net = nf_ct_net(ct);
860 	struct nf_tcp_net *tn = nf_tcp_pernet(net);
861 	struct nf_conntrack_tuple *tuple;
862 	enum tcp_conntrack new_state, old_state;
863 	unsigned int index, *timeouts;
864 	enum ip_conntrack_dir dir;
865 	const struct tcphdr *th;
866 	struct tcphdr _tcph;
867 	unsigned long timeout;
868 
869 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
870 	if (th == NULL)
871 		return -NF_ACCEPT;
872 
873 	if (tcp_error(th, skb, dataoff, state))
874 		return -NF_ACCEPT;
875 
876 	if (!nf_ct_is_confirmed(ct) && !tcp_new(ct, skb, dataoff, th))
877 		return -NF_ACCEPT;
878 
879 	spin_lock_bh(&ct->lock);
880 	old_state = ct->proto.tcp.state;
881 	dir = CTINFO2DIR(ctinfo);
882 	index = get_conntrack_index(th);
883 	new_state = tcp_conntracks[dir][index][old_state];
884 	tuple = &ct->tuplehash[dir].tuple;
885 
886 	switch (new_state) {
887 	case TCP_CONNTRACK_SYN_SENT:
888 		if (old_state < TCP_CONNTRACK_TIME_WAIT)
889 			break;
890 		/* RFC 1122: "When a connection is closed actively,
891 		 * it MUST linger in TIME-WAIT state for a time 2xMSL
892 		 * (Maximum Segment Lifetime). However, it MAY accept
893 		 * a new SYN from the remote TCP to reopen the connection
894 		 * directly from TIME-WAIT state, if..."
895 		 * We ignore the conditions because we are in the
896 		 * TIME-WAIT state anyway.
897 		 *
898 		 * Handle aborted connections: we and the server
899 		 * think there is an existing connection but the client
900 		 * aborts it and starts a new one.
901 		 */
902 		if (((ct->proto.tcp.seen[dir].flags
903 		      | ct->proto.tcp.seen[!dir].flags)
904 		     & IP_CT_TCP_FLAG_CLOSE_INIT)
905 		    || (ct->proto.tcp.last_dir == dir
906 		        && ct->proto.tcp.last_index == TCP_RST_SET)) {
907 			/* Attempt to reopen a closed/aborted connection.
908 			 * Delete this connection and look up again. */
909 			spin_unlock_bh(&ct->lock);
910 
911 			/* Only repeat if we can actually remove the timer.
912 			 * Destruction may already be in progress in process
913 			 * context and we must give it a chance to terminate.
914 			 */
915 			if (nf_ct_kill(ct))
916 				return -NF_REPEAT;
917 			return NF_DROP;
918 		}
919 		fallthrough;
920 	case TCP_CONNTRACK_IGNORE:
921 		/* Ignored packets:
922 		 *
923 		 * Our connection entry may be out of sync, so ignore
924 		 * packets which may signal the real connection between
925 		 * the client and the server.
926 		 *
927 		 * a) SYN in ORIGINAL
928 		 * b) SYN/ACK in REPLY
929 		 * c) ACK in reply direction after initial SYN in original.
930 		 *
931 		 * If the ignored packet is invalid, the receiver will send
932 		 * a RST we'll catch below.
933 		 */
934 		if (index == TCP_SYNACK_SET
935 		    && ct->proto.tcp.last_index == TCP_SYN_SET
936 		    && ct->proto.tcp.last_dir != dir
937 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
938 			/* b) This SYN/ACK acknowledges a SYN that we earlier
939 			 * ignored as invalid. This means that the client and
940 			 * the server are both in sync, while the firewall is
941 			 * not. We get in sync from the previously annotated
942 			 * values.
943 			 */
944 			old_state = TCP_CONNTRACK_SYN_SENT;
945 			new_state = TCP_CONNTRACK_SYN_RECV;
946 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
947 				ct->proto.tcp.last_end;
948 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
949 				ct->proto.tcp.last_end;
950 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
951 				ct->proto.tcp.last_win == 0 ?
952 					1 : ct->proto.tcp.last_win;
953 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
954 				ct->proto.tcp.last_wscale;
955 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
956 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
957 				ct->proto.tcp.last_flags;
958 			nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]);
959 			break;
960 		}
961 		ct->proto.tcp.last_index = index;
962 		ct->proto.tcp.last_dir = dir;
963 		ct->proto.tcp.last_seq = ntohl(th->seq);
964 		ct->proto.tcp.last_end =
965 		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
966 		ct->proto.tcp.last_win = ntohs(th->window);
967 
968 		/* a) This is a SYN in ORIGINAL. The client and the server
969 		 * may be in sync but we are not. In that case, we annotate
970 		 * the TCP options and let the packet go through. If it is a
971 		 * valid SYN packet, the server will reply with a SYN/ACK, and
972 		 * then we'll get in sync. Otherwise, the server potentially
973 		 * responds with a challenge ACK if implementing RFC5961.
974 		 */
975 		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
976 			struct ip_ct_tcp_state seen = {};
977 
978 			ct->proto.tcp.last_flags =
979 			ct->proto.tcp.last_wscale = 0;
980 			tcp_options(skb, dataoff, th, &seen);
981 			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
982 				ct->proto.tcp.last_flags |=
983 					IP_CT_TCP_FLAG_WINDOW_SCALE;
984 				ct->proto.tcp.last_wscale = seen.td_scale;
985 			}
986 			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
987 				ct->proto.tcp.last_flags |=
988 					IP_CT_TCP_FLAG_SACK_PERM;
989 			}
990 			/* Mark the potential for RFC5961 challenge ACK,
991 			 * this pose a special problem for LAST_ACK state
992 			 * as ACK is intrepretated as ACKing last FIN.
993 			 */
994 			if (old_state == TCP_CONNTRACK_LAST_ACK)
995 				ct->proto.tcp.last_flags |=
996 					IP_CT_EXP_CHALLENGE_ACK;
997 		}
998 		spin_unlock_bh(&ct->lock);
999 		nf_ct_l4proto_log_invalid(skb, ct, state,
1000 					  "packet (index %d) in dir %d ignored, state %s",
1001 					  index, dir,
1002 					  tcp_conntrack_names[old_state]);
1003 		return NF_ACCEPT;
1004 	case TCP_CONNTRACK_MAX:
1005 		/* Special case for SYN proxy: when the SYN to the server or
1006 		 * the SYN/ACK from the server is lost, the client may transmit
1007 		 * a keep-alive packet while in SYN_SENT state. This needs to
1008 		 * be associated with the original conntrack entry in order to
1009 		 * generate a new SYN with the correct sequence number.
1010 		 */
1011 		if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
1012 		    index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
1013 		    ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
1014 		    ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
1015 			pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
1016 			spin_unlock_bh(&ct->lock);
1017 			return NF_ACCEPT;
1018 		}
1019 
1020 		/* Invalid packet */
1021 		spin_unlock_bh(&ct->lock);
1022 		nf_ct_l4proto_log_invalid(skb, ct, state,
1023 					  "packet (index %d) in dir %d invalid, state %s",
1024 					  index, dir,
1025 					  tcp_conntrack_names[old_state]);
1026 		return -NF_ACCEPT;
1027 	case TCP_CONNTRACK_TIME_WAIT:
1028 		/* RFC5961 compliance cause stack to send "challenge-ACK"
1029 		 * e.g. in response to spurious SYNs.  Conntrack MUST
1030 		 * not believe this ACK is acking last FIN.
1031 		 */
1032 		if (old_state == TCP_CONNTRACK_LAST_ACK &&
1033 		    index == TCP_ACK_SET &&
1034 		    ct->proto.tcp.last_dir != dir &&
1035 		    ct->proto.tcp.last_index == TCP_SYN_SET &&
1036 		    (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
1037 			/* Detected RFC5961 challenge ACK */
1038 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
1039 			spin_unlock_bh(&ct->lock);
1040 			nf_ct_l4proto_log_invalid(skb, ct, state, "challenge-ack ignored");
1041 			return NF_ACCEPT; /* Don't change state */
1042 		}
1043 		break;
1044 	case TCP_CONNTRACK_SYN_SENT2:
1045 		/* tcp_conntracks table is not smart enough to handle
1046 		 * simultaneous open.
1047 		 */
1048 		ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
1049 		break;
1050 	case TCP_CONNTRACK_SYN_RECV:
1051 		if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
1052 		    ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
1053 			new_state = TCP_CONNTRACK_ESTABLISHED;
1054 		break;
1055 	case TCP_CONNTRACK_CLOSE:
1056 		if (index != TCP_RST_SET)
1057 			break;
1058 
1059 		/* If we are closing, tuple might have been re-used already.
1060 		 * last_index, last_ack, and all other ct fields used for
1061 		 * sequence/window validation are outdated in that case.
1062 		 *
1063 		 * As the conntrack can already be expired by GC under pressure,
1064 		 * just skip validation checks.
1065 		 */
1066 		if (tcp_can_early_drop(ct))
1067 			goto in_window;
1068 
1069 		/* td_maxack might be outdated if we let a SYN through earlier */
1070 		if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) &&
1071 		    ct->proto.tcp.last_index != TCP_SYN_SET) {
1072 			u32 seq = ntohl(th->seq);
1073 
1074 			/* If we are not in established state and SEQ=0 this is most
1075 			 * likely an answer to a SYN we let go through above (last_index
1076 			 * can be updated due to out-of-order ACKs).
1077 			 */
1078 			if (seq == 0 && !nf_conntrack_tcp_established(ct))
1079 				break;
1080 
1081 			if (before(seq, ct->proto.tcp.seen[!dir].td_maxack) &&
1082 			    !tn->tcp_ignore_invalid_rst) {
1083 				/* Invalid RST  */
1084 				spin_unlock_bh(&ct->lock);
1085 				nf_ct_l4proto_log_invalid(skb, ct, state, "invalid rst");
1086 				return -NF_ACCEPT;
1087 			}
1088 
1089 			if (!nf_conntrack_tcp_established(ct) ||
1090 			    seq == ct->proto.tcp.seen[!dir].td_maxack)
1091 				break;
1092 
1093 			/* Check if rst is part of train, such as
1094 			 *   foo:80 > bar:4379: P, 235946583:235946602(19) ack 42
1095 			 *   foo:80 > bar:4379: R, 235946602:235946602(0)  ack 42
1096 			 */
1097 			if (ct->proto.tcp.last_index == TCP_ACK_SET &&
1098 			    ct->proto.tcp.last_dir == dir &&
1099 			    seq == ct->proto.tcp.last_end)
1100 				break;
1101 
1102 			/* ... RST sequence number doesn't match exactly, keep
1103 			 * established state to allow a possible challenge ACK.
1104 			 */
1105 			new_state = old_state;
1106 		}
1107 		if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
1108 			 && ct->proto.tcp.last_index == TCP_SYN_SET)
1109 			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
1110 			    && ct->proto.tcp.last_index == TCP_ACK_SET))
1111 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
1112 			/* RST sent to invalid SYN or ACK we had let through
1113 			 * at a) and c) above:
1114 			 *
1115 			 * a) SYN was in window then
1116 			 * c) we hold a half-open connection.
1117 			 *
1118 			 * Delete our connection entry.
1119 			 * We skip window checking, because packet might ACK
1120 			 * segments we ignored. */
1121 			goto in_window;
1122 		}
1123 		break;
1124 	default:
1125 		/* Keep compilers happy. */
1126 		break;
1127 	}
1128 
1129 	if (!tcp_in_window(ct, dir, index,
1130 			   skb, dataoff, th, state)) {
1131 		spin_unlock_bh(&ct->lock);
1132 		return -NF_ACCEPT;
1133 	}
1134      in_window:
1135 	/* From now on we have got in-window packets */
1136 	ct->proto.tcp.last_index = index;
1137 	ct->proto.tcp.last_dir = dir;
1138 
1139 	pr_debug("tcp_conntracks: ");
1140 	nf_ct_dump_tuple(tuple);
1141 	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1142 		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1143 		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1144 		 old_state, new_state);
1145 
1146 	ct->proto.tcp.state = new_state;
1147 	if (old_state != new_state
1148 	    && new_state == TCP_CONNTRACK_FIN_WAIT)
1149 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1150 
1151 	timeouts = nf_ct_timeout_lookup(ct);
1152 	if (!timeouts)
1153 		timeouts = tn->timeouts;
1154 
1155 	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1156 	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1157 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1158 	else if (unlikely(index == TCP_RST_SET))
1159 		timeout = timeouts[TCP_CONNTRACK_CLOSE];
1160 	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1161 		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1162 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1163 		timeout = timeouts[TCP_CONNTRACK_UNACK];
1164 	else if (ct->proto.tcp.last_win == 0 &&
1165 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1166 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1167 	else
1168 		timeout = timeouts[new_state];
1169 	spin_unlock_bh(&ct->lock);
1170 
1171 	if (new_state != old_state)
1172 		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1173 
1174 	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1175 		/* If only reply is a RST, we can consider ourselves not to
1176 		   have an established connection: this is a fairly common
1177 		   problem case, so we can delete the conntrack
1178 		   immediately.  --RR */
1179 		if (th->rst) {
1180 			nf_ct_kill_acct(ct, ctinfo, skb);
1181 			return NF_ACCEPT;
1182 		}
1183 
1184 		if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) {
1185 			/* do not renew timeout on SYN retransmit.
1186 			 *
1187 			 * Else port reuse by client or NAT middlebox can keep
1188 			 * entry alive indefinitely (including nat info).
1189 			 */
1190 			return NF_ACCEPT;
1191 		}
1192 
1193 		/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1194 		 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1195 		 */
1196 		if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1197 		    timeout > timeouts[TCP_CONNTRACK_UNACK])
1198 			timeout = timeouts[TCP_CONNTRACK_UNACK];
1199 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1200 		   && (old_state == TCP_CONNTRACK_SYN_RECV
1201 		       || old_state == TCP_CONNTRACK_ESTABLISHED)
1202 		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1203 		/* Set ASSURED if we see valid ack in ESTABLISHED
1204 		   after SYN_RECV or a valid answer for a picked up
1205 		   connection. */
1206 		set_bit(IPS_ASSURED_BIT, &ct->status);
1207 		nf_conntrack_event_cache(IPCT_ASSURED, ct);
1208 	}
1209 	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1210 
1211 	return NF_ACCEPT;
1212 }
1213 
1214 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1215 
1216 #include <linux/netfilter/nfnetlink.h>
1217 #include <linux/netfilter/nfnetlink_conntrack.h>
1218 
/* Dump the private TCP state of @ct into a nested CTA_PROTOINFO_TCP
 * attribute of @skb.  For a destroy event only the state number is
 * emitted.  Returns 0 on success, -1 if the skb ran out of tailroom.
 */
static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
			 struct nf_conn *ct, bool destroy)
{
	struct nf_ct_tcp_flags tmp = {};
	struct nlattr *nest_parms;

	/* proto.tcp is updated from the packet path under ct->lock */
	spin_lock_bh(&ct->lock);
	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP);
	if (!nest_parms)
		goto nla_put_failure;

	if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state))
		goto nla_put_failure;

	if (!destroy) {
		/* Full dump: per-direction window scales and flag blobs */
		if (nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
			       ct->proto.tcp.seen[0].td_scale) ||
		    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
			       ct->proto.tcp.seen[1].td_scale))
			goto nla_put_failure;

		tmp.flags = ct->proto.tcp.seen[0].flags;
		if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
			    sizeof(tmp), &tmp))
			goto nla_put_failure;

		tmp.flags = ct->proto.tcp.seen[1].flags;
		if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
			    sizeof(tmp), &tmp))
			goto nla_put_failure;
	}
	spin_unlock_bh(&ct->lock);
	nla_nest_end(skb, nest_parms);

	return 0;

nla_put_failure:
	spin_unlock_bh(&ct->lock);
	return -1;
}
1261 
/* Attribute policy used when parsing CTA_PROTOINFO_TCP requests from
 * userspace (ctnetlink); enforces minimal type/length validation.
 */
static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
	[CTA_PROTOINFO_TCP_STATE]	    = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len = sizeof(struct nf_ct_tcp_flags) },
};
1269 
/* Worst-case netlink payload of a full protoinfo dump: the state byte,
 * the two per-direction window-scale bytes and the two flag blobs
 * emitted by tcp_to_nlattr() (the nest header is accounted elsewhere).
 */
#define TCP_NLATTR_SIZE	( \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + 1) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
1275 
nlattr_to_tcp(struct nlattr * cda[],struct nf_conn * ct)1276 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1277 {
1278 	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1279 	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1280 	int err;
1281 
1282 	/* updates could not contain anything about the private
1283 	 * protocol info, in that case skip the parsing */
1284 	if (!pattr)
1285 		return 0;
1286 
1287 	err = nla_parse_nested_deprecated(tb, CTA_PROTOINFO_TCP_MAX, pattr,
1288 					  tcp_nla_policy, NULL);
1289 	if (err < 0)
1290 		return err;
1291 
1292 	if (tb[CTA_PROTOINFO_TCP_STATE] &&
1293 	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1294 		return -EINVAL;
1295 
1296 	spin_lock_bh(&ct->lock);
1297 	if (tb[CTA_PROTOINFO_TCP_STATE])
1298 		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1299 
1300 	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1301 		struct nf_ct_tcp_flags *attr =
1302 			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1303 		ct->proto.tcp.seen[0].flags &= ~attr->mask;
1304 		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1305 	}
1306 
1307 	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1308 		struct nf_ct_tcp_flags *attr =
1309 			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1310 		ct->proto.tcp.seen[1].flags &= ~attr->mask;
1311 		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1312 	}
1313 
1314 	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1315 	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1316 	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1317 	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1318 		ct->proto.tcp.seen[0].td_scale =
1319 			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1320 		ct->proto.tcp.seen[1].td_scale =
1321 			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1322 	}
1323 	spin_unlock_bh(&ct->lock);
1324 
1325 	return 0;
1326 }
1327 
tcp_nlattr_tuple_size(void)1328 static unsigned int tcp_nlattr_tuple_size(void)
1329 {
1330 	static unsigned int size __read_mostly;
1331 
1332 	if (!size)
1333 		size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1334 
1335 	return size;
1336 }
1337 #endif
1338 
1339 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1340 
1341 #include <linux/netfilter/nfnetlink.h>
1342 #include <linux/netfilter/nfnetlink_cttimeout.h>
1343 
tcp_timeout_nlattr_to_obj(struct nlattr * tb[],struct net * net,void * data)1344 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1345 				     struct net *net, void *data)
1346 {
1347 	struct nf_tcp_net *tn = nf_tcp_pernet(net);
1348 	unsigned int *timeouts = data;
1349 	int i;
1350 
1351 	if (!timeouts)
1352 		timeouts = tn->timeouts;
1353 	/* set default TCP timeouts. */
1354 	for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1355 		timeouts[i] = tn->timeouts[i];
1356 
1357 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1358 		timeouts[TCP_CONNTRACK_SYN_SENT] =
1359 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1360 	}
1361 
1362 	if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1363 		timeouts[TCP_CONNTRACK_SYN_RECV] =
1364 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1365 	}
1366 	if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1367 		timeouts[TCP_CONNTRACK_ESTABLISHED] =
1368 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1369 	}
1370 	if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1371 		timeouts[TCP_CONNTRACK_FIN_WAIT] =
1372 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1373 	}
1374 	if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1375 		timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1376 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1377 	}
1378 	if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1379 		timeouts[TCP_CONNTRACK_LAST_ACK] =
1380 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1381 	}
1382 	if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1383 		timeouts[TCP_CONNTRACK_TIME_WAIT] =
1384 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1385 	}
1386 	if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1387 		timeouts[TCP_CONNTRACK_CLOSE] =
1388 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1389 	}
1390 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1391 		timeouts[TCP_CONNTRACK_SYN_SENT2] =
1392 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1393 	}
1394 	if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1395 		timeouts[TCP_CONNTRACK_RETRANS] =
1396 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1397 	}
1398 	if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1399 		timeouts[TCP_CONNTRACK_UNACK] =
1400 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1401 	}
1402 
1403 	timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT];
1404 	return 0;
1405 }
1406 
1407 static int
tcp_timeout_obj_to_nlattr(struct sk_buff * skb,const void * data)1408 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1409 {
1410 	const unsigned int *timeouts = data;
1411 
1412 	if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1413 			htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1414 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1415 			 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1416 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1417 			 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1418 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1419 			 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1420 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1421 			 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1422 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1423 			 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1424 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1425 			 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1426 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1427 			 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1428 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1429 			 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1430 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1431 			 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1432 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1433 			 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1434 		goto nla_put_failure;
1435 	return 0;
1436 
1437 nla_put_failure:
1438 	return -ENOSPC;
1439 }
1440 
/* Attribute policy for the CTA_TIMEOUT_TCP_* attributes parsed by
 * tcp_timeout_nlattr_to_obj(); each timeout is a 32-bit second count.
 */
static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
	[CTA_TIMEOUT_TCP_SYN_SENT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_RECV]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_ESTABLISHED]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_FIN_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_LAST_ACK]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_TIME_WAIT]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_CLOSE]		= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_SYN_SENT2]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
};
1454 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1455 
nf_conntrack_tcp_init_net(struct net * net)1456 void nf_conntrack_tcp_init_net(struct net *net)
1457 {
1458 	struct nf_tcp_net *tn = nf_tcp_pernet(net);
1459 	int i;
1460 
1461 	for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1462 		tn->timeouts[i] = tcp_timeouts[i];
1463 
1464 	/* timeouts[0] is unused, make it same as SYN_SENT so
1465 	 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
1466 	 */
1467 	tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
1468 
1469 	/* If it is set to zero, we disable picking up already established
1470 	 * connections.
1471 	 */
1472 	tn->tcp_loose = 1;
1473 
1474 	/* "Be conservative in what you do,
1475 	 *  be liberal in what you accept from others."
1476 	 * If it's non-zero, we mark only out of window RST segments as INVALID.
1477 	 */
1478 	tn->tcp_be_liberal = 0;
1479 
1480 	/* If it's non-zero, we turn off RST sequence number check */
1481 	tn->tcp_ignore_invalid_rst = 0;
1482 
1483 	/* Max number of the retransmitted packets without receiving an (acceptable)
1484 	 * ACK from the destination. If this number is reached, a shorter timer
1485 	 * will be started.
1486 	 */
1487 	tn->tcp_max_retrans = 3;
1488 
1489 #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
1490 	tn->offload_timeout = 30 * HZ;
1491 #endif
1492 }
1493 
/* L4 protocol handler tying the TCP tracker into the conntrack core:
 * netlink (de)serialization, cttimeout support and early-drop policy.
 */
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
{
	.l4proto 		= IPPROTO_TCP,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	.print_conntrack 	= tcp_print_conntrack,
#endif
	.can_early_drop		= tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
	.to_nlattr		= tcp_to_nlattr,
	.from_nlattr		= nlattr_to_tcp,
	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
	.nlattr_size		= TCP_NLATTR_SIZE,
	.nla_policy		= nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
	.ctnl_timeout		= {
		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
		.obj_size	= sizeof(unsigned int) *
					TCP_CONNTRACK_TIMEOUT_MAX,
		.nla_policy	= tcp_timeout_nla_policy,
	},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
};
1521