/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP).
 *
 * Version:	$Id: tcp_timer.c,v 1.87 2001/09/21 21:27:34 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 */

#include <net/tcp.h>

int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
int sysctl_tcp_retries1 = TCP_RETR1;
int sysctl_tcp_retries2 = TCP_RETR2;
int sysctl_tcp_orphan_retries;

static void tcp_write_timer(unsigned long);
static void tcp_delack_timer(unsigned long);
static void tcp_keepalive_timer(unsigned long);

const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";

/*
 * We use different timers for retransmit, delayed acks and probes.
 * We may wish to use just one timer maintaining a list of expire
 * jiffies as an optimization.
 */

void tcp_init_xmit_timers(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

	init_timer(&tp->retransmit_timer);
	tp->retransmit_timer.function = &tcp_write_timer;
	tp->retransmit_timer.data = (unsigned long) sk;
	tp->pending = 0;

	init_timer(&tp->delack_timer);
	tp->delack_timer.function = &tcp_delack_timer;
	tp->delack_timer.data = (unsigned long) sk;
	tp->ack.pending = 0;

	init_timer(&sk->timer);
	sk->timer.function = &tcp_keepalive_timer;
	sk->timer.data = (unsigned long) sk;
}

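/* A note on reference counting (this documents the existing convention,
 * it adds no new behaviour): an armed timer holds a reference on the
 * socket.  mod_timer() returns 0 when the timer was not already pending,
 * so the caller takes a reference with sock_hold(); a successful
 * del_timer() killed a pending timer, so its reference is dropped with
 * __sock_put().
 */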
void tcp_clear_xmit_timers(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

	tp->pending = 0;
	if (timer_pending(&tp->retransmit_timer) &&
	    del_timer(&tp->retransmit_timer))
		__sock_put(sk);

	tp->ack.pending = 0;
	tp->ack.blocked = 0;
	if (timer_pending(&tp->delack_timer) &&
	    del_timer(&tp->delack_timer))
		__sock_put(sk);

	if (timer_pending(&sk->timer) && del_timer(&sk->timer))
		__sock_put(sk);
}

static void tcp_write_err(struct sock *sk)
{
	sk->err = sk->err_soft ? : ETIMEDOUT;
	sk->error_report(sk);

	tcp_done(sk);
	NET_INC_STATS_BH(TCPAbortOnTimeout);
}

/* Do not allow orphaned sockets to eat all our resources.
 * This is a direct violation of the TCP specs, but it is required
 * to prevent DoS attacks. It is called when a retransmission timeout
 * or zero probe timeout occurs on an orphaned socket.
 *
 * The criteria are still not confirmed experimentally and may change.
 * We kill the socket if:
 * 1. the number of orphaned sockets exceeds an administratively
 *    configured limit, or
 * 2. we are under strong memory pressure.
 */
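/* (Note on the arithmetic below: rather than lowering the limit, each
 * penalty doubles the observed orphan count, which halves the effective
 * limit this particular socket is checked against.)
 */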
static int tcp_out_of_resources(struct sock *sk, int do_reset)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	int orphans = atomic_read(&tcp_orphan_count);

	/* If the peer does not open its window for a long time, or did
	 * not transmit anything for a long time, penalize it. */
	if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
		orphans <<= 1;

	/* If some dubious ICMP arrived, penalize even more. */
	if (sk->err_soft)
		orphans <<= 1;

	if (orphans >= sysctl_tcp_max_orphans ||
	    (sk->wmem_queued > SOCK_MIN_SNDBUF &&
	     atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
		if (net_ratelimit())
			printk(KERN_INFO "Out of socket memory\n");

		/* Catch the exceptional cases when the connection requires
		 * a reset:
		 *      1. The last segment was sent recently. */
		if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
		    /*  2. The window is closed. */
		    (!tp->snd_wnd && !tp->packets_out))
			do_reset = 1;
		if (do_reset)
			tcp_send_active_reset(sk, GFP_ATOMIC);
		tcp_done(sk);
		NET_INC_STATS_BH(TCPAbortOnMemory);
		return 1;
	}
	return 0;
}

/* Calculate the maximal number of retries on an orphaned socket. */
static int tcp_orphan_retries(struct sock *sk, int alive)
{
	int retries = sysctl_tcp_orphan_retries; /* May be zero. */

	/* We know from an ICMP that something is wrong. */
	if (sk->err_soft && !alive)
		retries = 0;

	/* However, if the socket sent something recently, select some safe
	 * number of retries. 8 corresponds to >100 seconds with a minimal
	 * RTO of 200msec. */
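	/* (The arithmetic, assuming pure exponential backoff from the
	 * 200ms floor: 0.2s * (1 + 2 + ... + 2^8) = 0.2s * 511 = 102.2s.)
	 */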
	if (retries == 0 && alive)
		retries = 8;
	return retries;
}

/* A write timeout has occurred. Process the after effects. */
static int tcp_write_timeout(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	int retry_until;

	if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
		if (tp->retransmits)
			dst_negative_advice(&sk->dst_cache);
		retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries;
	} else {
		if (tp->retransmits >= sysctl_tcp_retries1) {
			/* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires PMTU
			   black hole detection. :-(

			   This is the place to implement it. It is not
			   implemented. I do not want to implement it. It is
			   disgusting. It does not work in any case. Let me
			   cite the same draft, which requires us to
			   implement this:

   "The one security concern raised by this memo is that ICMP black holes
   are often caused by over-zealous security administrators who block
   all ICMP messages.  It is vitally important that those who design and
   deploy security systems understand the impact of strict filtering on
   upper-layer protocols.  The safest web site in the world is worthless
   if most TCP implementations cannot transfer data from it.  It would
   be far nicer to have all of the black holes fixed rather than fixing
   all of the TCP implementations."

                           Golden words :-).
		   */

			dst_negative_advice(&sk->dst_cache);
		}

		retry_until = sysctl_tcp_retries2;
		if (sk->dead) {
			int alive = (tp->rto < TCP_RTO_MAX);

			retry_until = tcp_orphan_retries(sk, alive);

			if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
				return 1;
		}
	}

	if (tp->retransmits >= retry_until) {
		/* Has it gone just too far? */
		tcp_write_err(sk);
		return 1;
	}
	return 0;
}

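/* The delayed ACK timer.  Roughly: if the socket is locked by user
 * context we mark the ACK as blocked and retry shortly; otherwise we
 * drain any segments still parked on the prequeue and, if an ACK is
 * still scheduled, send it and adjust the delayed-ACK state
 * (ATO/pingpong).
 */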
static void tcp_delack_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	bh_lock_sock(sk);
	if (sk->lock.users) {
		/* Try again later. */
		tp->ack.blocked = 1;
		NET_INC_STATS_BH(DelayedACKLocked);
		if (!mod_timer(&tp->delack_timer, jiffies + TCP_DELACK_MIN))
			sock_hold(sk);
		goto out_unlock;
	}

	tcp_mem_reclaim(sk);

	if (sk->state == TCP_CLOSE || !(tp->ack.pending&TCP_ACK_TIMER))
		goto out;

	if ((long)(tp->ack.timeout - jiffies) > 0) {
		if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
			sock_hold(sk);
		goto out;
	}
	tp->ack.pending &= ~TCP_ACK_TIMER;

	if (skb_queue_len(&tp->ucopy.prequeue)) {
		struct sk_buff *skb;

		net_statistics[smp_processor_id()*2].TCPSchedulerFailed += skb_queue_len(&tp->ucopy.prequeue);

		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
			sk->backlog_rcv(sk, skb);

		tp->ucopy.memory = 0;
	}

	if (tcp_ack_scheduled(tp)) {
		if (!tp->ack.pingpong) {
			/* Delayed ACK missed: inflate ATO. */
			tp->ack.ato = min(tp->ack.ato << 1, tp->rto);
		} else {
			/* Delayed ACK missed: leave pingpong mode and
			 * deflate ATO.
			 */
			tp->ack.pingpong = 0;
			tp->ack.ato = TCP_ATO_MIN;
		}
		tcp_send_ack(sk);
		NET_INC_STATS_BH(DelayedACKs);
	}
	TCP_CHECK_TIMER(sk);

out:
	if (tcp_memory_pressure)
		tcp_mem_reclaim(sk);
out_unlock:
	bh_unlock_sock(sk);
	sock_put(sk);
}

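/* The zero window probe timer.  Reached via tcp_write_timer() when the
 * probe0 (persist) timer expires, i.e. when we still have queued data
 * but the peer's receive window is closed.
 */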
static void tcp_probe_timer(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	int max_probes;

	if (tp->packets_out || !tp->send_head) {
		tp->probes_out = 0;
		return;
	}

	/* *WARNING* RFC 1122 forbids this
	 *
	 * It doesn't AFAIK, because we kill the retransmit timer -AK
	 *
	 * FIXME: We ought not to do it, Solaris 2.5 actually has fixing
	 * this behaviour in Solaris down as a bug fix. [AC]
	 *
	 * Let me explain. probes_out is zeroed by incoming ACKs even if
	 * they advertise a zero window. Hence, the connection is killed
	 * only if we received no ACKs for the normal connection timeout.
	 * It is not killed merely because the window stays zero for some
	 * time; the window may be zero until armageddon and even later.
	 * We are in full accordance with the RFCs, except that the probe
	 * timer combines both the retransmission timeout and the probe
	 * timeout in one bottle.				--ANK
	 */
	max_probes = sysctl_tcp_retries2;

	if (sk->dead) {
		int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);

		max_probes = tcp_orphan_retries(sk, alive);

		if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
			return;
	}

	if (tp->probes_out > max_probes) {
		tcp_write_err(sk);
	} else {
		/* Only send another probe if we didn't close things up. */
		tcp_send_probe0(sk);
	}
}

/*
 *	The TCP retransmit timer.
 */

static void tcp_retransmit_timer(struct sock *sk)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

	if (tp->packets_out == 0)
		goto out;

	BUG_TRAP(!skb_queue_empty(&sk->write_queue));

	if (tp->snd_wnd == 0 && !sk->dead &&
	    !((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV))) {
		/* The receiver dastardly shrinks the window. Our retransmits
		 * become zero probes, but we should not timeout this
		 * connection. If the socket is an orphan, time it out;
		 * we cannot allow such beasts to hang infinitely.
		 */
#ifdef TCP_DEBUG
		if (net_ratelimit())
			printk(KERN_DEBUG "TCP: Treason uncloaked! Peer %u.%u.%u.%u:%u/%u shrinks window %u:%u. Repaired.\n",
			       NIPQUAD(sk->daddr), htons(sk->dport), sk->num,
			       tp->snd_una, tp->snd_nxt);
#endif
		if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) {
			tcp_write_err(sk);
			goto out;
		}
		tcp_enter_loss(sk, 0);
		tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
		__sk_dst_reset(sk);
		goto out_reset_timer;
	}

	if (tcp_write_timeout(sk))
		goto out;

	if (tp->retransmits == 0) {
		if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
			if (tp->sack_ok) {
				if (tp->ca_state == TCP_CA_Recovery)
					NET_INC_STATS_BH(TCPSackRecoveryFail);
				else
					NET_INC_STATS_BH(TCPSackFailures);
			} else {
				if (tp->ca_state == TCP_CA_Recovery)
					NET_INC_STATS_BH(TCPRenoRecoveryFail);
				else
					NET_INC_STATS_BH(TCPRenoFailures);
			}
		} else if (tp->ca_state == TCP_CA_Loss) {
			NET_INC_STATS_BH(TCPLossFailures);
		} else {
			NET_INC_STATS_BH(TCPTimeouts);
		}
	}

	if (tcp_use_frto(sk)) {
		tcp_enter_frto(sk);
	} else {
		tcp_enter_loss(sk, 0);
	}

	if (tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)) > 0) {
		/* Retransmission failed because of local congestion;
		 * do not back off.
		 */
		if (!tp->retransmits)
			tp->retransmits = 1;
		tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
				     min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
		goto out;
	}

	/* Increase the timeout each time we retransmit.  Note that
	 * we do not increase the rtt estimate.  rto is initialized
	 * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
	 * that doubling rto each time is the least we can get away with.
	 * In KA9Q, Karn uses this for the first few times, and then
	 * goes to quadratic.  NetBSD doubles, but only goes up to *64,
	 * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
	 * defined in the protocol as the maximum possible RTT.  I guess
	 * we'll have to use something other than TCP to talk to the
	 * University of Mars.
	 *
	 * PAWS allows us longer timeouts and large windows, so once
	 * implemented ftp to mars will work nicely. We will have to fix
	 * the 120 second clamps though!
	 */
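	/* (For example, an initial rto of 0.5s backs off through 1, 2, 4,
	 * 8, ... seconds, clamped at TCP_RTO_MAX below.)
	 */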
	tp->backoff++;
	tp->retransmits++;

out_reset_timer:
	tp->rto = min(tp->rto << 1, TCP_RTO_MAX);
	tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
	if (tp->retransmits > sysctl_tcp_retries1)
		__sk_dst_reset(sk);

out:;
}

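/* Common handler behind the retransmit and probe0 timers: tp->pending
 * records which event was scheduled, and we dispatch on it below.
 */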
static void tcp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	int event;

	bh_lock_sock(sk);
	if (sk->lock.users) {
		/* Try again later */
		if (!mod_timer(&tp->retransmit_timer, jiffies + (HZ/20)))
			sock_hold(sk);
		goto out_unlock;
	}

	if (sk->state == TCP_CLOSE || !tp->pending)
		goto out;

	if ((long)(tp->timeout - jiffies) > 0) {
		if (!mod_timer(&tp->retransmit_timer, tp->timeout))
			sock_hold(sk);
		goto out;
	}

	event = tp->pending;
	tp->pending = 0;

	switch (event) {
	case TCP_TIME_RETRANS:
		tcp_retransmit_timer(sk);
		break;
	case TCP_TIME_PROBE0:
		tcp_probe_timer(sk);
		break;
	}
	TCP_CHECK_TIMER(sk);

out:
	tcp_mem_reclaim(sk);
out_unlock:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/*
 *	Timer for listening sockets
 */

static void tcp_synack_timer(struct sock *sk)
{
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct tcp_listen_opt *lopt = tp->listen_opt;
	int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries;
	int thresh = max_retries;
	unsigned long now = jiffies;
	struct open_request **reqp, *req;
	int i, budget;

	if (lopt == NULL || lopt->qlen == 0)
		return;

	/* Normally all the openreqs are young and become mature
	 * (i.e. converted to an established socket) by the first timeout.
	 * If a synack was not acknowledged for 3 seconds, it means
	 * one of the following things: the synack was lost, the ack was
	 * lost, rtt is high or nobody planned to ack (i.e. synflood).
	 * When the server is a bit loaded, the queue is populated with old
	 * open requests, reducing the effective size of the queue.
	 * When the server is well loaded, the queue size reduces to zero
	 * after several minutes of work. This is not a synflood,
	 * it is normal operation. The solution is to prune entries
	 * that are too old, overriding the normal timeout, when the
	 * situation becomes dangerous.
	 *
	 * Essentially, we reserve half of the room for young
	 * embryos; and abort old ones without pity, if old
	 * ones are about to clog our table.
	 */
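	/* An illustrative walk-through of the pruning below (the numbers
	 * are hypothetical): with max_qlen_log = 8 (a 256-entry queue),
	 * pruning starts once qlen >= 128. If qlen = 128 and
	 * qlen_young = 16, thresh drops three times (128 >= 32, 64, 128),
	 * so old entries get three fewer SYNACK retransmissions than
	 * usual.
	 */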
	if (lopt->qlen >> (lopt->max_qlen_log-1)) {
		int young = (lopt->qlen_young<<1);

		while (thresh > 2) {
			if (lopt->qlen < young)
				break;
			thresh--;
			young <<= 1;
		}
	}

	if (tp->defer_accept)
		max_retries = tp->defer_accept;

	budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL));
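	/* (With the usual constants, TCP_SYNQ_HSIZE = 512 buckets,
	 * TCP_TIMEOUT_INIT = 3*HZ and TCP_SYNQ_INTERVAL = HZ/5, this is
	 * 2*(512/15) = 68 buckets per tick, i.e. the whole table is
	 * scanned roughly twice per initial timeout.)
	 */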
	i = lopt->clock_hand;

	do {
		reqp = &lopt->syn_table[i];
		while ((req = *reqp) != NULL) {
			if ((long)(now - req->expires) >= 0) {
				if ((req->retrans < thresh ||
				     (req->acked && req->retrans < max_retries))
				    && !req->class->rtx_syn_ack(sk, req, NULL)) {
					unsigned long timeo;

					if (req->retrans++ == 0)
						lopt->qlen_young--;
					timeo = min((TCP_TIMEOUT_INIT << req->retrans),
						    TCP_RTO_MAX);
					req->expires = now + timeo;
					reqp = &req->dl_next;
					continue;
				}

				/* Drop this request */
				write_lock(&tp->syn_wait_lock);
				*reqp = req->dl_next;
				write_unlock(&tp->syn_wait_lock);
				lopt->qlen--;
				if (req->retrans == 0)
					lopt->qlen_young--;
				tcp_openreq_free(req);
				continue;
			}
			reqp = &req->dl_next;
		}

		i = (i+1)&(TCP_SYNQ_HSIZE-1);

	} while (--budget > 0);

	lopt->clock_hand = i;

	if (lopt->qlen)
		tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL);
}

void tcp_delete_keepalive_timer(struct sock *sk)
{
	if (timer_pending(&sk->timer) && del_timer(&sk->timer))
		__sock_put(sk);
}

void tcp_reset_keepalive_timer(struct sock *sk, unsigned long len)
{
	if (!mod_timer(&sk->timer, jiffies+len))
		sock_hold(sk);
}

void tcp_set_keepalive(struct sock *sk, int val)
{
	if ((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))
		return;

	if (val && !sk->keepopen)
		tcp_reset_keepalive_timer(sk, keepalive_time_when(&sk->tp_pinfo.af_tcp));
	else if (!val)
		tcp_delete_keepalive_timer(sk);
}


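/* The keepalive timer serves three roles, keyed off the socket state:
 * on TCP_LISTEN sockets it drives the SYNACK machinery above, on dead
 * TCP_FIN_WAIT2 sockets it enforces the FIN_WAIT2 timeout, and on
 * keepopen sockets it sends the actual keepalive probes.
 */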
static void tcp_keepalive_timer(unsigned long data)
{
	struct sock *sk = (struct sock *) data;
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	__u32 elapsed;

	/* Only process if the socket is not in use. */
	bh_lock_sock(sk);
	if (sk->lock.users) {
		/* Try again later. */
		tcp_reset_keepalive_timer(sk, HZ/20);
		goto out;
	}

	if (sk->state == TCP_LISTEN) {
		tcp_synack_timer(sk);
		goto out;
	}

	if (sk->state == TCP_FIN_WAIT2 && sk->dead) {
		if (tp->linger2 >= 0) {
			int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN;

			if (tmo > 0) {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
		tcp_send_active_reset(sk, GFP_ATOMIC);
		goto death;
	}

	if (!sk->keepopen || sk->state == TCP_CLOSE)
		goto out;

	elapsed = keepalive_time_when(tp);

	/* It is alive without keepalive 8) */
	if (tp->packets_out || tp->send_head)
		goto resched;

	elapsed = tcp_time_stamp - tp->rcv_tstamp;

	if (elapsed >= keepalive_time_when(tp)) {
		if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) ||
		     (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) {
			tcp_send_active_reset(sk, GFP_ATOMIC);
			tcp_write_err(sk);
			goto out;
		}
		if (tcp_write_wakeup(sk) <= 0) {
			tp->probes_out++;
			elapsed = keepalive_intvl_when(tp);
		} else {
			/* If the keepalive was lost due to local congestion,
			 * try harder.
			 */
			elapsed = TCP_RESOURCE_PROBE_INTERVAL;
		}
	} else {
		/* The timer will fire at tp->rcv_tstamp +
		 * keepalive_time_when(tp).
		 */
		elapsed = keepalive_time_when(tp) - elapsed;
	}

	TCP_CHECK_TIMER(sk);
	tcp_mem_reclaim(sk);

resched:
	tcp_reset_keepalive_timer(sk, elapsed);
	goto out;

death:
	tcp_done(sk);

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}