1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *	TCP over IPv6
4  *	Linux INET6 implementation
5  *
6  *	Authors:
7  *	Pedro Roque		<roque@di.fc.ul.pt>
8  *
9  *	Based on:
10  *	linux/net/ipv4/tcp.c
11  *	linux/net/ipv4/tcp_input.c
12  *	linux/net/ipv4/tcp_output.c
13  *
14  *	Fixes:
15  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
16  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
17  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
18  *					a single port at the same time.
19  *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
20  */
21 
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43 
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62 
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65 
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68 
69 #include <trace/events/tcp.h>
70 
71 static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 				      struct request_sock *req);
74 
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76 
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 						   const struct in6_addr *addr,
85 						   int l3index)
86 {
87 	return NULL;
88 }
89 #endif
90 
/* tcp_inet6_sk() - return the inet6 part of a given TCP socket.
 * Usable inside the TCP stack in place of inet6_sk(sk): the result is
 * computed from tcp_sk(sk) through its enclosing struct tcp6_sock, which
 * avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
#define tcp_inet6_sk(sk) \
	(&container_of_const(tcp_sk(sk), struct tcp6_sock, tcp)->inet6)
98 
inet6_sk_rx_dst_set(struct sock * sk,const struct sk_buff * skb)99 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
100 {
101 	struct dst_entry *dst = skb_dst(skb);
102 
103 	if (dst && dst_hold_safe(dst)) {
104 		const struct rt6_info *rt = (const struct rt6_info *)dst;
105 
106 		rcu_assign_pointer(sk->sk_rx_dst, dst);
107 		sk->sk_rx_dst_ifindex = skb->skb_iif;
108 		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
109 	}
110 }
111 
tcp_v6_init_seq(const struct sk_buff * skb)112 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
113 {
114 	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
115 				ipv6_hdr(skb)->saddr.s6_addr32,
116 				tcp_hdr(skb)->dest,
117 				tcp_hdr(skb)->source);
118 }
119 
tcp_v6_init_ts_off(const struct net * net,const struct sk_buff * skb)120 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
121 {
122 	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
123 				   ipv6_hdr(skb)->saddr.s6_addr32);
124 }
125 
tcp_v6_pre_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)126 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
127 			      int addr_len)
128 {
129 	/* This check is replicated from tcp_v6_connect() and intended to
130 	 * prevent BPF program called below from accessing bytes that are out
131 	 * of the bound specified by user in addr_len.
132 	 */
133 	if (addr_len < SIN6_LEN_RFC2133)
134 		return -EINVAL;
135 
136 	sock_owned_by_me(sk);
137 
138 	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len);
139 }
140 
tcp_v6_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)141 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
142 			  int addr_len)
143 {
144 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
145 	struct inet_connection_sock *icsk = inet_csk(sk);
146 	struct in6_addr *saddr = NULL, *final_p, final;
147 	struct inet_timewait_death_row *tcp_death_row;
148 	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
149 	struct inet_sock *inet = inet_sk(sk);
150 	struct tcp_sock *tp = tcp_sk(sk);
151 	struct net *net = sock_net(sk);
152 	struct ipv6_txoptions *opt;
153 	struct dst_entry *dst;
154 	struct flowi6 fl6;
155 	int addr_type;
156 	int err;
157 
158 	if (addr_len < SIN6_LEN_RFC2133)
159 		return -EINVAL;
160 
161 	if (usin->sin6_family != AF_INET6)
162 		return -EAFNOSUPPORT;
163 
164 	memset(&fl6, 0, sizeof(fl6));
165 
166 	if (np->sndflow) {
167 		fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
168 		IP6_ECN_flow_init(fl6.flowlabel);
169 		if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
170 			struct ip6_flowlabel *flowlabel;
171 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
172 			if (IS_ERR(flowlabel))
173 				return -EINVAL;
174 			fl6_sock_release(flowlabel);
175 		}
176 	}
177 
178 	/*
179 	 *	connect() to INADDR_ANY means loopback (BSD'ism).
180 	 */
181 
182 	if (ipv6_addr_any(&usin->sin6_addr)) {
183 		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
184 			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
185 					       &usin->sin6_addr);
186 		else
187 			usin->sin6_addr = in6addr_loopback;
188 	}
189 
190 	addr_type = ipv6_addr_type(&usin->sin6_addr);
191 
192 	if (addr_type & IPV6_ADDR_MULTICAST)
193 		return -ENETUNREACH;
194 
195 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
196 		if (addr_len >= sizeof(struct sockaddr_in6) &&
197 		    usin->sin6_scope_id) {
198 			/* If interface is set while binding, indices
199 			 * must coincide.
200 			 */
201 			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
202 				return -EINVAL;
203 
204 			sk->sk_bound_dev_if = usin->sin6_scope_id;
205 		}
206 
207 		/* Connect to link-local address requires an interface */
208 		if (!sk->sk_bound_dev_if)
209 			return -EINVAL;
210 	}
211 
212 	if (tp->rx_opt.ts_recent_stamp &&
213 	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
214 		tp->rx_opt.ts_recent = 0;
215 		tp->rx_opt.ts_recent_stamp = 0;
216 		WRITE_ONCE(tp->write_seq, 0);
217 	}
218 
219 	sk->sk_v6_daddr = usin->sin6_addr;
220 	np->flow_label = fl6.flowlabel;
221 
222 	/*
223 	 *	TCP over IPv4
224 	 */
225 
226 	if (addr_type & IPV6_ADDR_MAPPED) {
227 		u32 exthdrlen = icsk->icsk_ext_hdr_len;
228 		struct sockaddr_in sin;
229 
230 		if (ipv6_only_sock(sk))
231 			return -ENETUNREACH;
232 
233 		sin.sin_family = AF_INET;
234 		sin.sin_port = usin->sin6_port;
235 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
236 
237 		/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
238 		WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
239 		if (sk_is_mptcp(sk))
240 			mptcpv6_handle_mapped(sk, true);
241 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
242 #ifdef CONFIG_TCP_MD5SIG
243 		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
244 #endif
245 
246 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
247 
248 		if (err) {
249 			icsk->icsk_ext_hdr_len = exthdrlen;
250 			/* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
251 			WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
252 			if (sk_is_mptcp(sk))
253 				mptcpv6_handle_mapped(sk, false);
254 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
255 #ifdef CONFIG_TCP_MD5SIG
256 			tp->af_specific = &tcp_sock_ipv6_specific;
257 #endif
258 			goto failure;
259 		}
260 		np->saddr = sk->sk_v6_rcv_saddr;
261 
262 		return err;
263 	}
264 
265 	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
266 		saddr = &sk->sk_v6_rcv_saddr;
267 
268 	fl6.flowi6_proto = IPPROTO_TCP;
269 	fl6.daddr = sk->sk_v6_daddr;
270 	fl6.saddr = saddr ? *saddr : np->saddr;
271 	fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
272 	fl6.flowi6_oif = sk->sk_bound_dev_if;
273 	fl6.flowi6_mark = sk->sk_mark;
274 	fl6.fl6_dport = usin->sin6_port;
275 	fl6.fl6_sport = inet->inet_sport;
276 	fl6.flowi6_uid = sk->sk_uid;
277 
278 	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 	final_p = fl6_update_dst(&fl6, opt, &final);
280 
281 	security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282 
283 	dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
284 	if (IS_ERR(dst)) {
285 		err = PTR_ERR(dst);
286 		goto failure;
287 	}
288 
289 	tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
290 
291 	if (!saddr) {
292 		saddr = &fl6.saddr;
293 
294 		err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
295 		if (err)
296 			goto failure;
297 	}
298 
299 	/* set the source address */
300 	np->saddr = *saddr;
301 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
302 
303 	sk->sk_gso_type = SKB_GSO_TCPV6;
304 	ip6_dst_store(sk, dst, NULL, NULL);
305 
306 	icsk->icsk_ext_hdr_len = 0;
307 	if (opt)
308 		icsk->icsk_ext_hdr_len = opt->opt_flen +
309 					 opt->opt_nflen;
310 
311 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
312 
313 	inet->inet_dport = usin->sin6_port;
314 
315 	tcp_set_state(sk, TCP_SYN_SENT);
316 	err = inet6_hash_connect(tcp_death_row, sk);
317 	if (err)
318 		goto late_failure;
319 
320 	sk_set_txhash(sk);
321 
322 	if (likely(!tp->repair)) {
323 		if (!tp->write_seq)
324 			WRITE_ONCE(tp->write_seq,
325 				   secure_tcpv6_seq(np->saddr.s6_addr32,
326 						    sk->sk_v6_daddr.s6_addr32,
327 						    inet->inet_sport,
328 						    inet->inet_dport));
329 		tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
330 						   sk->sk_v6_daddr.s6_addr32);
331 	}
332 
333 	if (tcp_fastopen_defer_connect(sk, &err))
334 		return err;
335 	if (err)
336 		goto late_failure;
337 
338 	err = tcp_connect(sk);
339 	if (err)
340 		goto late_failure;
341 
342 	return 0;
343 
344 late_failure:
345 	tcp_set_state(sk, TCP_CLOSE);
346 	inet_bhash2_reset_saddr(sk);
347 failure:
348 	inet->inet_dport = 0;
349 	sk->sk_route_caps = 0;
350 	return err;
351 }
352 
tcp_v6_mtu_reduced(struct sock * sk)353 static void tcp_v6_mtu_reduced(struct sock *sk)
354 {
355 	struct dst_entry *dst;
356 	u32 mtu;
357 
358 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
359 		return;
360 
361 	mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
362 
363 	/* Drop requests trying to increase our current mss.
364 	 * Check done in __ip6_rt_update_pmtu() is too late.
365 	 */
366 	if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
367 		return;
368 
369 	dst = inet6_csk_update_pmtu(sk, mtu);
370 	if (!dst)
371 		return;
372 
373 	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
374 		tcp_sync_mss(sk, dst_mtu(dst));
375 		tcp_simple_retransmit(sk);
376 	}
377 }
378 
/* ICMPv6 error handler for TCP.
 *
 * Looks up the established socket matching the IPv6/TCP headers found at
 * skb->data and skb->data + @offset, then acts on @type/@code: redirects
 * and packet-too-big messages get dedicated handling, other errors are
 * reported on the socket as a hard (sk_err) or soft (sk_err_soft) error.
 *
 * Returns -ENOENT when no socket matches, 0 otherwise.
 */
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
					&ip6h->daddr, th->dest,
					&ip6h->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));
	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	/* Nothing to report on a timewait socket. */
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}

	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);

	/* Request sockets are passed on to tcp_req_err(). */
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto out;
		}
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	if (fastopen)
		snd_una = tcp_rsk(fastopen)->snt_isn;
	else
		snd_una = tp->snd_una;

	/* Except for listeners, the quoted sequence number must lie
	 * between snd_una and snd_nxt.
	 */
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		u32 mtu = ntohl(info);

		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs send out by Linux are always <576bytes so
		 * they should go through unfragmented).
		 * Also skipped: sockets not accepting PMTU updates and
		 * MTU values below IPV6_MIN_MTU.
		 */
		if (sk->sk_state == TCP_LISTEN ||
		    !ip6_sk_accept_pmtu(sk) ||
		    mtu < IPV6_MIN_MTU)
			goto out;

		WRITE_ONCE(tp->mtu_info, mtu);

		/* Apply now if the socket is free, otherwise flag the work
		 * as deferred, taking a reference the first time the flag
		 * gets set.
		 */
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for an request_sock */
	switch (sk->sk_state) {
	case TCP_LISTEN:
		break;
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (sock_owned_by_user(sk)) {
			WRITE_ONCE(sk->sk_err_soft, err);
		} else {
			WRITE_ONCE(sk->sk_err, err);
			/* Wake people up to see the error (see connect in sock.c) */
			sk_error_report(sk);

			tcp_done(sk);
		}
		goto out;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	/* Hard error only when the socket is not user-owned and has recverr
	 * set; a soft error in every other case.
	 */
	if (sock_owned_by_user(sk) || !np->recverr) {
		WRITE_ONCE(sk->sk_err_soft, err);
	} else {
		WRITE_ONCE(sk->sk_err, err);
		sk_error_report(sk);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}
522 
523 
tcp_v6_send_synack(const struct sock * sk,struct dst_entry * dst,struct flowi * fl,struct request_sock * req,struct tcp_fastopen_cookie * foc,enum tcp_synack_type synack_type,struct sk_buff * syn_skb)524 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
525 			      struct flowi *fl,
526 			      struct request_sock *req,
527 			      struct tcp_fastopen_cookie *foc,
528 			      enum tcp_synack_type synack_type,
529 			      struct sk_buff *syn_skb)
530 {
531 	struct inet_request_sock *ireq = inet_rsk(req);
532 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
533 	struct ipv6_txoptions *opt;
534 	struct flowi6 *fl6 = &fl->u.ip6;
535 	struct sk_buff *skb;
536 	int err = -ENOMEM;
537 	u8 tclass;
538 
539 	/* First, grab a route. */
540 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
541 					       IPPROTO_TCP)) == NULL)
542 		goto done;
543 
544 	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
545 
546 	if (skb) {
547 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
548 				    &ireq->ir_v6_rmt_addr);
549 
550 		fl6->daddr = ireq->ir_v6_rmt_addr;
551 		if (np->repflow && ireq->pktopts)
552 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
553 
554 		tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
555 				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
556 				(np->tclass & INET_ECN_MASK) :
557 				np->tclass;
558 
559 		if (!INET_ECN_is_capable(tclass) &&
560 		    tcp_bpf_ca_needs_ecn((struct sock *)req))
561 			tclass |= INET_ECN_ECT_0;
562 
563 		rcu_read_lock();
564 		opt = ireq->ipv6_opt;
565 		if (!opt)
566 			opt = rcu_dereference(np->opt);
567 		err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
568 			       opt, tclass, sk->sk_priority);
569 		rcu_read_unlock();
570 		err = net_xmit_eval(err);
571 	}
572 
573 done:
574 	return err;
575 }
576 
577 
tcp_v6_reqsk_destructor(struct request_sock * req)578 static void tcp_v6_reqsk_destructor(struct request_sock *req)
579 {
580 	kfree(inet_rsk(req)->ipv6_opt);
581 	consume_skb(inet_rsk(req)->pktopts);
582 }
583 
584 #ifdef CONFIG_TCP_MD5SIG
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)585 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
586 						   const struct in6_addr *addr,
587 						   int l3index)
588 {
589 	return tcp_md5_do_lookup(sk, l3index,
590 				 (union tcp_md5_addr *)addr, AF_INET6);
591 }
592 
tcp_v6_md5_lookup(const struct sock * sk,const struct sock * addr_sk)593 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
594 						const struct sock *addr_sk)
595 {
596 	int l3index;
597 
598 	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
599 						 addr_sk->sk_bound_dev_if);
600 	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
601 				    l3index);
602 }
603 
/* Handle the TCP_MD5SIG / TCP_MD5SIG_EXT socket options for IPv6 sockets.
 *
 * Copies a struct tcp_md5sig from @optval, validates it, and adds the key
 * (or deletes it when tcpm_keylen is 0).  A v4-mapped address is stored as
 * an AF_INET key built from its last 32 bits.
 *
 * Returns -EINVAL or -EFAULT on bad input, else the result of
 * tcp_md5_do_add() / tcp_md5_do_del().
 */
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	union tcp_md5_addr *md5_addr;
	bool is_ext, v4mapped;
	int l3index = 0;
	u8 prefixlen;
	int family;
	u8 flags;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	is_ext = optname == TCP_MD5SIG_EXT;
	v4mapped = ipv6_addr_v4mapped(&sin6->sin6_addr);
	flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;

	/* An explicit prefix length is only honoured for the _EXT option;
	 * otherwise the full address length is used.
	 */
	if (is_ext && (cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX)) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (v4mapped && prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = v4mapped ? 32 : 128;
	}

	if (is_ext && cmd.tcpm_ifindex &&
	    (cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX)) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	/* Pick the key's address family once for both add and delete. */
	if (v4mapped) {
		md5_addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3];
		family = AF_INET;
	} else {
		md5_addr = (union tcp_md5_addr *)&sin6->sin6_addr;
		family = AF_INET6;
	}

	if (!cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, md5_addr, family, prefixlen,
				      l3index, flags);

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, md5_addr, family, prefixlen, l3index, flags,
			      cmd.tcpm_key, cmd.tcpm_keylen);
}
672 
tcp_v6_md5_hash_headers(struct tcp_md5sig_pool * hp,const struct in6_addr * daddr,const struct in6_addr * saddr,const struct tcphdr * th,int nbytes)673 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
674 				   const struct in6_addr *daddr,
675 				   const struct in6_addr *saddr,
676 				   const struct tcphdr *th, int nbytes)
677 {
678 	struct tcp6_pseudohdr *bp;
679 	struct scatterlist sg;
680 	struct tcphdr *_th;
681 
682 	bp = hp->scratch;
683 	/* 1. TCP pseudo-header (RFC2460) */
684 	bp->saddr = *saddr;
685 	bp->daddr = *daddr;
686 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
687 	bp->len = cpu_to_be32(nbytes);
688 
689 	_th = (struct tcphdr *)(bp + 1);
690 	memcpy(_th, th, sizeof(*th));
691 	_th->check = 0;
692 
693 	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
694 	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
695 				sizeof(*bp) + sizeof(*th));
696 	return crypto_ahash_update(hp->md5_req);
697 }
698 
tcp_v6_md5_hash_hdr(char * md5_hash,const struct tcp_md5sig_key * key,const struct in6_addr * daddr,struct in6_addr * saddr,const struct tcphdr * th)699 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
700 			       const struct in6_addr *daddr, struct in6_addr *saddr,
701 			       const struct tcphdr *th)
702 {
703 	struct tcp_md5sig_pool *hp;
704 	struct ahash_request *req;
705 
706 	hp = tcp_get_md5sig_pool();
707 	if (!hp)
708 		goto clear_hash_noput;
709 	req = hp->md5_req;
710 
711 	if (crypto_ahash_init(req))
712 		goto clear_hash;
713 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
714 		goto clear_hash;
715 	if (tcp_md5_hash_key(hp, key))
716 		goto clear_hash;
717 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
718 	if (crypto_ahash_final(req))
719 		goto clear_hash;
720 
721 	tcp_put_md5sig_pool();
722 	return 0;
723 
724 clear_hash:
725 	tcp_put_md5sig_pool();
726 clear_hash_noput:
727 	memset(md5_hash, 0, 16);
728 	return 1;
729 }
730 
tcp_v6_md5_hash_skb(char * md5_hash,const struct tcp_md5sig_key * key,const struct sock * sk,const struct sk_buff * skb)731 static int tcp_v6_md5_hash_skb(char *md5_hash,
732 			       const struct tcp_md5sig_key *key,
733 			       const struct sock *sk,
734 			       const struct sk_buff *skb)
735 {
736 	const struct in6_addr *saddr, *daddr;
737 	struct tcp_md5sig_pool *hp;
738 	struct ahash_request *req;
739 	const struct tcphdr *th = tcp_hdr(skb);
740 
741 	if (sk) { /* valid for establish/request sockets */
742 		saddr = &sk->sk_v6_rcv_saddr;
743 		daddr = &sk->sk_v6_daddr;
744 	} else {
745 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
746 		saddr = &ip6h->saddr;
747 		daddr = &ip6h->daddr;
748 	}
749 
750 	hp = tcp_get_md5sig_pool();
751 	if (!hp)
752 		goto clear_hash_noput;
753 	req = hp->md5_req;
754 
755 	if (crypto_ahash_init(req))
756 		goto clear_hash;
757 
758 	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
759 		goto clear_hash;
760 	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
761 		goto clear_hash;
762 	if (tcp_md5_hash_key(hp, key))
763 		goto clear_hash;
764 	ahash_request_set_crypt(req, NULL, md5_hash, 0);
765 	if (crypto_ahash_final(req))
766 		goto clear_hash;
767 
768 	tcp_put_md5sig_pool();
769 	return 0;
770 
771 clear_hash:
772 	tcp_put_md5sig_pool();
773 clear_hash_noput:
774 	memset(md5_hash, 0, 16);
775 	return 1;
776 }
777 
778 #endif
779 
tcp_v6_init_req(struct request_sock * req,const struct sock * sk_listener,struct sk_buff * skb)780 static void tcp_v6_init_req(struct request_sock *req,
781 			    const struct sock *sk_listener,
782 			    struct sk_buff *skb)
783 {
784 	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
785 	struct inet_request_sock *ireq = inet_rsk(req);
786 	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
787 
788 	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
789 	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
790 
791 	/* So that link locals have meaning */
792 	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
793 	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
794 		ireq->ir_iif = tcp_v6_iif(skb);
795 
796 	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
797 	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
798 	     np->rxopt.bits.rxinfo ||
799 	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
800 	     np->rxopt.bits.rxohlim || np->repflow)) {
801 		refcount_inc(&skb->users);
802 		ireq->pktopts = skb;
803 	}
804 }
805 
tcp_v6_route_req(const struct sock * sk,struct sk_buff * skb,struct flowi * fl,struct request_sock * req)806 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
807 					  struct sk_buff *skb,
808 					  struct flowi *fl,
809 					  struct request_sock *req)
810 {
811 	tcp_v6_init_req(req, sk, skb);
812 
813 	if (security_inet_conn_request(sk, skb, req))
814 		return NULL;
815 
816 	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
817 }
818 
819 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
820 	.family		=	AF_INET6,
821 	.obj_size	=	sizeof(struct tcp6_request_sock),
822 	.rtx_syn_ack	=	tcp_rtx_synack,
823 	.send_ack	=	tcp_v6_reqsk_send_ack,
824 	.destructor	=	tcp_v6_reqsk_destructor,
825 	.send_reset	=	tcp_v6_send_reset,
826 	.syn_ack_timeout =	tcp_syn_ack_timeout,
827 };
828 
829 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
830 	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
831 				sizeof(struct ipv6hdr),
832 #ifdef CONFIG_TCP_MD5SIG
833 	.req_md5_lookup	=	tcp_v6_md5_lookup,
834 	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
835 #endif
836 #ifdef CONFIG_SYN_COOKIES
837 	.cookie_init_seq =	cookie_v6_init_sequence,
838 #endif
839 	.route_req	=	tcp_v6_route_req,
840 	.init_seq	=	tcp_v6_init_seq,
841 	.init_ts_off	=	tcp_v6_init_ts_off,
842 	.send_synack	=	tcp_v6_send_synack,
843 };
844 
/* Build and send a bare TCP segment (ACK or RST) in reply to @skb.
 *
 * The reply swaps the ports and addresses of @skb, carries @seq/@ack/@win,
 * and optionally a timestamp option (when @tsecr is non-zero), an MPTCP
 * reset option and an MD5 signature (when @key is given).  It is sent
 * through the per-netns control socket.  Allocation or route lookup
 * failures are silent.
 */
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority, u32 txhash)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	const struct sock *lookup_sk;
	__be32 mrst = 0, *topt;
	struct dst_entry *dst;
	struct sk_buff *reply;
	struct tcphdr *rth;
	struct flowi6 fl6;
	__u32 mark = 0;

	/* Work out the header length including options first. */
	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

#ifdef CONFIG_MPTCP
	if (rst && !key) {
		mrst = mptcp_reset_option(skb);

		if (mrst)
			tot_len += sizeof(__be32);
	}
#endif

	reply = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
	if (!reply)
		return;

	skb_reserve(reply, MAX_TCP_HEADER);

	rth = skb_push(reply, tot_len);
	skb_reset_transport_header(reply);

	/* Swap the send and the receive. */
	memset(rth, 0, sizeof(*rth));
	rth->dest = th->source;
	rth->source = th->dest;
	rth->doff = tot_len / 4;
	rth->seq = htonl(seq);
	rth->ack_seq = htonl(ack);
	rth->ack = !rst || !th->ack;
	rth->rst = rst;
	rth->window = htons(win);

	/* Options follow directly after the fixed header. */
	topt = (__be32 *)(rth + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

	if (mrst)
		*topt++ = mrst;

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, rth);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	reply->ip_summed = CHECKSUM_PARTIAL;

	__tcp_v6_send_check(reply, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif) {
		fl6.flowi6_oif = tcp_v6_iif(skb);
	} else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT)
			mark = inet_twsk(sk)->tw_mark;
		else
			mark = READ_ONCE(sk->sk_mark);
		skb_set_delivery_time(reply, tcp_transmit_time(sk), true);
	}
	if (txhash) {
		/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
		skb_set_hash(reply, txhash, PKT_HASH_TYPE_L4);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = rth->dest;
	fl6.fl6_sport = rth->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));

	/* Pass a socket to ip6_dst_lookup either it is for RST
	 * Underlying function will use this to retrieve the network
	 * namespace.  A non-timewait @sk is used directly (sk's xfrm_policy
	 * can be referred); otherwise the control socket is used.
	 */
	if (sk && sk->sk_state != TCP_TIME_WAIT)
		lookup_sk = sk;
	else
		lookup_sk = ctl_sk;

	dst = ip6_dst_lookup_flow(net, lookup_sk, &fl6, NULL);
	if (IS_ERR(dst)) {
		kfree_skb(reply);
		return;
	}

	skb_dst_set(reply, dst);
	ip6_xmit(ctl_sk, reply, &fl6, fl6.flowi6_mark, NULL,
		 tclass & ~INET_ECN_MASK, priority);
	TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
	if (rst)
		TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
}
975 
/*
 * Answer @skb with a RST segment.
 *
 * @sk:  socket @skb was matched to. May be NULL, a full socket, or a
 *       non-full socket such as one in TCP_TIME_WAIT state.
 * @skb: received segment that is being reset.
 *
 * Nothing is sent if @skb itself has RST set, or if there is no socket and
 * @skb was not addressed to a unicast destination. With CONFIG_TCP_MD5SIG,
 * when @sk is not a full socket and @skb carries an MD5 option, the RST is
 * sent only if a listener with a matching key is found and the hash carried
 * in @skb verifies against that key.
 */
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	u32 txhash = 0;
	int oif = 0;

	/* Never answer a RST with a RST. */
	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	/* Without a socket, the namespace comes from the device of skb's dst. */
	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	/* Held until the out: label at the end of the function. */
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * Active side is lost. Try to find a listening socket through
		 * the source port, and then find the md5 key through that
		 * listening socket. We do not lose security here:
		 * the incoming packet is checked against the md5 hash computed
		 * with the key that was found, and no RST is generated if the
		 * md5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
					    NULL, 0, &ipv6h->saddr, th->source,
					    &ipv6h->daddr, ntohs(th->source),
					    dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		/* Recompute the hash over skb and compare with the one received. */
		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	/* If the peer sent an ACK, the RST takes its sequence number from it.
	 * Otherwise the RST acknowledges everything the segment occupied:
	 * SYN and FIN count for one each, plus the payload length.
	 */
	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			/* Reflect the incoming flow label if the socket asks for it. */
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
			txhash = sk->sk_txhash;
		}
		/* A timewait sock supplies its own saved label/priority/txhash. */
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
			txhash = inet_twsk(sk)->tw_txhash;
		}
	} else {
		/* No socket: flow label reflection is governed by the sysctl. */
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	/* win, tsval and tsecr are passed as 0; the literal 1 is the rst flag
	 * (tcp_v6_send_ack() passes 0 in the same position).
	 */
	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority, txhash);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}
1084 
/* Send an ACK (not a RST) in reply to @skb. All arguments are handed
 * unchanged to tcp_v6_send_response(), with its rst flag cleared.
 */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority, u32 txhash)
{
	const int rst = 0;	/* plain ACK, never a reset */

	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key,
			     rst, tclass, label, priority, txhash);
}
1093 
tcp_v6_timewait_ack(struct sock * sk,struct sk_buff * skb)1094 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1095 {
1096 	struct inet_timewait_sock *tw = inet_twsk(sk);
1097 	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1098 
1099 	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1100 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1101 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1102 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1103 			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
1104 			tw->tw_txhash);
1105 
1106 	inet_twsk_put(tw);
1107 }
1108 
tcp_v6_reqsk_send_ack(const struct sock * sk,struct sk_buff * skb,struct request_sock * req)1109 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1110 				  struct request_sock *req)
1111 {
1112 	int l3index;
1113 
1114 	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1115 
1116 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1117 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1118 	 */
1119 	/* RFC 7323 2.3
1120 	 * The window field (SEG.WND) of every outgoing segment, with the
1121 	 * exception of <SYN> segments, MUST be right-shifted by
1122 	 * Rcv.Wind.Shift bits:
1123 	 */
1124 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1125 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1126 			tcp_rsk(req)->rcv_nxt,
1127 			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1128 			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1129 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
1130 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
1131 			ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1132 			READ_ONCE(sk->sk_priority),
1133 			READ_ONCE(tcp_rsk(req)->txhash));
1134 }
1135 
1136 
/* With CONFIG_SYN_COOKIES, run every non-SYN segment through
 * cookie_v6_check() and return whatever socket it yields; SYN segments,
 * and all segments when syncookies are compiled out, get @sk back as is.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (tcp_hdr(skb)->syn)
		return sk;

	return cookie_v6_check(sk, skb);
#else
	return sk;
#endif
}
1147 
tcp_v6_get_syncookie(struct sock * sk,struct ipv6hdr * iph,struct tcphdr * th,u32 * cookie)1148 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1149 			 struct tcphdr *th, u32 *cookie)
1150 {
1151 	u16 mss = 0;
1152 #ifdef CONFIG_SYN_COOKIES
1153 	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1154 				    &tcp_request_sock_ipv6_ops, sk, th);
1155 	if (mss) {
1156 		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
1157 		tcp_synq_overflow(sk);
1158 	}
1159 #endif
1160 	return mss;
1161 }
1162 
tcp_v6_conn_request(struct sock * sk,struct sk_buff * skb)1163 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1164 {
1165 	if (skb->protocol == htons(ETH_P_IP))
1166 		return tcp_v4_conn_request(sk, skb);
1167 
1168 	if (!ipv6_unicast_destination(skb))
1169 		goto drop;
1170 
1171 	if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1172 		__IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1173 		return 0;
1174 	}
1175 
1176 	return tcp_conn_request(&tcp6_request_sock_ops,
1177 				&tcp_request_sock_ipv6_ops, sk, skb);
1178 
1179 drop:
1180 	tcp_listendrop(sk);
1181 	return 0; /* don't send reset */
1182 }
1183 
tcp_v6_restore_cb(struct sk_buff * skb)1184 static void tcp_v6_restore_cb(struct sk_buff *skb)
1185 {
1186 	/* We need to move header back to the beginning if xfrm6_policy_check()
1187 	 * and tcp_v6_fill_cb() are going to be called again.
1188 	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1189 	 */
1190 	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1191 		sizeof(struct inet6_skb_parm));
1192 }
1193 
/*
 * Create the child socket for connection request @req received on @sk.
 *
 * @sk:         listening (parent) socket
 * @skb:        received segment; an IPv4 skb selects the v4-mapped path
 * @req:        the connection request being turned into a socket
 * @dst:        route to use, or NULL to have it looked up here
 * @req_unhash: request sock handed to inet_ehash_nolisten(); the duplicate
 *              handling at the end treats NULL as the syncookie case
 * @own_req:    on the native IPv6 path, set to the result of
 *              inet_ehash_nolisten(); on the v4-mapped path it is passed
 *              through to tcp_v4_syn_recv_sock()
 *
 * Returns the new socket, or NULL on failure. Every failure on the native
 * IPv6 path is accounted with tcp_listendrop() on @sk.
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		/* The IPv4 code builds the child; it is then dressed up as an
		 * IPv6 socket below.
		 */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		/* Start from a byte copy of the listener's IPv6 state, then
		 * reset everything that must not be shared with the parent.
		 */
		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		/* Switch the child to the IPv4-over-INET6 operation tables. */
		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	/* Native IPv6 from here on. */
	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	/* Look up a route only if the caller did not supply one. */
	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	/* NOTE(review): dst appears to be owned by newsk from this point on,
	 * since the later error paths jump to out: without dst_release() --
	 * confirm against ip6_dst_store().
	 */
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	/* Start from a byte copy of the listener's IPv6 state, then reset
	 * everything that must not be shared with the parent.
	 */
	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	/* Addresses and bound interface come from the request. */
	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	/* Options attached to the request take precedence over the listener's. */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	/* opt is re-tested because ipv6_dup_options() may have returned NULL. */
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	/* The IPv4 address fields of a native IPv6 socket get a fixed
	 * placeholder value.
	 */
	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		const union tcp_md5_addr *addr;

		addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr;
		/* A failed key copy aborts the child altogether. */
		if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) {
			inet_csk_prepare_forced_close(newsk);
			tcp_done(newsk);
			goto out;
		}
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			/* The clone may have failed; only fix up a real skb. */
			if (newnp->pktoptions)
				tcp_v6_restore_cb(newnp->pktoptions);
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

	/* The error labels fall through: overflow -> release dst -> listen drop. */
out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}
1416 
1417 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1418 							   u32));
/* Process one received segment for socket @sk.
 *
 * The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 *
 * IPv4 packets are handed to tcp_v4_do_rcv() and its result is returned.
 * Every other path returns 0; on the discard path @skb is freed here.
 */
INDIRECT_CALLABLE_SCOPE
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	/* Keep a clone only if the socket asked for some received option;
	 * opt_skb stays NULL otherwise, and also if the clone fails.
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone_and_charge_r(skb, sk);

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		/* Drop the cached input route if the packet came in on another
		 * interface or the route no longer passes its check.
		 */
		if (dst) {
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		/* The cookie check produced a different socket: feed the
		 * segment to it as a child of the listener.
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	/* Slow path: generic state machine; non-zero means "send a reset". */
	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
	/* fall through: the segment itself is dropped after the reset */
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb_reason(skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		/* Latch the per-packet values the socket subscribed to. */
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		/* Either install the clone as the new pktoptions or clear the
		 * stored one; in both cases opt_skb ends up holding the
		 * previous pktoptions, which is released below.
		 */
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}
1562 
tcp_v6_fill_cb(struct sk_buff * skb,const struct ipv6hdr * hdr,const struct tcphdr * th)1563 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1564 			   const struct tcphdr *th)
1565 {
1566 	/* This is tricky: we move IP6CB at its correct location into
1567 	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1568 	 * _decode_session6() uses IP6CB().
1569 	 * barrier() makes sure compiler won't play aliasing games.
1570 	 */
1571 	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1572 		sizeof(struct inet6_skb_parm));
1573 	barrier();
1574 
1575 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1576 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1577 				    skb->len - th->doff*4);
1578 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1579 	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1580 	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1581 	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1582 	TCP_SKB_CB(skb)->sacked = 0;
1583 	TCP_SKB_CB(skb)->has_rxtstamp =
1584 			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
1585 }
1586 
/*
 * Receive one TCP segment carried over IPv6.
 *
 * Validates the TCP header and checksum, looks up the owning socket and
 * dispatches on its state: request socks (TCP_NEW_SYN_RECV) are resolved
 * through their listener, timewait socks are handled at do_time_wait, and
 * other sockets are processed via tcp_v6_do_rcv() or queued on the socket
 * backlog when the socket is owned by user context.
 *
 * Returns -1 when tcp_v6_do_rcv() returned non-zero, 0 in every other case
 * (including all paths on which the segment is dropped here).
 */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason drop_reason;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	/* Only packets addressed to this host are processed. */
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	/* A data offset below the fixed header size is malformed. */
	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	/* Make the whole header, options included, linearly accessible. */
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	/* Re-read the header pointers now that the pulls above are done. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		/* sk is a request sock: continue on behalf of its listener. */
		sk = req->rsk_listener;
		if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
			drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		else
			drop_reason = tcp_inbound_md5_hash(sk, skb,
							   &hdr->saddr, &hdr->daddr,
							   AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		/* The listener went away: try to migrate the request to another
		 * socket, or drop the request and redo the lookup.
		 */
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		/* Either way a reference on sk is now held. */
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			/* Reload the header pointers after the filter ran. */
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		} else {
			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		nf_reset_ct(skb);
		if (nsk == sk) {
			/* The listener itself handles the segment: undo
			 * tcp_v6_fill_cb() and continue with the common
			 * processing below.
			 */
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			drop_reason = SKB_DROP_REASON_TCP_MINTTL;
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
					   AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	nf_reset_ct(skb);

	if (tcp_filter(sk, skb)) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		goto discard_and_relse;
	}
	/* Reload the header pointers after the filter ran. */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	/* Listeners are processed without taking the socket lock. */
	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		/* NOTE(review): this failure path skips bh_unlock_sock() below;
		 * tcp_add_backlog() is expected to have released the socket
		 * lock itself when it fails -- confirm against its definition.
		 */
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	/* csum_error and bad_packet are entered by goto from above; a segment
	 * with a valid checksum and no socket is answered with a RST instead.
	 */
	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	kfree_skb_reason(skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		/* Look for a listener that can take over from the timewait
		 * sock; if there is one, retire the timewait sock and restart
		 * processing with the listener.
		 */
		sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			/* No reference is held on sk2. */
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		/* tcp_v6_timewait_ack() drops the timewait sock reference. */
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
1839 
tcp_v6_early_demux(struct sk_buff * skb)1840 void tcp_v6_early_demux(struct sk_buff *skb)
1841 {
1842 	struct net *net = dev_net(skb->dev);
1843 	const struct ipv6hdr *hdr;
1844 	const struct tcphdr *th;
1845 	struct sock *sk;
1846 
1847 	if (skb->pkt_type != PACKET_HOST)
1848 		return;
1849 
1850 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1851 		return;
1852 
1853 	hdr = ipv6_hdr(skb);
1854 	th = tcp_hdr(skb);
1855 
1856 	if (th->doff < sizeof(struct tcphdr) / 4)
1857 		return;
1858 
1859 	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
1860 	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
1861 					&hdr->saddr, th->source,
1862 					&hdr->daddr, ntohs(th->dest),
1863 					inet6_iif(skb), inet6_sdif(skb));
1864 	if (sk) {
1865 		skb->sk = sk;
1866 		skb->destructor = sock_edemux;
1867 		if (sk_fullsock(sk)) {
1868 			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
1869 
1870 			if (dst)
1871 				dst = dst_check(dst, sk->sk_rx_dst_cookie);
1872 			if (dst &&
1873 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
1874 				skb_dst_set_noref(skb, dst);
1875 		}
1876 	}
1877 }
1878 
/* Timewait sock description for TCPv6: objects are sized as
 * struct tcp6_timewait_sock and use the tcp_twsk_unique() and
 * tcp_twsk_destructor() callbacks.
 */
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};
1884 
tcp_v6_send_check(struct sock * sk,struct sk_buff * skb)1885 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1886 {
1887 	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
1888 }
1889 
/*
 *	TCP over native IPv6: address-family operations installed on every
 *	new TCPv6 socket by tcp_v6_init_sock().
 */
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};
1905 
1906 #ifdef CONFIG_TCP_MD5SIG
/* MD5 signature operations for native IPv6 TCP sockets; installed by
 * tcp_v6_init_sock() when CONFIG_TCP_MD5SIG is enabled.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
1912 #endif
1913 
/*
 *	TCP over IPv4 via INET6 API
 *
 *	Installed by tcp_v6_syn_recv_sock() on children created from IPv4
 *	packets. Transmit, checksum, header rebuild, rx-dst and MTU hooks are
 *	the IPv4 ones, while the connection-request, socket-option and
 *	sockaddr hooks remain the IPv6 ones.
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};
1931 
1932 #ifdef CONFIG_TCP_MD5SIG
/* MD5 signature operations for v4-mapped sockets; installed by
 * tcp_v6_syn_recv_sock() on children created from IPv4 packets. Key lookup
 * and hashing use the IPv4 helpers, key parsing stays with the IPv6 one.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
1938 #endif
1939 
1940 /* NOTE: A lot of things set to zero explicitly by call to
1941  *       sk_alloc() so need not be done here.
1942  */
tcp_v6_init_sock(struct sock * sk)1943 static int tcp_v6_init_sock(struct sock *sk)
1944 {
1945 	struct inet_connection_sock *icsk = inet_csk(sk);
1946 
1947 	tcp_init_sock(sk);
1948 
1949 	icsk->icsk_af_ops = &ipv6_specific;
1950 
1951 #ifdef CONFIG_TCP_MD5SIG
1952 	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1953 #endif
1954 
1955 	return 0;
1956 }
1957 
1958 #ifdef CONFIG_PROC_FS
1959 /* Proc filesystem TCPv6 sock list dumping. */
get_openreq6(struct seq_file * seq,const struct request_sock * req,int i)1960 static void get_openreq6(struct seq_file *seq,
1961 			 const struct request_sock *req, int i)
1962 {
1963 	long ttd = req->rsk_timer.expires - jiffies;
1964 	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1965 	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1966 
1967 	if (ttd < 0)
1968 		ttd = 0;
1969 
1970 	seq_printf(seq,
1971 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1972 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1973 		   i,
1974 		   src->s6_addr32[0], src->s6_addr32[1],
1975 		   src->s6_addr32[2], src->s6_addr32[3],
1976 		   inet_rsk(req)->ir_num,
1977 		   dest->s6_addr32[0], dest->s6_addr32[1],
1978 		   dest->s6_addr32[2], dest->s6_addr32[3],
1979 		   ntohs(inet_rsk(req)->ir_rmt_port),
1980 		   TCP_SYN_RECV,
1981 		   0, 0, /* could print option size, but that is af dependent. */
1982 		   1,   /* timers active (only the expire timer) */
1983 		   jiffies_to_clock_t(ttd),
1984 		   req->num_timeout,
1985 		   from_kuid_munged(seq_user_ns(seq),
1986 				    sock_i_uid(req->rsk_listener)),
1987 		   0,  /* non standard timer */
1988 		   0, /* open_requests have no inode */
1989 		   0, req);
1990 }
1991 
get_tcp6_sock(struct seq_file * seq,struct sock * sp,int i)1992 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1993 {
1994 	const struct in6_addr *dest, *src;
1995 	__u16 destp, srcp;
1996 	int timer_active;
1997 	unsigned long timer_expires;
1998 	const struct inet_sock *inet = inet_sk(sp);
1999 	const struct tcp_sock *tp = tcp_sk(sp);
2000 	const struct inet_connection_sock *icsk = inet_csk(sp);
2001 	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2002 	int rx_queue;
2003 	int state;
2004 
2005 	dest  = &sp->sk_v6_daddr;
2006 	src   = &sp->sk_v6_rcv_saddr;
2007 	destp = ntohs(inet->inet_dport);
2008 	srcp  = ntohs(inet->inet_sport);
2009 
2010 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2011 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2012 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2013 		timer_active	= 1;
2014 		timer_expires	= icsk->icsk_timeout;
2015 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2016 		timer_active	= 4;
2017 		timer_expires	= icsk->icsk_timeout;
2018 	} else if (timer_pending(&sp->sk_timer)) {
2019 		timer_active	= 2;
2020 		timer_expires	= sp->sk_timer.expires;
2021 	} else {
2022 		timer_active	= 0;
2023 		timer_expires = jiffies;
2024 	}
2025 
2026 	state = inet_sk_state_load(sp);
2027 	if (state == TCP_LISTEN)
2028 		rx_queue = READ_ONCE(sp->sk_ack_backlog);
2029 	else
2030 		/* Because we don't lock the socket,
2031 		 * we might find a transient negative value.
2032 		 */
2033 		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2034 				      READ_ONCE(tp->copied_seq), 0);
2035 
2036 	seq_printf(seq,
2037 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2038 		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2039 		   i,
2040 		   src->s6_addr32[0], src->s6_addr32[1],
2041 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2042 		   dest->s6_addr32[0], dest->s6_addr32[1],
2043 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2044 		   state,
2045 		   READ_ONCE(tp->write_seq) - tp->snd_una,
2046 		   rx_queue,
2047 		   timer_active,
2048 		   jiffies_delta_to_clock_t(timer_expires - jiffies),
2049 		   icsk->icsk_retransmits,
2050 		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2051 		   icsk->icsk_probes_out,
2052 		   sock_i_ino(sp),
2053 		   refcount_read(&sp->sk_refcnt), sp,
2054 		   jiffies_to_clock_t(icsk->icsk_rto),
2055 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
2056 		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2057 		   tcp_snd_cwnd(tp),
2058 		   state == TCP_LISTEN ?
2059 			fastopenq->max_qlen :
2060 			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2061 		   );
2062 }
2063 
get_timewait6_sock(struct seq_file * seq,struct inet_timewait_sock * tw,int i)2064 static void get_timewait6_sock(struct seq_file *seq,
2065 			       struct inet_timewait_sock *tw, int i)
2066 {
2067 	long delta = tw->tw_timer.expires - jiffies;
2068 	const struct in6_addr *dest, *src;
2069 	__u16 destp, srcp;
2070 
2071 	dest = &tw->tw_v6_daddr;
2072 	src  = &tw->tw_v6_rcv_saddr;
2073 	destp = ntohs(tw->tw_dport);
2074 	srcp  = ntohs(tw->tw_sport);
2075 
2076 	seq_printf(seq,
2077 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2078 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
2079 		   i,
2080 		   src->s6_addr32[0], src->s6_addr32[1],
2081 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
2082 		   dest->s6_addr32[0], dest->s6_addr32[1],
2083 		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
2084 		   tw->tw_substate, 0, 0,
2085 		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2086 		   refcount_read(&tw->tw_refcnt), tw);
2087 }
2088 
tcp6_seq_show(struct seq_file * seq,void * v)2089 static int tcp6_seq_show(struct seq_file *seq, void *v)
2090 {
2091 	struct tcp_iter_state *st;
2092 	struct sock *sk = v;
2093 
2094 	if (v == SEQ_START_TOKEN) {
2095 		seq_puts(seq,
2096 			 "  sl  "
2097 			 "local_address                         "
2098 			 "remote_address                        "
2099 			 "st tx_queue rx_queue tr tm->when retrnsmt"
2100 			 "   uid  timeout inode\n");
2101 		goto out;
2102 	}
2103 	st = seq->private;
2104 
2105 	if (sk->sk_state == TCP_TIME_WAIT)
2106 		get_timewait6_sock(seq, v, st->num);
2107 	else if (sk->sk_state == TCP_NEW_SYN_RECV)
2108 		get_openreq6(seq, v, st->num);
2109 	else
2110 		get_tcp6_sock(seq, v, st->num);
2111 out:
2112 	return 0;
2113 }
2114 
2115 static const struct seq_operations tcp6_seq_ops = {
2116 	.show		= tcp6_seq_show,
2117 	.start		= tcp_seq_start,
2118 	.next		= tcp_seq_next,
2119 	.stop		= tcp_seq_stop,
2120 };
2121 
/* Address-family descriptor handed to proc_create_net_data() as the
 * private data of the "tcp6" proc entry; selects AF_INET6.
 */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};
2125 
tcp6_proc_init(struct net * net)2126 int __net_init tcp6_proc_init(struct net *net)
2127 {
2128 	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2129 			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2130 		return -ENOMEM;
2131 	return 0;
2132 }
2133 
/* Remove the "tcp6" entry from @net's proc_net directory (the entry
 * created by tcp6_proc_init()).
 */
void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
2138 #endif
2139 
2140 struct proto tcpv6_prot = {
2141 	.name			= "TCPv6",
2142 	.owner			= THIS_MODULE,
2143 	.close			= tcp_close,
2144 	.pre_connect		= tcp_v6_pre_connect,
2145 	.connect		= tcp_v6_connect,
2146 	.disconnect		= tcp_disconnect,
2147 	.accept			= inet_csk_accept,
2148 	.ioctl			= tcp_ioctl,
2149 	.init			= tcp_v6_init_sock,
2150 	.destroy		= tcp_v4_destroy_sock,
2151 	.shutdown		= tcp_shutdown,
2152 	.setsockopt		= tcp_setsockopt,
2153 	.getsockopt		= tcp_getsockopt,
2154 	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
2155 	.keepalive		= tcp_set_keepalive,
2156 	.recvmsg		= tcp_recvmsg,
2157 	.sendmsg		= tcp_sendmsg,
2158 	.splice_eof		= tcp_splice_eof,
2159 	.backlog_rcv		= tcp_v6_do_rcv,
2160 	.release_cb		= tcp_release_cb,
2161 	.hash			= inet6_hash,
2162 	.unhash			= inet_unhash,
2163 	.get_port		= inet_csk_get_port,
2164 	.put_port		= inet_put_port,
2165 #ifdef CONFIG_BPF_SYSCALL
2166 	.psock_update_sk_prot	= tcp_bpf_update_proto,
2167 #endif
2168 	.enter_memory_pressure	= tcp_enter_memory_pressure,
2169 	.leave_memory_pressure	= tcp_leave_memory_pressure,
2170 	.stream_memory_free	= tcp_stream_memory_free,
2171 	.sockets_allocated	= &tcp_sockets_allocated,
2172 
2173 	.memory_allocated	= &tcp_memory_allocated,
2174 	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,
2175 
2176 	.memory_pressure	= &tcp_memory_pressure,
2177 	.orphan_count		= &tcp_orphan_count,
2178 	.sysctl_mem		= sysctl_tcp_mem,
2179 	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
2180 	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
2181 	.max_header		= MAX_TCP_HEADER,
2182 	.obj_size		= sizeof(struct tcp6_sock),
2183 	.ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6),
2184 	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
2185 	.twsk_prot		= &tcp6_timewait_sock_ops,
2186 	.rsk_prot		= &tcp6_request_sock_ops,
2187 	.h.hashinfo		= NULL,
2188 	.no_autobind		= true,
2189 	.diag_destroy		= tcp_abort,
2190 };
2191 EXPORT_SYMBOL_GPL(tcpv6_prot);
2192 
2193 static const struct inet6_protocol tcpv6_protocol = {
2194 	.handler	=	tcp_v6_rcv,
2195 	.err_handler	=	tcp_v6_err,
2196 	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2197 };
2198 
2199 static struct inet_protosw tcpv6_protosw = {
2200 	.type		=	SOCK_STREAM,
2201 	.protocol	=	IPPROTO_TCP,
2202 	.prot		=	&tcpv6_prot,
2203 	.ops		=	&inet6_stream_ops,
2204 	.flags		=	INET_PROTOSW_PERMANENT |
2205 				INET_PROTOSW_ICSK,
2206 };
2207 
/* Per-netns init: create the control socket stored in net->ipv6.tcp_sk
 * (PF_INET6, SOCK_RAW, IPPROTO_TCP).
 *
 * Returns whatever inet_ctl_sock_create() returns.
 */
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}
2213 
/* Per-netns exit: destroy the control socket created by
 * tcpv6_net_init().
 */
static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
2218 
/* Batched netns exit: hand the list of exiting namespaces to
 * tcp_twsk_purge() for the AF_INET6 family.
 */
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	tcp_twsk_purge(net_exit_list, AF_INET6);
}
2223 
2224 static struct pernet_operations tcpv6_net_ops = {
2225 	.init	    = tcpv6_net_init,
2226 	.exit	    = tcpv6_net_exit,
2227 	.exit_batch = tcpv6_net_exit_batch,
2228 };
2229 
tcpv6_init(void)2230 int __init tcpv6_init(void)
2231 {
2232 	int ret;
2233 
2234 	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2235 	if (ret)
2236 		goto out;
2237 
2238 	/* register inet6 protocol */
2239 	ret = inet6_register_protosw(&tcpv6_protosw);
2240 	if (ret)
2241 		goto out_tcpv6_protocol;
2242 
2243 	ret = register_pernet_subsys(&tcpv6_net_ops);
2244 	if (ret)
2245 		goto out_tcpv6_protosw;
2246 
2247 	ret = mptcpv6_init();
2248 	if (ret)
2249 		goto out_tcpv6_pernet_subsys;
2250 
2251 out:
2252 	return ret;
2253 
2254 out_tcpv6_pernet_subsys:
2255 	unregister_pernet_subsys(&tcpv6_net_ops);
2256 out_tcpv6_protosw:
2257 	inet6_unregister_protosw(&tcpv6_protosw);
2258 out_tcpv6_protocol:
2259 	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2260 	goto out;
2261 }
2262 
/* Undo the registrations made by tcpv6_init(), in reverse order:
 * per-netns operations, protosw entry, then the protocol handler.
 * NOTE(review): tcpv6_init() also calls mptcpv6_init(); no counterpart
 * is invoked here - confirm that none is required.
 */
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
2269