1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
13 *
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
21
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
43
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
62
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
65
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
68
69 #include <trace/events/tcp.h>
70
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
74
75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
76
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
/* Stub used when CONFIG_TCP_MD5SIG is disabled: no MD5 keys can exist,
 * so every lookup misses and callers fall back to the non-MD5 paths.
 */
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr,
85 int l3index)
86 {
87 return NULL;
88 }
89 #endif
90
91 /* Helper returning the inet6 address from a given tcp socket.
92 * It can be used in TCP stack instead of inet6_sk(sk).
93 * This avoids a dereference and allow compiler optimizations.
94 * It is a specialized version of inet6_sk_generic().
95 */
tcp_inet6_sk(const struct sock * sk)96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 {
/* Offset of the ipv6_pinfo inside a tcp6_sock, computed from the
 * struct sizes — assumes ipv6_pinfo is the trailing member of
 * struct tcp6_sock (the inet6_sk_generic() layout contract).
 */
98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 
/* Pointer arithmetic instead of an inet6_sk(sk) pointer dereference. */
100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
101 }
102
/* Cache the incoming skb's dst on the socket for later RX fast-path use.
 * Only done if we can take a reference safely (dst_hold_safe()); the
 * ifindex and rt6 cookie are stored alongside so stale entries can be
 * detected before reuse.
 */
inet6_sk_rx_dst_set(struct sock * sk,const struct sk_buff * skb)103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 {
105 struct dst_entry *dst = skb_dst(skb);
106 
107 if (dst && dst_hold_safe(dst)) {
108 const struct rt6_info *rt = (const struct rt6_info *)dst;
109 
/* Publish the dst with RCU semantics; readers may run concurrently. */
110 rcu_assign_pointer(sk->sk_rx_dst, dst);
111 sk->sk_rx_dst_ifindex = skb->skb_iif;
112 sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
113 }
114 }
115
/* Derive the initial sequence number for a passive open from the
 * 4-tuple of the incoming SYN (addresses/ports swapped to our view).
 */
tcp_v6_init_seq(const struct sk_buff * skb)116 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 {
118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
119 ipv6_hdr(skb)->saddr.s6_addr32,
120 tcp_hdr(skb)->dest,
121 tcp_hdr(skb)->source);
122 }
123
/* Per-connection timestamp offset, keyed on the address pair of the
 * incoming SYN (prevents cross-connection timestamp correlation).
 */
tcp_v6_init_ts_off(const struct net * net,const struct sk_buff * skb)124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 {
126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
127 ipv6_hdr(skb)->saddr.s6_addr32);
128 }
129
/* Hook run before tcp_v6_connect(): validates addr_len and gives
 * attached cgroup BPF programs a chance to inspect/rewrite the
 * destination address.  Returns 0 or a negative errno.
 */
tcp_v6_pre_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
131 int addr_len)
132 {
133 /* This check is replicated from tcp_v6_connect() and intended to
134 * prevent BPF program called below from accessing bytes that are out
135 * of the bound specified by user in addr_len.
136 */
137 if (addr_len < SIN6_LEN_RFC2133)
138 return -EINVAL;
139 
/* connect() runs under lock_sock(); assert ownership before BPF runs. */
140 sock_owned_by_me(sk);
141 
142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
143 }
144
/* Active open of a TCP/IPv6 connection.
 *
 * Validates the sockaddr, resolves flow label / scope id, handles the
 * v4-mapped case by delegating to tcp_v4_connect(), routes the flow,
 * binds a local port via inet6_hash_connect(), picks the ISN and
 * timestamp offset, then sends the SYN with tcp_connect().
 * Returns 0 or a negative errno; on failure the socket is reset to
 * TCP_CLOSE with dport/route caps cleared.
 */
tcp_v6_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
146 int addr_len)
147 {
148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
149 struct inet_sock *inet = inet_sk(sk);
150 struct inet_connection_sock *icsk = inet_csk(sk);
151 struct inet_timewait_death_row *tcp_death_row;
152 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
153 struct tcp_sock *tp = tcp_sk(sk);
154 struct in6_addr *saddr = NULL, *final_p, final;
155 struct ipv6_txoptions *opt;
156 struct flowi6 fl6;
157 struct dst_entry *dst;
158 int addr_type;
159 int err;
160 
161 if (addr_len < SIN6_LEN_RFC2133)
162 return -EINVAL;
163 
164 if (usin->sin6_family != AF_INET6)
165 return -EAFNOSUPPORT;
166 
167 memset(&fl6, 0, sizeof(fl6));
168 
/* IPV6_FLOWINFO_SEND: take the flow label from the sockaddr and make
 * sure any non-zero label was actually obtained by this socket.
 */
169 if (np->sndflow) {
170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
171 IP6_ECN_flow_init(fl6.flowlabel);
172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
173 struct ip6_flowlabel *flowlabel;
174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
175 if (IS_ERR(flowlabel))
176 return -EINVAL;
177 fl6_sock_release(flowlabel);
178 }
179 }
180 
181 /*
182 * connect() to INADDR_ANY means loopback (BSD'ism).
183 */
184 
185 if (ipv6_addr_any(&usin->sin6_addr)) {
186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
188 &usin->sin6_addr);
189 else
190 usin->sin6_addr = in6addr_loopback;
191 }
192 
193 addr_type = ipv6_addr_type(&usin->sin6_addr);
194 
195 if (addr_type & IPV6_ADDR_MULTICAST)
196 return -ENETUNREACH;
197 
198 if (addr_type&IPV6_ADDR_LINKLOCAL) {
199 if (addr_len >= sizeof(struct sockaddr_in6) &&
200 usin->sin6_scope_id) {
201 /* If interface is set while binding, indices
202 * must coincide.
203 */
204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
205 return -EINVAL;
206 
207 sk->sk_bound_dev_if = usin->sin6_scope_id;
208 }
209 
210 /* Connect to link-local address requires an interface */
211 if (!sk->sk_bound_dev_if)
212 return -EINVAL;
213 }
214 
/* Reconnecting to a different peer: discard stale PAWS state and the
 * old write sequence so a fresh ISN is generated below.
 */
215 if (tp->rx_opt.ts_recent_stamp &&
216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
217 tp->rx_opt.ts_recent = 0;
218 tp->rx_opt.ts_recent_stamp = 0;
219 WRITE_ONCE(tp->write_seq, 0);
220 }
221 
222 sk->sk_v6_daddr = usin->sin6_addr;
223 np->flow_label = fl6.flowlabel;
224 
225 /*
226 * TCP over IPv4
227 */
228 
/* v4-mapped destination: flip the socket to the mapped ops and let
 * tcp_v4_connect() do the work; on failure restore the IPv6 ops.
 */
229 if (addr_type & IPV6_ADDR_MAPPED) {
230 u32 exthdrlen = icsk->icsk_ext_hdr_len;
231 struct sockaddr_in sin;
232 
233 if (ipv6_only_sock(sk))
234 return -ENETUNREACH;
235 
236 sin.sin_family = AF_INET;
237 sin.sin_port = usin->sin6_port;
238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 
240 icsk->icsk_af_ops = &ipv6_mapped;
241 if (sk_is_mptcp(sk))
242 mptcpv6_handle_mapped(sk, true);
243 sk->sk_backlog_rcv = tcp_v4_do_rcv;
244 #ifdef CONFIG_TCP_MD5SIG
245 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
246 #endif
247 
248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
249 
250 if (err) {
251 icsk->icsk_ext_hdr_len = exthdrlen;
252 icsk->icsk_af_ops = &ipv6_specific;
253 if (sk_is_mptcp(sk))
254 mptcpv6_handle_mapped(sk, false);
255 sk->sk_backlog_rcv = tcp_v6_do_rcv;
256 #ifdef CONFIG_TCP_MD5SIG
257 tp->af_specific = &tcp_sock_ipv6_specific;
258 #endif
259 goto failure;
260 }
261 np->saddr = sk->sk_v6_rcv_saddr;
262 
263 return err;
264 }
265 
266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
267 saddr = &sk->sk_v6_rcv_saddr;
268 
/* Build the flow key and route it; the route also performs source
 * address selection when the socket is not yet bound to one.
 */
269 fl6.flowi6_proto = IPPROTO_TCP;
270 fl6.daddr = sk->sk_v6_daddr;
271 fl6.saddr = saddr ? *saddr : np->saddr;
272 fl6.flowi6_oif = sk->sk_bound_dev_if;
273 fl6.flowi6_mark = sk->sk_mark;
274 fl6.fl6_dport = usin->sin6_port;
275 fl6.fl6_sport = inet->inet_sport;
276 fl6.flowi6_uid = sk->sk_uid;
277 
278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
279 final_p = fl6_update_dst(&fl6, opt, &final);
280 
281 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
282 
283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
284 if (IS_ERR(dst)) {
285 err = PTR_ERR(dst);
286 goto failure;
287 }
288 
289 if (!saddr) {
290 saddr = &fl6.saddr;
291 sk->sk_v6_rcv_saddr = *saddr;
292 }
293 
294 /* set the source address */
295 np->saddr = *saddr;
296 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
297 
298 sk->sk_gso_type = SKB_GSO_TCPV6;
299 ip6_dst_store(sk, dst, NULL, NULL);
300 
301 icsk->icsk_ext_hdr_len = 0;
302 if (opt)
303 icsk->icsk_ext_hdr_len = opt->opt_flen +
304 opt->opt_nflen;
305 
306 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
307 
308 inet->inet_dport = usin->sin6_port;
309 
/* Pick and hash a local port; from here on failure must undo the
 * state change via late_failure.
 */
310 tcp_set_state(sk, TCP_SYN_SENT);
311 tcp_death_row = sock_net(sk)->ipv4.tcp_death_row;
312 err = inet6_hash_connect(tcp_death_row, sk);
313 if (err)
314 goto late_failure;
315 
316 sk_set_txhash(sk);
317 
/* Unless repairing, derive ISN/ts offset from the final 4-tuple
 * (source port is only known after inet6_hash_connect()).
 */
318 if (likely(!tp->repair)) {
319 if (!tp->write_seq)
320 WRITE_ONCE(tp->write_seq,
321 secure_tcpv6_seq(np->saddr.s6_addr32,
322 sk->sk_v6_daddr.s6_addr32,
323 inet->inet_sport,
324 inet->inet_dport));
325 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
326 np->saddr.s6_addr32,
327 sk->sk_v6_daddr.s6_addr32);
328 }
329 
/* TCP Fast Open may defer the actual SYN until sendmsg(). */
330 if (tcp_fastopen_defer_connect(sk, &err))
331 return err;
332 if (err)
333 goto late_failure;
334 
335 err = tcp_connect(sk);
336 if (err)
337 goto late_failure;
338 
339 return 0;
340 
341 late_failure:
342 tcp_set_state(sk, TCP_CLOSE);
343 failure:
344 inet->inet_dport = 0;
345 sk->sk_route_caps = 0;
346 return err;
347 }
348
/* React to a PMTU decrease (from ICMPV6_PKT_TOOBIG, possibly deferred
 * via TCP_MTU_REDUCED_DEFERRED): update the cached route, shrink the
 * MSS and retransmit outstanding data that no longer fits.
 */
tcp_v6_mtu_reduced(struct sock * sk)349 static void tcp_v6_mtu_reduced(struct sock *sk)
350 {
351 struct dst_entry *dst;
352 u32 mtu;
353 
/* Nothing to do for sockets with no established path. */
354 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
355 return;
356 
357 mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
358 
359 /* Drop requests trying to increase our current mss.
360 * Check done in __ip6_rt_update_pmtu() is too late.
361 */
362 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
363 return;
364 
365 dst = inet6_csk_update_pmtu(sk, mtu);
366 if (!dst)
367 return;
368 
369 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
370 tcp_sync_mss(sk, dst_mtu(dst));
371 tcp_simple_retransmit(sk);
372 }
373 }
374
/* ICMPv6 error handler for TCP: locate the socket the quoted TCP
 * header belongs to, validate the quoted sequence number, and react
 * per error type (redirect, packet-too-big, hard/soft errors).
 * Called from the ICMPv6 layer; runs in BH context (bh_lock_sock()).
 */
tcp_v6_err(struct sk_buff * skb,struct inet6_skb_parm * opt,u8 type,u8 code,int offset,__be32 info)375 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
376 u8 type, u8 code, int offset, __be32 info)
377 {
/* hdr/th point into the quoted original packet inside the ICMP payload. */
378 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
379 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
380 struct net *net = dev_net(skb->dev);
381 struct request_sock *fastopen;
382 struct ipv6_pinfo *np;
383 struct tcp_sock *tp;
384 __u32 seq, snd_una;
385 struct sock *sk;
386 bool fatal;
387 int err;
388 
389 sk = __inet6_lookup_established(net, &tcp_hashinfo,
390 &hdr->daddr, th->dest,
391 &hdr->saddr, ntohs(th->source),
392 skb->dev->ifindex, inet6_sdif(skb));
393 
394 if (!sk) {
395 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
396 ICMP6_MIB_INERRORS);
397 return -ENOENT;
398 }
399 
400 if (sk->sk_state == TCP_TIME_WAIT) {
401 inet_twsk_put(inet_twsk(sk));
402 return 0;
403 }
404 seq = ntohl(th->seq);
405 fatal = icmpv6_err_convert(type, code, &err);
/* request_sock (not yet accepted): handled without the socket lock. */
406 if (sk->sk_state == TCP_NEW_SYN_RECV) {
407 tcp_req_err(sk, seq, fatal);
408 return 0;
409 }
410 
411 bh_lock_sock(sk);
/* PKT_TOOBIG is still processed while user-owned (deferred below). */
412 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
413 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
414 
415 if (sk->sk_state == TCP_CLOSE)
416 goto out;
417 
418 if (static_branch_unlikely(&ip6_min_hopcount)) {
419 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
420 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
421 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
422 goto out;
423 }
424 }
425 
426 tp = tcp_sk(sk);
427 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
428 fastopen = rcu_dereference(tp->fastopen_rsk);
429 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
/* The quoted seq must lie in the unacknowledged window, else the ICMP
 * is stale or forged.
 */
430 if (sk->sk_state != TCP_LISTEN &&
431 !between(seq, snd_una, tp->snd_nxt)) {
432 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
433 goto out;
434 }
435 
436 np = tcp_inet6_sk(sk);
437 
438 if (type == NDISC_REDIRECT) {
439 if (!sock_owned_by_user(sk)) {
440 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
441 
442 if (dst)
443 dst->ops->redirect(dst, sk, skb);
444 }
445 goto out;
446 }
447 
448 if (type == ICMPV6_PKT_TOOBIG) {
449 u32 mtu = ntohl(info);
450 
451 /* We are not interested in TCP_LISTEN and open_requests
452 * (SYN-ACKs send out by Linux are always <576bytes so
453 * they should go through unfragmented).
454 */
455 if (sk->sk_state == TCP_LISTEN)
456 goto out;
457 
458 if (!ip6_sk_accept_pmtu(sk))
459 goto out;
460 
461 if (mtu < IPV6_MIN_MTU)
462 goto out;
463 
464 WRITE_ONCE(tp->mtu_info, mtu);
465 
/* If the user owns the socket, defer; sock_hold() balanced by the
 * tasklet that later runs tcp_v6_mtu_reduced().
 */
466 if (!sock_owned_by_user(sk))
467 tcp_v6_mtu_reduced(sk);
468 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
469 &sk->sk_tsq_flags))
470 sock_hold(sk);
471 goto out;
472 }
473 
474 
475 /* Might be for an request_sock */
476 switch (sk->sk_state) {
477 case TCP_SYN_SENT:
478 case TCP_SYN_RECV:
479 /* Only in fast or simultaneous open. If a fast open socket is
480 * already accepted it is treated as a connected one below.
481 */
482 if (fastopen && !fastopen->sk)
483 break;
484 
485 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
486 
487 if (!sock_owned_by_user(sk)) {
488 sk->sk_err = err;
489 sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
490 
491 tcp_done(sk);
492 } else
493 sk->sk_err_soft = err;
494 goto out;
495 case TCP_LISTEN:
496 break;
497 default:
498 /* check if this ICMP message allows revert of backoff.
499 * (see RFC 6069)
500 */
501 if (!fastopen && type == ICMPV6_DEST_UNREACH &&
502 code == ICMPV6_NOROUTE)
503 tcp_ld_RTO_revert(sk, seq);
504 }
505 
/* Established sockets: hard error only with IPV6_RECVERR, else soft. */
506 if (!sock_owned_by_user(sk) && np->recverr) {
507 sk->sk_err = err;
508 sk_error_report(sk);
509 } else
510 sk->sk_err_soft = err;
511 
512 out:
513 bh_unlock_sock(sk);
514 sock_put(sk);
515 return 0;
516 }
517
518
/* Build and transmit a SYN-ACK for the given request_sock.
 * Routes the reply if no dst was supplied, mirrors the peer's flow
 * label when IPV6_FLOWINFO reflection is enabled, and derives the
 * traffic class (optionally reflecting the SYN's TOS per
 * sysctl_tcp_reflect_tos).  Returns a net_xmit_eval()'d errno.
 */
tcp_v6_send_synack(const struct sock * sk,struct dst_entry * dst,struct flowi * fl,struct request_sock * req,struct tcp_fastopen_cookie * foc,enum tcp_synack_type synack_type,struct sk_buff * syn_skb)519 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
520 struct flowi *fl,
521 struct request_sock *req,
522 struct tcp_fastopen_cookie *foc,
523 enum tcp_synack_type synack_type,
524 struct sk_buff *syn_skb)
525 {
526 struct inet_request_sock *ireq = inet_rsk(req);
527 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
528 struct ipv6_txoptions *opt;
529 struct flowi6 *fl6 = &fl->u.ip6;
530 struct sk_buff *skb;
531 int err = -ENOMEM;
532 u8 tclass;
533 
534 /* First, grab a route. */
535 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
536 IPPROTO_TCP)) == NULL)
537 goto done;
538 
539 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
540 
541 if (skb) {
542 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
543 &ireq->ir_v6_rmt_addr);
544 
545 fl6->daddr = ireq->ir_v6_rmt_addr;
/* Reflect the flow label of the original SYN if requested. */
546 if (np->repflow && ireq->pktopts)
547 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
548 
/* Optionally echo the SYN's DSCP bits, keeping our own ECN bits. */
549 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
550 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
551 (np->tclass & INET_ECN_MASK) :
552 np->tclass;
553 
554 if (!INET_ECN_is_capable(tclass) &&
555 tcp_bpf_ca_needs_ecn((struct sock *)req))
556 tclass |= INET_ECN_ECT_0;
557 
/* np->opt is RCU-protected; hold the read lock across ip6_xmit(). */
558 rcu_read_lock();
559 opt = ireq->ipv6_opt;
560 if (!opt)
561 opt = rcu_dereference(np->opt);
562 err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
563 tclass, sk->sk_priority);
564 rcu_read_unlock();
565 err = net_xmit_eval(err);
566 }
567 
568 done:
569 return err;
570 }
571
572
tcp_v6_reqsk_destructor(struct request_sock * req)573 static void tcp_v6_reqsk_destructor(struct request_sock *req)
574 {
575 kfree(inet_rsk(req)->ipv6_opt);
576 consume_skb(inet_rsk(req)->pktopts);
577 }
578
579 #ifdef CONFIG_TCP_MD5SIG
/* Look up the TCP-MD5 key configured for a given peer IPv6 address
 * (and L3 master device index); NULL when no key matches.
 */
tcp_v6_md5_do_lookup(const struct sock * sk,const struct in6_addr * addr,int l3index)580 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
581 const struct in6_addr *addr,
582 int l3index)
583 {
584 return tcp_md5_do_lookup(sk, l3index,
585 (union tcp_md5_addr *)addr, AF_INET6);
586 }
587
/* tcp_sock_af_ops->md5_lookup: find the MD5 key for addr_sk's peer,
 * scoped to the L3 master of the device addr_sk is bound to (if any).
 */
tcp_v6_md5_lookup(const struct sock * sk,const struct sock * addr_sk)588 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
589 const struct sock *addr_sk)
590 {
591 int l3index;
592 
593 l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
594 addr_sk->sk_bound_dev_if);
595 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
596 l3index);
597 }
598
/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: validate the
 * user-supplied tcp_md5sig command and add or delete the key.
 * v4-mapped addresses are stored as AF_INET keys on the embedded
 * IPv4 address.  Returns 0 or a negative errno.
 */
tcp_v6_parse_md5_keys(struct sock * sk,int optname,sockptr_t optval,int optlen)599 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
600 sockptr_t optval, int optlen)
601 {
602 struct tcp_md5sig cmd;
603 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
604 int l3index = 0;
605 u8 prefixlen;
606 u8 flags;
607 
608 if (optlen < sizeof(cmd))
609 return -EINVAL;
610 
611 if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
612 return -EFAULT;
613 
614 if (sin6->sin6_family != AF_INET6)
615 return -EINVAL;
616 
617 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
618 
/* Explicit prefix length only with TCP_MD5SIG_EXT; bound it to the
 * address family width (32 for v4-mapped, 128 for native IPv6).
 */
619 if (optname == TCP_MD5SIG_EXT &&
620 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
621 prefixlen = cmd.tcpm_prefixlen;
622 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
623 prefixlen > 32))
624 return -EINVAL;
625 } else {
626 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
627 }
628 
/* Resolve an optional bound ifindex; it must name an L3 master. */
629 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
630 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
631 struct net_device *dev;
632 
633 rcu_read_lock();
634 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
635 if (dev && netif_is_l3_master(dev))
636 l3index = dev->ifindex;
637 rcu_read_unlock();
638 
639 /* ok to reference set/not set outside of rcu;
640 * right now device MUST be an L3 master
641 */
642 if (!dev || !l3index)
643 return -EINVAL;
644 }
645 
/* Zero key length means "delete the key for this address". */
646 if (!cmd.tcpm_keylen) {
647 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
648 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
649 AF_INET, prefixlen,
650 l3index, flags);
651 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
652 AF_INET6, prefixlen, l3index, flags);
653 }
654 
655 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
656 return -EINVAL;
657 
658 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
659 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
660 AF_INET, prefixlen, l3index, flags,
661 cmd.tcpm_key, cmd.tcpm_keylen,
662 GFP_KERNEL);
663 
664 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
665 AF_INET6, prefixlen, l3index, flags,
666 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
667 }
668
/* Feed the TCP-MD5 pseudo-header (RFC 2385 over the RFC 2460 IPv6
 * pseudo-header) plus the TCP header (checksum zeroed) into the
 * pool's ahash request.  Returns crypto_ahash_update()'s result.
 */
tcp_v6_md5_hash_headers(struct tcp_md5sig_pool * hp,const struct in6_addr * daddr,const struct in6_addr * saddr,const struct tcphdr * th,int nbytes)669 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
670 const struct in6_addr *daddr,
671 const struct in6_addr *saddr,
672 const struct tcphdr *th, int nbytes)
673 {
674 struct tcp6_pseudohdr *bp;
675 struct scatterlist sg;
676 struct tcphdr *_th;
677 
678 bp = hp->scratch;
679 /* 1. TCP pseudo-header (RFC2460) */
680 bp->saddr = *saddr;
681 bp->daddr = *daddr;
682 bp->protocol = cpu_to_be32(IPPROTO_TCP);
683 bp->len = cpu_to_be32(nbytes);
684 
/* Copy the TCP header right after the pseudo-header; the checksum
 * field is excluded from the signature, so zero it in the copy.
 */
685 _th = (struct tcphdr *)(bp + 1);
686 memcpy(_th, th, sizeof(*th));
687 _th->check = 0;
688 
689 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
690 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
691 sizeof(*bp) + sizeof(*th));
692 return crypto_ahash_update(hp->md5_req);
693 }
694
/* Compute the MD5 signature over pseudo-header + TCP header + key
 * (no payload) into md5_hash (16 bytes).  Used for generated control
 * segments such as RSTs.  Returns 0 on success; on failure md5_hash
 * is zeroed and 1 is returned.
 */
tcp_v6_md5_hash_hdr(char * md5_hash,const struct tcp_md5sig_key * key,const struct in6_addr * daddr,struct in6_addr * saddr,const struct tcphdr * th)695 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
696 const struct in6_addr *daddr, struct in6_addr *saddr,
697 const struct tcphdr *th)
698 {
699 struct tcp_md5sig_pool *hp;
700 struct ahash_request *req;
701 
/* Per-CPU pool; must be released on every exit path. */
702 hp = tcp_get_md5sig_pool();
703 if (!hp)
704 goto clear_hash_noput;
705 req = hp->md5_req;
706 
707 if (crypto_ahash_init(req))
708 goto clear_hash;
709 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
710 goto clear_hash;
711 if (tcp_md5_hash_key(hp, key))
712 goto clear_hash;
713 ahash_request_set_crypt(req, NULL, md5_hash, 0);
714 if (crypto_ahash_final(req))
715 goto clear_hash;
716 
717 tcp_put_md5sig_pool();
718 return 0;
719 
720 clear_hash:
721 tcp_put_md5sig_pool();
722 clear_hash_noput:
/* Never leave a partial digest behind on error. */
723 memset(md5_hash, 0, 16);
724 return 1;
725 }
726
/* Compute the MD5 signature over pseudo-header + TCP header + payload
 * of @skb into md5_hash (16 bytes).  Addresses come from the socket
 * when one is given (established/request sockets), otherwise from the
 * packet itself.  Returns 0 on success; on failure md5_hash is zeroed
 * and 1 is returned.
 */
tcp_v6_md5_hash_skb(char * md5_hash,const struct tcp_md5sig_key * key,const struct sock * sk,const struct sk_buff * skb)727 static int tcp_v6_md5_hash_skb(char *md5_hash,
728 const struct tcp_md5sig_key *key,
729 const struct sock *sk,
730 const struct sk_buff *skb)
731 {
732 const struct in6_addr *saddr, *daddr;
733 struct tcp_md5sig_pool *hp;
734 struct ahash_request *req;
735 const struct tcphdr *th = tcp_hdr(skb);
736 
737 if (sk) { /* valid for establish/request sockets */
738 saddr = &sk->sk_v6_rcv_saddr;
739 daddr = &sk->sk_v6_daddr;
740 } else {
741 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
742 saddr = &ip6h->saddr;
743 daddr = &ip6h->daddr;
744 }
745 
/* Per-CPU pool; must be released on every exit path. */
746 hp = tcp_get_md5sig_pool();
747 if (!hp)
748 goto clear_hash_noput;
749 req = hp->md5_req;
750 
751 if (crypto_ahash_init(req))
752 goto clear_hash;
753 
754 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
755 goto clear_hash;
/* Payload starts past the TCP header (doff is in 32-bit words). */
756 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
757 goto clear_hash;
758 if (tcp_md5_hash_key(hp, key))
759 goto clear_hash;
760 ahash_request_set_crypt(req, NULL, md5_hash, 0);
761 if (crypto_ahash_final(req))
762 goto clear_hash;
763 
764 tcp_put_md5sig_pool();
765 return 0;
766 
767 clear_hash:
768 tcp_put_md5sig_pool();
769 clear_hash_noput:
770 memset(md5_hash, 0, 16);
771 return 1;
772 }
773
774 #endif
775
/* Fill the IPv6 parts of a fresh request_sock from the incoming SYN:
 * peer/local addresses, the incoming interface for link-local peers,
 * and (when the listener asked for rx options / flow label reflection
 * and this is not a timewait recycle) a held reference to the SYN skb.
 */
tcp_v6_init_req(struct request_sock * req,const struct sock * sk_listener,struct sk_buff * skb)776 static void tcp_v6_init_req(struct request_sock *req,
777 const struct sock *sk_listener,
778 struct sk_buff *skb)
779 {
780 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
781 struct inet_request_sock *ireq = inet_rsk(req);
782 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
783 
784 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
785 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
786 
787 /* So that link locals have meaning */
788 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
789 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
790 ireq->ir_iif = tcp_v6_iif(skb);
791 
792 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
793 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
794 np->rxopt.bits.rxinfo ||
795 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
796 np->rxopt.bits.rxohlim || np->repflow)) {
/* Extra reference dropped later by tcp_v6_reqsk_destructor(). */
797 refcount_inc(&skb->users);
798 ireq->pktopts = skb;
799 }
800 }
801
/* tcp_request_sock_ops->route_req: initialise the IPv6 request state,
 * run the LSM hook, then route the SYN-ACK.  Returns NULL if the LSM
 * rejects the connection or routing fails.
 */
tcp_v6_route_req(const struct sock * sk,struct sk_buff * skb,struct flowi * fl,struct request_sock * req)802 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
803 struct sk_buff *skb,
804 struct flowi *fl,
805 struct request_sock *req)
806 {
807 tcp_v6_init_req(req, sk, skb);
808 
809 if (security_inet_conn_request(sk, skb, req))
810 return NULL;
811 
812 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
813 }
814
/* request_sock operations for TCP/IPv6 SYN handling (SYN-ACK
 * retransmit, ACK/RST generation, request teardown).
 */
815 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
816 .family = AF_INET6,
817 .obj_size = sizeof(struct tcp6_request_sock),
818 .rtx_syn_ack = tcp_rtx_synack,
819 .send_ack = tcp_v6_reqsk_send_ack,
820 .destructor = tcp_v6_reqsk_destructor,
821 .send_reset = tcp_v6_send_reset,
822 .syn_ack_timeout = tcp_syn_ack_timeout,
823 };
824
/* AF-specific hooks used while a connection request is pending:
 * MSS clamp for the minimum IPv6 MTU, MD5/syncookie helpers, routing,
 * ISN/timestamp-offset generation and SYN-ACK transmission.
 */
825 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
826 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
827 sizeof(struct ipv6hdr),
828 #ifdef CONFIG_TCP_MD5SIG
829 .req_md5_lookup = tcp_v6_md5_lookup,
830 .calc_md5_hash = tcp_v6_md5_hash_skb,
831 #endif
832 #ifdef CONFIG_SYN_COOKIES
833 .cookie_init_seq = cookie_v6_init_sequence,
834 #endif
835 .route_req = tcp_v6_route_req,
836 .init_seq = tcp_v6_init_seq,
837 .init_ts_off = tcp_v6_init_ts_off,
838 .send_synack = tcp_v6_send_synack,
839 };
840
/* Build and send an unsolicited control segment (RST when @rst, plain
 * ACK otherwise) in reply to @skb, using the per-netns control socket.
 * The reply's addresses/ports are the incoming packet's swapped;
 * timestamp, MD5 and MPTCP-reset options are appended as requested.
 * @sk may be NULL, a full socket, a timewait or a request socket.
 */
tcp_v6_send_response(const struct sock * sk,struct sk_buff * skb,u32 seq,u32 ack,u32 win,u32 tsval,u32 tsecr,int oif,struct tcp_md5sig_key * key,int rst,u8 tclass,__be32 label,u32 priority)841 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
842 u32 ack, u32 win, u32 tsval, u32 tsecr,
843 int oif, struct tcp_md5sig_key *key, int rst,
844 u8 tclass, __be32 label, u32 priority)
845 {
846 const struct tcphdr *th = tcp_hdr(skb);
847 struct tcphdr *t1;
848 struct sk_buff *buff;
849 struct flowi6 fl6;
850 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
851 struct sock *ctl_sk = net->ipv6.tcp_sk;
852 unsigned int tot_len = sizeof(struct tcphdr);
853 __be32 mrst = 0, *topt;
854 struct dst_entry *dst;
855 __u32 mark = 0;
856 
/* Size the header for the options we will emit below. */
857 if (tsecr)
858 tot_len += TCPOLEN_TSTAMP_ALIGNED;
859 #ifdef CONFIG_TCP_MD5SIG
860 if (key)
861 tot_len += TCPOLEN_MD5SIG_ALIGNED;
862 #endif
863 
864 #ifdef CONFIG_MPTCP
865 if (rst && !key) {
866 mrst = mptcp_reset_option(skb);
867 
868 if (mrst)
869 tot_len += sizeof(__be32);
870 }
871 #endif
872 
873 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
874 if (!buff)
875 return;
876 
877 skb_reserve(buff, MAX_TCP_HEADER);
878 
879 t1 = skb_push(buff, tot_len);
880 skb_reset_transport_header(buff);
881 
882 /* Swap the send and the receive. */
883 memset(t1, 0, sizeof(*t1));
884 t1->dest = th->source;
885 t1->source = th->dest;
886 t1->doff = tot_len / 4;
887 t1->seq = htonl(seq);
888 t1->ack_seq = htonl(ack);
/* Per RFC 793: a RST answering an ACK carries no ACK of its own. */
889 t1->ack = !rst || !th->ack;
890 t1->rst = rst;
891 t1->window = htons(win);
892 
893 topt = (__be32 *)(t1 + 1);
894 
895 if (tsecr) {
896 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
897 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
898 *topt++ = htonl(tsval);
899 *topt++ = htonl(tsecr);
900 }
901 
902 if (mrst)
903 *topt++ = mrst;
904 
905 #ifdef CONFIG_TCP_MD5SIG
906 if (key) {
907 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
908 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
909 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
910 &ipv6_hdr(skb)->saddr,
911 &ipv6_hdr(skb)->daddr, t1);
912 }
913 #endif
914 
/* Reply flow: mirror of the incoming packet's addresses. */
915 memset(&fl6, 0, sizeof(fl6));
916 fl6.daddr = ipv6_hdr(skb)->saddr;
917 fl6.saddr = ipv6_hdr(skb)->daddr;
918 fl6.flowlabel = label;
919 
920 buff->ip_summed = CHECKSUM_PARTIAL;
921 
922 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
923 
924 fl6.flowi6_proto = IPPROTO_TCP;
925 if (rt6_need_strict(&fl6.daddr) && !oif)
926 fl6.flowi6_oif = tcp_v6_iif(skb);
927 else {
928 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
929 oif = skb->skb_iif;
930 
931 fl6.flowi6_oif = oif;
932 }
933 
/* Inherit fwmark / tx hash / transmit time from the originating
 * (possibly timewait) socket when we have one.
 */
934 if (sk) {
935 if (sk->sk_state == TCP_TIME_WAIT) {
936 mark = inet_twsk(sk)->tw_mark;
937 /* autoflowlabel relies on buff->hash */
938 skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
939 PKT_HASH_TYPE_L4);
940 } else {
941 mark = sk->sk_mark;
942 }
943 skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
944 }
945 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
946 fl6.fl6_dport = t1->dest;
947 fl6.fl6_sport = t1->source;
948 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
949 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
950 
951 /* Pass a socket to ip6_dst_lookup either it is for RST
952 * Underlying function will use this to retrieve the network
953 * namespace
954 */
955 dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
956 if (!IS_ERR(dst)) {
957 skb_dst_set(buff, dst);
958 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
959 tclass & ~INET_ECN_MASK, priority);
960 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
961 if (rst)
962 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
963 return;
964 }
965 
/* Routing failed: drop the reply we built. */
966 kfree_skb(buff);
967 }
968
/* Send a RST in reply to @skb.  When TCP-MD5 is configured, sign the
 * RST with the matching key — for an unknown (NULL) socket this means
 * finding a listener for the tuple and verifying the incoming
 * segment's signature first, so no RST is emitted for segments whose
 * MD5 option does not validate.
 */
tcp_v6_send_reset(const struct sock * sk,struct sk_buff * skb)969 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
970 {
971 const struct tcphdr *th = tcp_hdr(skb);
972 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
973 u32 seq = 0, ack_seq = 0;
974 struct tcp_md5sig_key *key = NULL;
975 #ifdef CONFIG_TCP_MD5SIG
976 const __u8 *hash_location = NULL;
977 unsigned char newhash[16];
978 int genhash;
979 struct sock *sk1 = NULL;
980 #endif
981 __be32 label = 0;
982 u32 priority = 0;
983 struct net *net;
984 int oif = 0;
985 
/* Never answer a RST with a RST. */
986 if (th->rst)
987 return;
988 
989 /* If sk not NULL, it means we did a successful lookup and incoming
990 * route had to be correct. prequeue might have dropped our dst.
991 */
992 if (!sk && !ipv6_unicast_destination(skb))
993 return;
994 
995 net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
996 #ifdef CONFIG_TCP_MD5SIG
997 rcu_read_lock();
998 hash_location = tcp_parse_md5sig_option(th);
999 if (sk && sk_fullsock(sk)) {
1000 int l3index;
1001 
1002 /* sdif set, means packet ingressed via a device
1003 * in an L3 domain and inet_iif is set to it.
1004 */
1005 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
1006 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
1007 } else if (hash_location) {
1008 int dif = tcp_v6_iif_l3_slave(skb);
1009 int sdif = tcp_v6_sdif(skb);
1010 int l3index;
1011 
1012 /*
1013 * active side is lost. Try to find listening socket through
1014 * source port, and then find md5 key through listening socket.
1015 * we are not loose security here:
1016 * Incoming packet is checked with md5 hash with finding key,
1017 * no RST generated if md5 hash doesn't match.
1018 */
1019 sk1 = inet6_lookup_listener(net,
1020 &tcp_hashinfo, NULL, 0,
1021 &ipv6h->saddr,
1022 th->source, &ipv6h->daddr,
1023 ntohs(th->source), dif, sdif);
1024 if (!sk1)
1025 goto out;
1026 
1027 /* sdif set, means packet ingressed via a device
1028 * in an L3 domain and dif is set to it.
1029 */
1030 l3index = tcp_v6_sdif(skb) ? dif : 0;
1031 
1032 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
1033 if (!key)
1034 goto out;
1035 
/* Verify the incoming segment's signature before replying. */
1036 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
1037 if (genhash || memcmp(hash_location, newhash, 16) != 0)
1038 goto out;
1039 }
1040 #endif
1041 
/* RFC 793 reset sequence numbers: echo the peer's ACK as our seq, or
 * ACK everything the offending segment occupied.
 */
1042 if (th->ack)
1043 seq = ntohl(th->ack_seq);
1044 else
1045 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
1046 (th->doff << 2);
1047 
1048 if (sk) {
1049 oif = sk->sk_bound_dev_if;
1050 if (sk_fullsock(sk)) {
1051 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1052 
1053 trace_tcp_send_reset(sk, skb);
1054 if (np->repflow)
1055 label = ip6_flowlabel(ipv6h);
1056 priority = sk->sk_priority;
1057 }
1058 if (sk->sk_state == TCP_TIME_WAIT) {
1059 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
1060 priority = inet_twsk(sk)->tw_priority;
1061 }
1062 } else {
1063 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
1064 label = ip6_flowlabel(ipv6h);
1065 }
1066 
1067 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
1068 ipv6_get_dsfield(ipv6h), label, priority);
1069 
1070 #ifdef CONFIG_TCP_MD5SIG
1071 out:
1072 rcu_read_unlock();
1073 #endif
1074 }
1075
/* Emit a bare ACK for @skb: a thin wrapper around tcp_v6_send_response()
 * with the rst argument cleared.
 */
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key,
			     0 /* not a RST */, tclass, label, priority);
}
1084
/* Answer a segment that matched a TIME_WAIT socket with an ACK rebuilt
 * from the state preserved in the timewait sock (snd_nxt/rcv_nxt,
 * timestamps, MD5 key, tclass, flow label). The advertised window is
 * right-shifted by tw_rcv_wscale per RFC 7323. Drops the tw refcount
 * taken by the caller.
 */
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}
1098
/* ACK a segment on behalf of a request socket (connection still owned by
 * the listener, or a Fast Open child). MD5 key, DSCP and device binding
 * are derived from @sk and the incoming skb.
 */
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}
1123
1124
/* When the listener is under SYN flood, a non-SYN segment may carry a
 * valid syncookie in its ACK; try to rebuild the connection from it.
 * Returns @sk unchanged if syncookies are compiled out or the segment
 * is a SYN.
 */
static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (!tcp_hdr(skb)->syn)
		return cookie_v6_check(sk, skb);
#endif
	return sk;
}
1135
tcp_v6_get_syncookie(struct sock * sk,struct ipv6hdr * iph,struct tcphdr * th,u32 * cookie)1136 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
1137 struct tcphdr *th, u32 *cookie)
1138 {
1139 u16 mss = 0;
1140 #ifdef CONFIG_SYN_COOKIES
1141 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
1142 &tcp_request_sock_ipv6_ops, sk, th);
1143 if (mss) {
1144 *cookie = __cookie_v6_init_sequence(iph, th, &mss);
1145 tcp_synq_overflow(sk);
1146 }
1147 #endif
1148 return mss;
1149 }
1150
tcp_v6_conn_request(struct sock * sk,struct sk_buff * skb)1151 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1152 {
1153 if (skb->protocol == htons(ETH_P_IP))
1154 return tcp_v4_conn_request(sk, skb);
1155
1156 if (!ipv6_unicast_destination(skb))
1157 goto drop;
1158
1159 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
1160 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
1161 return 0;
1162 }
1163
1164 return tcp_conn_request(&tcp6_request_sock_ops,
1165 &tcp_request_sock_ipv6_ops, sk, skb);
1166
1167 drop:
1168 tcp_listendrop(sk);
1169 return 0; /* don't send reset */
1170 }
1171
/* Undo tcp_v6_fill_cb(): move the saved inet6_skb_parm back to the head
 * of skb->cb.
 */
static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}
1181
/* Create the child socket once the 3WHS completes.
 *
 * @sk:         the listener
 * @skb:        the completing ACK (or SYN for Fast Open)
 * @req:        the request sock being promoted
 * @dst:        optional precomputed route (may be NULL)
 * @req_unhash: request to remove from the ehash
 * @own_req:    set true if we won the race to insert the child
 *
 * Returns the new socket, or NULL on failure (accept queue overflow,
 * no route, allocation failure, port inheritance failure).
 */
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped: IPv4 client on a dual-stack listener.
		 *	Let the v4 path build the child, then retarget its
		 *	af_ops at the mapped variants.
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		/* The memcpy above copied the listener's pointers;
		 * clear everything the child must not share.
		 */
		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	/* v4 addresses are meaningless on a pure v6 socket; park them on
	 * the IPv6 loopback-mapped sentinel.
	 */
	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case only
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}
1407
1408 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
1409 u32));
1410 /* The socket must have it's spinlock held when we get
1411 * here, unless it is a TCP_LISTEN socket.
1412 *
1413 * We have a potential double-lock case here, so even when
1414 * doing backlog processing we use the BH locking scheme.
1415 * This is because we cannot sleep with the original spinlock
1416 * held.
1417 */
INDIRECT_CALLABLE_SCOPE
/* Per-socket receive path for IPv6 TCP: handles the established fast
 * path, listener/child dispatch, and the IPV6_PKTOPTIONS latch.
 * Returns 0; the skb is consumed on every path.
 */
int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	enum skb_drop_reason reason;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 * socket locking is here for SMP purposes as backlog rcv
	 * is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst;

		dst = rcu_dereference_protected(sk->sk_rx_dst,
						lockdep_sock_is_held(sk));

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			/* Invalidate the cached rx route if the packet came
			 * in on a different device or the dst went stale.
			 */
			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
					    dst, sk->sk_rx_dst_cookie) == NULL) {
				RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
				dst_release(dst);
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb_reason(skb, reason);
	return 0;
csum_err:
	reason = SKB_DROP_REASON_TCP_CSUM;
	trace_tcp_bad_csum(skb);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			/* Latch this clone as np->pktoptions; free the one
			 * it replaces (xchg returns the previous pointer).
			 */
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	consume_skb(opt_skb);
	return 0;
}
1554
/* Populate TCP_SKB_CB() from the TCP/IPv6 headers, first relocating the
 * inet6_skb_parm so both can share skb->cb.
 */
static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	/* end_seq counts SYN and FIN as one sequence unit each */
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}
1578
/* IPv6 TCP protocol handler: validate the segment, look up the owning
 * socket and dispatch (NEW_SYN_RECV / TIME_WAIT / full socket), taking
 * care of refcounting and lookup retries when a request is stolen.
 */
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	enum skb_drop_reason drop_reason;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	/* doff below the minimal header length is malformed */
	if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
		drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
		goto bad_packet;
	}
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	/* pskb_may_pull() may have reallocated the header */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		drop_reason = tcp_inbound_md5_hash(sk, skb,
						   &hdr->saddr, &hdr->daddr,
						   AF_INET6, dif, sdif);
		if (drop_reason) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			/* Listener went away; try to migrate the request to
			 * another reuseport listener, else drop and re-lookup.
			 */
			nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
			if (!nsk) {
				inet_csk_reqsk_queue_drop_and_put(sk, req);
				goto lookup;
			}
			sk = nsk;
			/* reuseport_migrate_sock() has already held one sk_refcnt
			 * before returning.
			 */
		} else {
			sock_hold(sk);
		}
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		} else {
			drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}

	if (static_branch_unlikely(&ip6_min_hopcount)) {
		/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
		if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
			__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
			goto discard_and_relse;
		}
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		goto discard_and_relse;
	}

	drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
					   AF_INET6, dif, sdif);
	if (drop_reason)
		goto discard_and_relse;

	if (tcp_filter(sk, skb)) {
		drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
		goto discard_and_relse;
	}
	/* tcp_filter() may have trimmed/relocated the skb data */
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb, &drop_reason))
			goto discard_and_relse;
	}
	bh_unlock_sock(sk);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	drop_reason = SKB_DROP_REASON_NO_SOCKET;
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		drop_reason = SKB_DROP_REASON_TCP_CSUM;
		trace_tcp_bad_csum(skb);
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	SKB_DR_OR(drop_reason, NOT_SPECIFIED);
	kfree_skb_reason(skb, drop_reason);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		drop_reason = SKB_DROP_REASON_XFRM_POLICY;
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		/* A new SYN may legitimately reuse a TIME_WAIT 4-tuple;
		 * hand it to a current listener if one exists.
		 */
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}
1824
/* Early demux: before routing, try to match the packet to an established
 * socket and steal its cached rx dst, skipping a route lookup. Only sets
 * skb->sk / skb->destructor; never consumes the skb.
 */
void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			/* only reuse the dst if it is still valid and was
			 * cached for this ingress device
			 */
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}
1862
/* TIME_WAIT sock operations for IPv6 TCP */
static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};
1868
/* Set up the transmit checksum using the socket's bound addresses */
INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}
1873
/* Address-family ops for native IPv6 TCP sockets */
const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};
1889
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature ops for native IPv6 sockets */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1897
1898 /*
1899 * TCP over IPv4 via INET6 API
1900 */
1901 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1902 .queue_xmit = ip_queue_xmit,
1903 .send_check = tcp_v4_send_check,
1904 .rebuild_header = inet_sk_rebuild_header,
1905 .sk_rx_dst_set = inet_sk_rx_dst_set,
1906 .conn_request = tcp_v6_conn_request,
1907 .syn_recv_sock = tcp_v6_syn_recv_sock,
1908 .net_header_len = sizeof(struct iphdr),
1909 .setsockopt = ipv6_setsockopt,
1910 .getsockopt = ipv6_getsockopt,
1911 .addr2sockaddr = inet6_csk_addr2sockaddr,
1912 .sockaddr_len = sizeof(struct sockaddr_in6),
1913 .mtu_reduced = tcp_v4_mtu_reduced,
1914 };
1915
#ifdef CONFIG_TCP_MD5SIG
/* MD5 signature ops for v4-mapped sockets (hashing done the v4 way) */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif
1923
1924 /* NOTE: A lot of things set to zero explicitly by call to
1925 * sk_alloc() so need not be done here.
1926 */
tcp_v6_init_sock(struct sock * sk)1927 static int tcp_v6_init_sock(struct sock *sk)
1928 {
1929 struct inet_connection_sock *icsk = inet_csk(sk);
1930
1931 tcp_init_sock(sk);
1932
1933 icsk->icsk_af_ops = &ipv6_specific;
1934
1935 #ifdef CONFIG_TCP_MD5SIG
1936 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1937 #endif
1938
1939 return 0;
1940 }
1941
/* Protocol destroy hook: common TCP teardown first, then IPv6-specific
 * state (options, flow labels, pktoptions).
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
1947
1948 #ifdef CONFIG_PROC_FS
1949 /* Proc filesystem TCPv6 sock list dumping. */
get_openreq6(struct seq_file * seq,const struct request_sock * req,int i)1950 static void get_openreq6(struct seq_file *seq,
1951 const struct request_sock *req, int i)
1952 {
1953 long ttd = req->rsk_timer.expires - jiffies;
1954 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1955 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1956
1957 if (ttd < 0)
1958 ttd = 0;
1959
1960 seq_printf(seq,
1961 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1962 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1963 i,
1964 src->s6_addr32[0], src->s6_addr32[1],
1965 src->s6_addr32[2], src->s6_addr32[3],
1966 inet_rsk(req)->ir_num,
1967 dest->s6_addr32[0], dest->s6_addr32[1],
1968 dest->s6_addr32[2], dest->s6_addr32[3],
1969 ntohs(inet_rsk(req)->ir_rmt_port),
1970 TCP_SYN_RECV,
1971 0, 0, /* could print option size, but that is af dependent. */
1972 1, /* timers active (only the expire timer) */
1973 jiffies_to_clock_t(ttd),
1974 req->num_timeout,
1975 from_kuid_munged(seq_user_ns(seq),
1976 sock_i_uid(req->rsk_listener)),
1977 0, /* non standard timer */
1978 0, /* open_requests have no inode */
1979 0, req);
1980 }
1981
get_tcp6_sock(struct seq_file * seq,struct sock * sp,int i)1982 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1983 {
1984 const struct in6_addr *dest, *src;
1985 __u16 destp, srcp;
1986 int timer_active;
1987 unsigned long timer_expires;
1988 const struct inet_sock *inet = inet_sk(sp);
1989 const struct tcp_sock *tp = tcp_sk(sp);
1990 const struct inet_connection_sock *icsk = inet_csk(sp);
1991 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1992 int rx_queue;
1993 int state;
1994
1995 dest = &sp->sk_v6_daddr;
1996 src = &sp->sk_v6_rcv_saddr;
1997 destp = ntohs(inet->inet_dport);
1998 srcp = ntohs(inet->inet_sport);
1999
2000 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2001 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
2002 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2003 timer_active = 1;
2004 timer_expires = icsk->icsk_timeout;
2005 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2006 timer_active = 4;
2007 timer_expires = icsk->icsk_timeout;
2008 } else if (timer_pending(&sp->sk_timer)) {
2009 timer_active = 2;
2010 timer_expires = sp->sk_timer.expires;
2011 } else {
2012 timer_active = 0;
2013 timer_expires = jiffies;
2014 }
2015
2016 state = inet_sk_state_load(sp);
2017 if (state == TCP_LISTEN)
2018 rx_queue = READ_ONCE(sp->sk_ack_backlog);
2019 else
2020 /* Because we don't lock the socket,
2021 * we might find a transient negative value.
2022 */
2023 rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
2024 READ_ONCE(tp->copied_seq), 0);
2025
2026 seq_printf(seq,
2027 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2028 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
2029 i,
2030 src->s6_addr32[0], src->s6_addr32[1],
2031 src->s6_addr32[2], src->s6_addr32[3], srcp,
2032 dest->s6_addr32[0], dest->s6_addr32[1],
2033 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2034 state,
2035 READ_ONCE(tp->write_seq) - tp->snd_una,
2036 rx_queue,
2037 timer_active,
2038 jiffies_delta_to_clock_t(timer_expires - jiffies),
2039 icsk->icsk_retransmits,
2040 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
2041 icsk->icsk_probes_out,
2042 sock_i_ino(sp),
2043 refcount_read(&sp->sk_refcnt), sp,
2044 jiffies_to_clock_t(icsk->icsk_rto),
2045 jiffies_to_clock_t(icsk->icsk_ack.ato),
2046 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
2047 tcp_snd_cwnd(tp),
2048 state == TCP_LISTEN ?
2049 fastopenq->max_qlen :
2050 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
2051 );
2052 }
2053
/* Format one TIME_WAIT socket as a /proc/net/tcp6 row; most columns are
 * fixed zeros since a timewait sock keeps only minimal state.
 */
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}
2078
tcp6_seq_show(struct seq_file * seq,void * v)2079 static int tcp6_seq_show(struct seq_file *seq, void *v)
2080 {
2081 struct tcp_iter_state *st;
2082 struct sock *sk = v;
2083
2084 if (v == SEQ_START_TOKEN) {
2085 seq_puts(seq,
2086 " sl "
2087 "local_address "
2088 "remote_address "
2089 "st tx_queue rx_queue tr tm->when retrnsmt"
2090 " uid timeout inode\n");
2091 goto out;
2092 }
2093 st = seq->private;
2094
2095 if (sk->sk_state == TCP_TIME_WAIT)
2096 get_timewait6_sock(seq, v, st->num);
2097 else if (sk->sk_state == TCP_NEW_SYN_RECV)
2098 get_openreq6(seq, v, st->num);
2099 else
2100 get_tcp6_sock(seq, v, st->num);
2101 out:
2102 return 0;
2103 }
2104
/* seq_file iteration ops; start/next/stop are shared with IPv4 */
static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};
2111
/* Restricts the shared TCP seq walkers to AF_INET6 sockets only;
 * passed to proc_create_net_data() as per-file private data.
 */
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};
2115
tcp6_proc_init(struct net * net)2116 int __net_init tcp6_proc_init(struct net *net)
2117 {
2118 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
2119 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
2120 return -ENOMEM;
2121 return 0;
2122 }
2123
/* Tear down the per-netns /proc/net/tcp6 entry created by tcp6_proc_init(). */
void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
2128 #endif
2129
/* Protocol operations for IPv6 TCP sockets.  Most callbacks are the
 * family-independent TCP implementations; only the handful that must
 * know about IPv6 addressing (connect, backlog_rcv, hash, init/destroy)
 * are v6-specific.  State counters and sysctl limits are shared with
 * IPv4 TCP.
 */
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	/* sockets may be recycled under RCU; lookups must revalidate */
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
2178
/* Layer-3 demux entry: hands IPPROTO_TCP packets and ICMPv6 errors to
 * the TCP receive/error paths.
 */
static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
2184
/* socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP) registration: binds the
 * stream socket type to tcpv6_prot.  PERMANENT: cannot be unregistered
 * by modules; ICSK: uses the inet connection-sock infrastructure.
 */
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};
2193
/* Per-netns setup: create the kernel control socket used for sending
 * TCP resets/ACKs on behalf of this namespace.
 */
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}
2199
/* Per-netns teardown: release the control socket from tcpv6_net_init(). */
static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}
2204
/* Batched netns exit: purge IPv6 timewait sockets for all namespaces
 * going away in this batch (net_exit_list is unused because the purge
 * scans the whole hash table).
 */
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}
2209
/* Per-network-namespace lifecycle hooks for IPv6 TCP. */
static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};
2215
tcpv6_init(void)2216 int __init tcpv6_init(void)
2217 {
2218 int ret;
2219
2220 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2221 if (ret)
2222 goto out;
2223
2224 /* register inet6 protocol */
2225 ret = inet6_register_protosw(&tcpv6_protosw);
2226 if (ret)
2227 goto out_tcpv6_protocol;
2228
2229 ret = register_pernet_subsys(&tcpv6_net_ops);
2230 if (ret)
2231 goto out_tcpv6_protosw;
2232
2233 ret = mptcpv6_init();
2234 if (ret)
2235 goto out_tcpv6_pernet_subsys;
2236
2237 out:
2238 return ret;
2239
2240 out_tcpv6_pernet_subsys:
2241 unregister_pernet_subsys(&tcpv6_net_ops);
2242 out_tcpv6_protosw:
2243 inet6_unregister_protosw(&tcpv6_protosw);
2244 out_tcpv6_protocol:
2245 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2246 goto out;
2247 }
2248
/* Unwind tcpv6_init() registrations in reverse order.
 * NOTE(review): mptcpv6_init() has no visible counterpart here —
 * presumably it needs none; confirm against its definition.
 */
void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}
2255