/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <pedro_m@yahoo.com>
 *
 *      $Id: tcp_ipv6.c,v 1.142.2.1 2001/12/21 05:06:08 davem Exp $
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allows both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/sched.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/inet_ecn.h>

#include <asm/uaccess.h>

static void     tcp_v6_send_reset(struct sk_buff *skb);
static void     tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
static void     tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                                  struct sk_buff *skb);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int      tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
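/* Note: only the low 32 bits of each address contribute here, so the
 * ports supply most of the remaining entropy; hosts that share those
 * low address bits will share ehash buckets.
 */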
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
                                    struct in6_addr *faddr, u16 fport)
{
        int hashent = (lport ^ fport);

        hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
        hashent ^= hashent>>16;
        hashent ^= hashent>>8;
        return (hashent & (tcp_ehash_size - 1));
}

static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
        struct in6_addr *laddr = &sk->net_pinfo.af_inet6.rcv_saddr;
        struct in6_addr *faddr = &sk->net_pinfo.af_inet6.daddr;
        __u16 lport = sk->num;
        __u16 fport = sk->dport;
        return tcp_v6_hashfn(laddr, lport, faddr, fport);
}

/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
        struct tcp_bind_hashbucket *head;
        struct tcp_bind_bucket *tb;
        int ret;

        local_bh_disable();
        if (snum == 0) {
                int low = sysctl_local_port_range[0];
                int high = sysctl_local_port_range[1];
                int remaining = (high - low) + 1;
                int rover;
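
                /* Pick an ephemeral port: cycle the rover through
                 * [low, high] until a bind bucket that does not already
                 * contain the candidate port is found.
                 */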
                spin_lock(&tcp_portalloc_lock);
                rover = tcp_port_rover;
                do {    rover++;
                        if ((rover < low) || (rover > high))
                                rover = low;
                        head = &tcp_bhash[tcp_bhashfn(rover)];
                        spin_lock(&head->lock);
                        for (tb = head->chain; tb; tb = tb->next)
                                if (tb->port == rover)
                                        goto next;
                        break;
                next:
                        spin_unlock(&head->lock);
                } while (--remaining > 0);
                tcp_port_rover = rover;
                spin_unlock(&tcp_portalloc_lock);

                /* Exhausted local port range during search? */
                ret = 1;
                if (remaining <= 0)
                        goto fail;

                /* OK, here is the one we will use. */
                snum = rover;
                tb = NULL;
        } else {
                head = &tcp_bhash[tcp_bhashfn(snum)];
                spin_lock(&head->lock);
                for (tb = head->chain; tb != NULL; tb = tb->next)
                        if (tb->port == snum)
                                break;
        }
        if (tb != NULL && tb->owners != NULL) {
                if (tb->fastreuse > 0 && sk->reuse != 0 && sk->state != TCP_LISTEN) {
                        goto success;
                } else {
                        struct sock *sk2 = tb->owners;
                        int sk_reuse = sk->reuse;
                        int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);

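                        /* Two sockets may share the port only if both set
                         * SO_REUSEADDR (and neither is listening), or if
                         * their bound addresses cannot overlap.  The cases
                         * below cover wildcard, v4-mapped and plain IPv6
                         * addresses on either side, honouring IPV6_V6ONLY.
                         */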
                        /* We must walk the whole port owner list in this case. -DaveM */
                        for( ; sk2 != NULL; sk2 = sk2->bind_next) {
                                if (sk != sk2 &&
                                    (!sk->bound_dev_if ||
                                     !sk2->bound_dev_if ||
                                     sk->bound_dev_if == sk2->bound_dev_if)) {
                                        if (!sk_reuse   ||
                                            !sk2->reuse ||
                                            sk2->state == TCP_LISTEN) {
                                                /* NOTE: the IPv6 tw bucket has a different format */
                                                if ((!sk2->rcv_saddr && !ipv6_only_sock(sk)) ||
                                                    (sk2->family == AF_INET6 &&
                                                     ipv6_addr_any(&sk2->net_pinfo.af_inet6.rcv_saddr) &&
                                                     !(ipv6_only_sock(sk2) && addr_type == IPV6_ADDR_MAPPED)) ||
                                                    (addr_type == IPV6_ADDR_ANY &&
                                                     (!ipv6_only_sock(sk) ||
                                                      !(sk2->family == AF_INET6 ? ipv6_addr_type(&sk2->net_pinfo.af_inet6.rcv_saddr) == IPV6_ADDR_MAPPED : 1))) ||
                                                    (sk2->family == AF_INET6 &&
                                                     !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
                                                                    sk2->state != TCP_TIME_WAIT ?
                                                                    &sk2->net_pinfo.af_inet6.rcv_saddr :
                                                                    &((struct tcp_tw_bucket*)sk2)->v6_rcv_saddr)) ||
                                                    (addr_type == IPV6_ADDR_MAPPED &&
                                                     !ipv6_only_sock(sk2) &&
                                                     (!sk2->rcv_saddr ||
                                                      !sk->rcv_saddr ||
                                                      sk->rcv_saddr == sk2->rcv_saddr)))
                                                        break;
                                        }
                                }
                        }
                        /* If we found a conflict, fail. */
                        ret = 1;
                        if (sk2 != NULL)
                                goto fail_unlock;
                }
        }
        ret = 1;
        if (tb == NULL &&
            (tb = tcp_bucket_create(head, snum)) == NULL)
                goto fail_unlock;
        if (tb->owners == NULL) {
                if (sk->reuse && sk->state != TCP_LISTEN)
                        tb->fastreuse = 1;
                else
                        tb->fastreuse = 0;
        } else if (tb->fastreuse &&
                   ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
                tb->fastreuse = 0;

success:
        sk->num = snum;
        if (sk->prev == NULL) {
                if ((sk->bind_next = tb->owners) != NULL)
                        tb->owners->bind_pprev = &sk->bind_next;
                tb->owners = sk;
                sk->bind_pprev = &tb->owners;
                sk->prev = (struct sock *) tb;
        } else {
                BUG_TRAP(sk->prev == (struct sock *) tb);
        }
        ret = 0;

fail_unlock:
        spin_unlock(&head->lock);
fail:
        local_bh_enable();
        return ret;
}

static __inline__ void __tcp_v6_hash(struct sock *sk)
{
        struct sock **skp;
        rwlock_t *lock;

        BUG_TRAP(sk->pprev==NULL);

        if(sk->state == TCP_LISTEN) {
                skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
                lock = &tcp_lhash_lock;
                tcp_listen_wlock();
        } else {
                skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))].chain;
                lock = &tcp_ehash[sk->hashent].lock;
                write_lock(lock);
        }

        if((sk->next = *skp) != NULL)
                (*skp)->pprev = &sk->next;
        *skp = sk;
        sk->pprev = skp;
        sock_prot_inc_use(sk->prot);
        write_unlock(lock);
}


static void tcp_v6_hash(struct sock *sk)
{
        if(sk->state != TCP_CLOSE) {
                if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
                        tcp_prot.hash(sk);
                        return;
                }
                local_bh_disable();
                __tcp_v6_hash(sk);
                local_bh_enable();
        }
}

static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
        struct sock *sk;
        struct sock *result = NULL;
        int score, hiscore;

        hiscore = 0;
        read_lock(&tcp_lhash_lock);
        sk = tcp_listening_hash[tcp_lhashfn(hnum)];
        for(; sk; sk = sk->next) {
                if((sk->num == hnum) && (sk->family == PF_INET6)) {
                        struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;

                        score = 1;
                        if(!ipv6_addr_any(&np->rcv_saddr)) {
                                if(ipv6_addr_cmp(&np->rcv_saddr, daddr))
                                        continue;
                                score++;
                        }
                        if (sk->bound_dev_if) {
                                if (sk->bound_dev_if != dif)
                                        continue;
                                score++;
                        }
                        if (score == 3) {
                                result = sk;
                                break;
                        }
                        if (score > hiscore) {
                                hiscore = score;
                                result = sk;
                        }
                }
        }
        if (result)
                sock_hold(result);
        read_unlock(&tcp_lhash_lock);
        return result;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */

static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
                                                       struct in6_addr *daddr, u16 hnum,
                                                       int dif)
{
        struct tcp_ehash_bucket *head;
        struct sock *sk;
        __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
        int hash;

        /* Optimize here for direct hit, only listening connections can
         * have wildcards anyway.
         */
        hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
        head = &tcp_ehash[hash];
        read_lock(&head->lock);
        for(sk = head->chain; sk; sk = sk->next) {
                /* For IPV6 do the cheaper port and family tests first. */
                if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
                        goto hit; /* You sunk my battleship! */
        }
        /* Must check for a TIME_WAIT'er before going to listener hash. */
        for(sk = (head + tcp_ehash_size)->chain; sk; sk = sk->next) {
                if(*((__u32 *)&(sk->dport)) == ports &&
                   sk->family == PF_INET6) {
                        struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
                        if(!ipv6_addr_cmp(&tw->v6_daddr, saddr) &&
                           !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) &&
                           (!sk->bound_dev_if || sk->bound_dev_if == dif))
                                goto hit;
                }
        }
        read_unlock(&head->lock);
        return NULL;

hit:
        sock_hold(sk);
        read_unlock(&head->lock);
        return sk;
}


static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                           struct in6_addr *daddr, u16 hnum,
                                           int dif)
{
        struct sock *sk;

        sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

        if (sk)
                return sk;

        return tcp_v6_lookup_listener(daddr, hnum, dif);
}

inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                  struct in6_addr *daddr, u16 dport,
                                  int dif)
{
        struct sock *sk;

        local_bh_disable();
        sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
        local_bh_enable();

        return sk;
}


/*
 * Open request hash tables.
 */

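/* One round of a Jenkins-style mix over the whole 128-bit peer address,
 * the peer port and a per-listener random seed (hash_rnd), so that remote
 * hosts cannot deliberately collide entries in the SYN queue.
 */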
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
        u32 a, b, c;

        a = raddr->s6_addr32[0];
        b = raddr->s6_addr32[1];
        c = raddr->s6_addr32[2];

        a += JHASH_GOLDEN_RATIO;
        b += JHASH_GOLDEN_RATIO;
        c += rnd;
        __jhash_mix(a, b, c);

        a += raddr->s6_addr32[3];
        b += (u32) rport;
        __jhash_mix(a, b, c);

        return c & (TCP_SYNQ_HSIZE - 1);
}

static struct open_request *tcp_v6_search_req(struct tcp_opt *tp,
                                              struct open_request ***prevp,
                                              __u16 rport,
                                              struct in6_addr *raddr,
                                              struct in6_addr *laddr,
                                              int iif)
{
        struct tcp_listen_opt *lopt = tp->listen_opt;
        struct open_request *req, **prev;

        for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
             (req = *prev) != NULL;
             prev = &req->dl_next) {
                if (req->rmt_port == rport &&
                    req->class->family == AF_INET6 &&
                    !ipv6_addr_cmp(&req->af.v6_req.rmt_addr, raddr) &&
                    !ipv6_addr_cmp(&req->af.v6_req.loc_addr, laddr) &&
                    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
                        BUG_TRAP(req->sk == NULL);
                        *prevp = prev;
                        return req;
                }
        }

        return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
                                   struct in6_addr *saddr,
                                   struct in6_addr *daddr,
                                   unsigned long base)
{
        return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IPV6)) {
                return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
                                                    skb->nh.ipv6h->saddr.s6_addr32,
                                                    skb->h.th->dest,
                                                    skb->h.th->source);
        } else {
                return secure_tcp_sequence_number(skb->nh.iph->daddr,
                                                  skb->nh.iph->saddr,
                                                  skb->h.th->dest,
                                                  skb->h.th->source);
        }
}

static int tcp_v6_check_established(struct sock *sk)
{
        struct in6_addr *daddr = &sk->net_pinfo.af_inet6.rcv_saddr;
        struct in6_addr *saddr = &sk->net_pinfo.af_inet6.daddr;
        int dif = sk->bound_dev_if;
        u32 ports = TCP_COMBINED_PORTS(sk->dport, sk->num);
        int hash = tcp_v6_hashfn(daddr, sk->num, saddr, sk->dport);
        struct tcp_ehash_bucket *head = &tcp_ehash[hash];
        struct sock *sk2, **skp;
        struct tcp_tw_bucket *tw;

        write_lock_bh(&head->lock);

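        /* First scan the TIME_WAIT half of the chain: a bucket with a
         * recent timestamp may be recycled for this connection, with
         * write_seq started beyond the old connection's sequence space.
         */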
        for(skp = &(head + tcp_ehash_size)->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
                tw = (struct tcp_tw_bucket*)sk2;

                if(*((__u32 *)&(sk2->dport)) == ports &&
                   sk2->family == PF_INET6 &&
                   !ipv6_addr_cmp(&tw->v6_daddr, saddr) &&
                   !ipv6_addr_cmp(&tw->v6_rcv_saddr, daddr) &&
                   sk2->bound_dev_if == sk->bound_dev_if) {
                        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

                        if (tw->ts_recent_stamp) {
                                /* See comment in tcp_ipv4.c */
                                if ((tp->write_seq = tw->snd_nxt+65535+2) == 0)
                                        tp->write_seq = 1;
                                tp->ts_recent = tw->ts_recent;
                                tp->ts_recent_stamp = tw->ts_recent_stamp;
                                sock_hold(sk2);
                                skp = &head->chain;
                                goto unique;
                        } else
                                goto not_unique;
                }
        }
        tw = NULL;

        for(skp = &head->chain; (sk2=*skp)!=NULL; skp = &sk2->next) {
                if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
                        goto not_unique;
        }

unique:
        BUG_TRAP(sk->pprev==NULL);
        if ((sk->next = *skp) != NULL)
                (*skp)->pprev = &sk->next;

        *skp = sk;
        sk->pprev = skp;
        sk->hashent = hash;
        sock_prot_inc_use(sk->prot);
        write_unlock_bh(&head->lock);

        if (tw) {
                /* Silly. Should hash-dance instead... */
                local_bh_disable();
                tcp_tw_deschedule(tw);
                tcp_timewait_kill(tw);
                NET_INC_STATS_BH(TimeWaitRecycled);
                local_bh_enable();

                tcp_tw_put(tw);
        }
        return 0;

not_unique:
        write_unlock_bh(&head->lock);
        return -EADDRNOTAVAIL;
}

static int tcp_v6_hash_connect(struct sock *sk)
{
        struct tcp_bind_hashbucket *head;
        struct tcp_bind_bucket *tb;

        /* XXX */
        if (sk->num == 0) {
                int err = tcp_v6_get_port(sk, sk->num);
                if (err)
                        return err;
                sk->sport = htons(sk->num);
        }

        head = &tcp_bhash[tcp_bhashfn(sk->num)];
        tb = head->chain;

        spin_lock_bh(&head->lock);

        if (tb->owners == sk && sk->bind_next == NULL) {
                __tcp_v6_hash(sk);
                spin_unlock_bh(&head->lock);
                return 0;
        } else {
                spin_unlock_bh(&head->lock);
                return tcp_v6_check_established(sk);
        }
}

static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
        struct inet6_skb_parm *opt = (struct inet6_skb_parm *) skb->cb;
        return opt->iif;
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        struct in6_addr *saddr = NULL;
        struct in6_addr saddr_buf;
        struct flowi fl;
        struct dst_entry *dst;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return(-EAFNOSUPPORT);

        fl.fl6_flowlabel = 0;
        if (np->sndflow) {
                fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl.fl6_flowlabel);
                if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
                        if (flowlabel == NULL)
                                return -EINVAL;
                        ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         * connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if(ipv6_addr_any(&usin->sin6_addr))
                usin->sin6_addr.s6_addr[15] = 0x1;

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if(addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If an interface was set while binding, the
                         * indices must coincide.
                         */
                        if (sk->bound_dev_if &&
                            sk->bound_dev_if != usin->sin6_scope_id)
                                return -EINVAL;

                        sk->bound_dev_if = usin->sin6_scope_id;
                }

                /* Connecting to a link-local address requires an interface */
                if (sk->bound_dev_if == 0)
                        return -EINVAL;
        }

        if (tp->ts_recent_stamp && ipv6_addr_cmp(&np->daddr, &usin->sin6_addr)) {
                tp->ts_recent = 0;
                tp->ts_recent_stamp = 0;
                tp->write_seq = 0;
        }

        ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
        np->flow_label = fl.fl6_flowlabel;

        /*
         * TCP over IPv4
         */

        if (addr_type == IPV6_ADDR_MAPPED) {
                u32 exthdrlen = tp->ext_header_len;
                struct sockaddr_in sin;

                SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                sk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
                sk->backlog_rcv = tcp_v4_do_rcv;

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        tp->ext_header_len = exthdrlen;
                        sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
                        sk->backlog_rcv = tcp_v6_do_rcv;
                        goto failure;
                } else {
                        ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
                                      sk->saddr);
                        ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
                                      sk->rcv_saddr);
                }

                return err;
        }

        if (!ipv6_addr_any(&np->rcv_saddr))
                saddr = &np->rcv_saddr;

        fl.proto = IPPROTO_TCP;
        fl.fl6_dst = &np->daddr;
        fl.fl6_src = saddr;
        fl.oif = sk->bound_dev_if;
        fl.uli_u.ports.dport = usin->sin6_port;
        fl.uli_u.ports.sport = sk->sport;

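        /* With a source route, route towards its first hop; the true
         * destination is restored on each transmit (see tcp_v6_xmit).
         */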
        if (np->opt && np->opt->srcrt) {
                struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
                fl.nl_u.ip6_u.daddr = rt0->addr;
        }

        dst = ip6_route_output(sk, &fl);

        if ((err = dst->error) != 0) {
                dst_release(dst);
                goto failure;
        }

        ip6_dst_store(sk, dst, NULL);
        sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;

        if (saddr == NULL) {
                err = ipv6_get_saddr(dst, &np->daddr, &saddr_buf);
                if (err)
                        goto failure;

                saddr = &saddr_buf;
        }

        /* set the source address */
        ipv6_addr_copy(&np->rcv_saddr, saddr);
        ipv6_addr_copy(&np->saddr, saddr);
        sk->rcv_saddr = LOOPBACK4_IPV6;

        tp->ext_header_len = 0;
        if (np->opt)
                tp->ext_header_len = np->opt->opt_flen+np->opt->opt_nflen;
        tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        sk->dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = tcp_v6_hash_connect(sk);
        if (err)
                goto late_failure;

        if (!tp->write_seq)
                tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
                                                             np->daddr.s6_addr32,
                                                             sk->sport, sk->dport);
        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
failure:
        __sk_dst_reset(sk);
        sk->dport = 0;
        sk->route_caps = 0;
        return err;
}

void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                int type, int code, int offset, __u32 info)
{
        struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
        struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct ipv6_pinfo *np;
        struct sock *sk;
        int err;
        struct tcp_opt *tp;
        __u32 seq;

        sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

        if (sk == NULL) {
                ICMP6_INC_STATS_BH(Icmp6InErrors);
                return;
        }

        if (sk->state == TCP_TIME_WAIT) {
                tcp_tw_put((struct tcp_tw_bucket*)sk);
                return;
        }

        bh_lock_sock(sk);
        if (sk->lock.users)
                NET_INC_STATS_BH(LockDroppedIcmps);

        if (sk->state == TCP_CLOSE)
                goto out;

        tp = &sk->tp_pinfo.af_tcp;
        seq = ntohl(th->seq);
        if (sk->state != TCP_LISTEN && !between(seq, tp->snd_una, tp->snd_nxt)) {
                NET_INC_STATS_BH(OutOfWindowIcmps);
                goto out;
        }

        np = &sk->net_pinfo.af_inet6;

        if (type == ICMPV6_PKT_TOOBIG) {
                struct dst_entry *dst = NULL;

                if (sk->lock.users)
                        goto out;
                if ((1<<sk->state)&(TCPF_LISTEN|TCPF_CLOSE))
                        goto out;

                /* icmp should have updated the destination cache entry */
                dst = __sk_dst_check(sk, np->dst_cookie);

                if (dst == NULL) {
                        struct flowi fl;

                        /* BUGGG_FUTURE: Again, it is not clear how
                           to handle the rthdr case.  Ignore this
                           complexity for now.
                         */
                        fl.proto = IPPROTO_TCP;
                        fl.nl_u.ip6_u.daddr = &np->daddr;
                        fl.nl_u.ip6_u.saddr = &np->saddr;
                        fl.oif = sk->bound_dev_if;
                        fl.uli_u.ports.dport = sk->dport;
                        fl.uli_u.ports.sport = sk->sport;

                        dst = ip6_route_output(sk, &fl);
                } else
                        dst_hold(dst);

                if (dst->error) {
                        sk->err_soft = -dst->error;
                } else if (tp->pmtu_cookie > dst->pmtu) {
                        tcp_sync_mss(sk, dst->pmtu);
                        tcp_simple_retransmit(sk);
                } /* else let the usual retransmit timer handle it */
                dst_release(dst);
                goto out;
        }

        icmpv6_err_convert(type, code, &err);

        /* Might be for an open_request */
        switch (sk->state) {
                struct open_request *req, **prev;
        case TCP_LISTEN:
                if (sk->lock.users)
                        goto out;

                req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
                                        &hdr->saddr, tcp_v6_iif(skb));
                if (!req)
                        goto out;

                /* ICMPs are not backlogged, hence we cannot get
                 * an established socket here.
                 */
                BUG_TRAP(req->sk == NULL);

                if (seq != req->snt_isn) {
                        NET_INC_STATS_BH(OutOfWindowIcmps);
                        goto out;
                }

                tcp_synq_drop(sk, req, prev);
                goto out;

        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
                               It can, if SYNs are crossed. --ANK */
                if (sk->lock.users == 0) {
                        TCP_INC_STATS_BH(TcpAttemptFails);
                        sk->err = err;
                        sk->error_report(sk);   /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else {
                        sk->err_soft = err;
                }
                goto out;
        }

        if (sk->lock.users == 0 && np->recverr) {
                sk->err = err;
                sk->error_report(sk);
        } else {
                sk->err_soft = err;
        }

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}


static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
                              struct dst_entry *dst)
{
        struct sk_buff * skb;
        struct ipv6_txoptions *opt = NULL;
        struct flowi fl;
        int err = -1;

        fl.proto = IPPROTO_TCP;
        fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
        fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
        fl.fl6_flowlabel = 0;
        fl.oif = req->af.v6_req.iif;
        fl.uli_u.ports.dport = req->rmt_port;
        fl.uli_u.ports.sport = sk->sport;

        if (dst == NULL) {
                opt = sk->net_pinfo.af_inet6.opt;
                if (opt == NULL &&
                    sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 &&
                    req->af.v6_req.pktopts) {
                        struct sk_buff *pktopts = req->af.v6_req.pktopts;
                        struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)pktopts->cb;
                        if (rxopt->srcrt)
                                opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
                }

                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        fl.nl_u.ip6_u.daddr = rt0->addr;
                }

                dst = ip6_route_output(sk, &fl);
                if (dst->error)
                        goto done;
        }

        skb = tcp_make_synack(sk, dst, req);
        if (skb) {
                struct tcphdr *th = skb->h.th;

                th->check = tcp_v6_check(th, skb->len,
                                         &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
                                         csum_partial((char *)th, skb->len, skb->csum));

                fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
                err = ip6_xmit(sk, skb, &fl, opt);
                if (err == NET_XMIT_CN)
                        err = 0;
        }

done:
        dst_release(dst);
        if (opt && opt != sk->net_pinfo.af_inet6.opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        return err;
}

static void tcp_v6_or_free(struct open_request *req)
{
        if (req->af.v6_req.pktopts)
                kfree_skb(req->af.v6_req.pktopts);
}

static struct or_calltable or_ipv6 = {
        AF_INET6,
        tcp_v6_send_synack,
        tcp_v6_or_send_ack,
        tcp_v6_or_free,
        tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
        struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;

        if (sk->net_pinfo.af_inet6.rxopt.all) {
                if ((opt->hop && sk->net_pinfo.af_inet6.rxopt.bits.hopopts) ||
                    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
                     sk->net_pinfo.af_inet6.rxopt.bits.rxflow) ||
                    (opt->srcrt && sk->net_pinfo.af_inet6.rxopt.bits.srcrt) ||
                    ((opt->dst1 || opt->dst0) && sk->net_pinfo.af_inet6.rxopt.bits.dstopts))
                        return 1;
        }
        return 0;
}


static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                              struct sk_buff *skb)
{
        struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;

        if (skb->ip_summed == CHECKSUM_HW) {
                th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
                skb->csum = offsetof(struct tcphdr, check);
        } else {
                th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
                                            csum_partial((char *)th, th->doff<<2,
                                                         skb->csum));
        }
}


static void tcp_v6_send_reset(struct sk_buff *skb)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;

        if (th->rst)
                return;

        if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
                return;

        /*
         * We need to grab some memory, and put together an RST,
         * and then put it into the queue to be sent.
         */

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr), GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

        t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = sizeof(*t1)/4;
        t1->rst = 1;

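        /* RFC 793 reset generation: if the offending segment carried an
         * ACK, the RST takes its sequence number from that ACK field;
         * otherwise the RST itself ACKs everything the segment occupied.
         */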
        if(th->ack) {
                t1->seq = th->ack_seq;
        } else {
                t1->ack = 1;
                t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
                                    + skb->len - (th->doff<<2));
        }

        buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

        fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
        fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
        fl.fl6_flowlabel = 0;

        t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
                                    fl.nl_u.ip6_u.daddr,
                                    sizeof(*t1), IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.uli_u.ports.dport = t1->dest;
        fl.uli_u.ports.sport = t1->source;

        /* sk = NULL, but it is safe for now. RST socket required. */
        buff->dst = ip6_route_output(NULL, &fl);

        if (buff->dst->error == 0) {
                ip6_xmit(NULL, buff, &fl, NULL);
                TCP_INC_STATS_BH(TcpOutSegs);
                TCP_INC_STATS_BH(TcpOutRsts);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;
        int tot_len = sizeof(struct tcphdr);

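        /* Echoing a timestamp costs 12 option bytes: two NOPs for
         * alignment plus the 10-byte TCPOPT_TIMESTAMP option.
         */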
        if (ts)
                tot_len += 3*4;

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = (struct tcphdr *) skb_push(buff, tot_len);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len/4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = 1;
        t1->window = htons(win);

        if (ts) {
                u32 *ptr = (u32*)(t1 + 1);
                *ptr++ = htonl((TCPOPT_NOP << 24) |
                               (TCPOPT_NOP << 16) |
                               (TCPOPT_TIMESTAMP << 8) |
                               TCPOLEN_TIMESTAMP);
                *ptr++ = htonl(tcp_time_stamp);
                *ptr = htonl(ts);
        }

        buff->csum = csum_partial((char *)t1, tot_len, 0);

        fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
        fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
        fl.fl6_flowlabel = 0;

        t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
                                    fl.nl_u.ip6_u.daddr,
                                    tot_len, IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.uli_u.ports.dport = t1->dest;
        fl.uli_u.ports.sport = t1->source;

        buff->dst = ip6_route_output(NULL, &fl);

        if (buff->dst->error == 0) {
                ip6_xmit(NULL, buff, &fl, NULL);
                TCP_INC_STATS_BH(TcpOutSegs);
                return;
        }

        kfree_skb(buff);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

        tcp_v6_send_ack(skb, tw->snd_nxt, tw->rcv_nxt,
                        tw->rcv_wnd >> tw->rcv_wscale, tw->ts_recent);

        tcp_tw_put(tw);
}

static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
        tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
}


static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
        struct open_request *req, **prev;
        struct tcphdr *th = skb->h.th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sock *nsk;

        /* Find possible connection requests. */
        req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
                                &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
        if (req)
                return tcp_check_req(sk, skb, req, prev);

        nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
                                          th->source,
                                          &skb->nh.ipv6h->daddr,
                                          ntohs(th->dest),
                                          tcp_v6_iif(skb));

        if (nsk) {
                if (nsk->state != TCP_TIME_WAIT) {
                        bh_lock_sock(nsk);
                        return nsk;
                }
                tcp_tw_put((struct tcp_tw_bucket*)nsk);
                return NULL;
        }

#if 0 /*def CONFIG_SYN_COOKIES*/
        if (!th->rst && !th->syn && th->ack)
                sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
        return sk;
}

static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        struct tcp_listen_opt *lopt = tp->listen_opt;
        u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);

        req->sk = NULL;
        req->expires = jiffies + TCP_TIMEOUT_INIT;
        req->retrans = 0;
        req->dl_next = lopt->syn_table[h];

        write_lock(&tp->syn_wait_lock);
        lopt->syn_table[h] = req;
        write_unlock(&tp->syn_wait_lock);

        tcp_synq_added(sk);
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_opt tp;
        struct open_request *req = NULL;
        __u32 isn = TCP_SKB_CB(skb)->when;

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        /* FIXME: do the same check for anycast */
        if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
                goto drop;

        /*
         * There are no SYN attacks on IPv6, yet...
         */
        if (tcp_synq_is_full(sk) && !isn) {
                if (net_ratelimit())
                        printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
                goto drop;
        }

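        /* The accept queue is full.  If the SYN queue already holds more
         * than one "young" (never retransmitted) request, drop: queuing
         * more would only clog it with entries whose timeouts grow
         * exponentially.
         */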
        if (tcp_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
                goto drop;

        req = tcp_openreq_alloc();
        if (req == NULL)
                goto drop;

        tcp_clear_options(&tp);
        tp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
        tp.user_mss = sk->tp_pinfo.af_tcp.user_mss;

        tcp_parse_options(skb, &tp, 0);

        tp.tstamp_ok = tp.saw_tstamp;
        tcp_openreq_init(req, &tp, skb);

        req->class = &or_ipv6;
        ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
        TCP_ECN_create_request(req, skb->h.th);
        req->af.v6_req.pktopts = NULL;
        if (ipv6_opt_accepted(sk, skb) ||
            sk->net_pinfo.af_inet6.rxopt.bits.rxinfo ||
            sk->net_pinfo.af_inet6.rxopt.bits.rxhlim) {
                atomic_inc(&skb->users);
                req->af.v6_req.pktopts = skb;
        }
        req->af.v6_req.iif = sk->bound_dev_if;

        /* So that link locals have meaning */
        if (!sk->bound_dev_if &&
            ipv6_addr_type(&req->af.v6_req.rmt_addr)&IPV6_ADDR_LINKLOCAL)
                req->af.v6_req.iif = tcp_v6_iif(skb);

        if (isn == 0)
                isn = tcp_v6_init_sequence(sk, skb);

        req->snt_isn = isn;

        if (tcp_v6_send_synack(sk, req, NULL))
                goto drop;

        tcp_v6_synq_add(sk, req);

        return 0;

drop:
        if (req)
                tcp_openreq_free(req);

        TCP_INC_STATS_BH(TcpAttemptFails);
        return 0; /* don't send reset */
}

static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                                          struct open_request *req,
                                          struct dst_entry *dst)
{
        struct ipv6_pinfo *np;
        struct flowi fl;
        struct tcp_opt *newtp;
        struct sock *newsk;
        struct ipv6_txoptions *opt;

        if (skb->protocol == htons(ETH_P_IP)) {
                /*
                 *      v6 mapped
                 */

                newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

                if (newsk == NULL)
                        return NULL;

                np = &newsk->net_pinfo.af_inet6;

                ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000FFFF),
                              newsk->daddr);

                ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
                              newsk->saddr);

                ipv6_addr_copy(&np->rcv_saddr, &np->saddr);

                newsk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
                newsk->backlog_rcv = tcp_v4_do_rcv;
                newsk->net_pinfo.af_inet6.pktoptions = NULL;
                newsk->net_pinfo.af_inet6.opt = NULL;
                newsk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(skb);
                newsk->net_pinfo.af_inet6.mcast_hops = skb->nh.ipv6h->hop_limit;

                /* Charge the newly allocated socket to IPv6.  Though it is
                 * mapped, it is still an IPv6 socket.
                 */
#ifdef INET_REFCNT_DEBUG
                atomic_inc(&inet6_sock_nr);
#endif
                MOD_INC_USE_COUNT;

                /* This is a tricky place.  Until this moment IPv4 tcp
                   worked with the IPv6 af_tcp.af_specific.
                   Sync it now.
                 */
                tcp_sync_mss(newsk, newsk->tp_pinfo.af_tcp.pmtu_cookie);

                return newsk;
        }

        opt = sk->net_pinfo.af_inet6.opt;

        if (tcp_acceptq_is_full(sk))
                goto out_overflow;

        if (sk->net_pinfo.af_inet6.rxopt.bits.srcrt == 2 &&
            opt == NULL && req->af.v6_req.pktopts) {
                struct inet6_skb_parm *rxopt = (struct inet6_skb_parm *)req->af.v6_req.pktopts->cb;
                if (rxopt->srcrt)
                        opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw+rxopt->srcrt));
        }

        if (dst == NULL) {
                fl.proto = IPPROTO_TCP;
                fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        fl.nl_u.ip6_u.daddr = rt0->addr;
                }
                fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
                fl.fl6_flowlabel = 0;
                fl.oif = sk->bound_dev_if;
                fl.uli_u.ports.dport = req->rmt_port;
                fl.uli_u.ports.sport = sk->sport;

                dst = ip6_route_output(sk, &fl);
        }

        if (dst->error)
                goto out;

        newsk = tcp_create_openreq_child(sk, req, skb);
        if (newsk == NULL)
                goto out;

        /* Charge newly allocated IPv6 socket */
#ifdef INET_REFCNT_DEBUG
        atomic_inc(&inet6_sock_nr);
#endif
        MOD_INC_USE_COUNT;

        ip6_dst_store(newsk, dst, NULL);
        newsk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;

        newtp = &(newsk->tp_pinfo.af_tcp);

        np = &newsk->net_pinfo.af_inet6;
        ipv6_addr_copy(&np->daddr, &req->af.v6_req.rmt_addr);
        ipv6_addr_copy(&np->saddr, &req->af.v6_req.loc_addr);
        ipv6_addr_copy(&np->rcv_saddr, &req->af.v6_req.loc_addr);
        newsk->bound_dev_if = req->af.v6_req.iif;

        /* Now IPv6 options...

           First: no IPv4 options.
         */
        newsk->protinfo.af_inet.opt = NULL;
        np->ipv6_fl_list = NULL;

        /* Clone RX bits */
        np->rxopt.all = sk->net_pinfo.af_inet6.rxopt.all;

        /* Clone pktoptions received with SYN */
        np->pktoptions = NULL;
        if (req->af.v6_req.pktopts) {
                np->pktoptions = skb_clone(req->af.v6_req.pktopts, GFP_ATOMIC);
                kfree_skb(req->af.v6_req.pktopts);
                req->af.v6_req.pktopts = NULL;
                if (np->pktoptions)
                        skb_set_owner_r(np->pktoptions, newsk);
        }
        np->opt = NULL;
        np->mcast_oif = tcp_v6_iif(skb);
        np->mcast_hops = skb->nh.ipv6h->hop_limit;

        /* Clone native IPv6 options from the listening socket (if any).

           Yes, keeping a reference count would be much more clever, but
           we do one more thing here: reattach the optmem to newsk.
         */
        if (opt) {
                np->opt = ipv6_dup_options(newsk, opt);
                if (opt != sk->net_pinfo.af_inet6.opt)
                        sock_kfree_s(sk, opt, opt->tot_len);
        }

        newtp->ext_header_len = 0;
        if (np->opt)
                newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen;

        tcp_sync_mss(newsk, dst->pmtu);
        newtp->advmss = dst->advmss;
        tcp_initialize_rcv_mss(newsk);

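        /* An AF_INET6 socket has no real IPv4 identity; parking the v4
         * address fields on the IPv6 loopback marker keeps plain IPv4
         * lookups from ever matching it.
         */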
        newsk->daddr    = LOOPBACK4_IPV6;
        newsk->saddr    = LOOPBACK4_IPV6;
        newsk->rcv_saddr = LOOPBACK4_IPV6;

        __tcp_v6_hash(newsk);
        tcp_inherit_port(sk, newsk);

        return newsk;

out_overflow:
        NET_INC_STATS_BH(ListenOverflows);
out:
        NET_INC_STATS_BH(ListenDrops);
        if (opt && opt != sk->net_pinfo.af_inet6.opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        dst_release(dst);
        return NULL;
}

static int tcp_v6_checksum_init(struct sk_buff *skb)
{
        if (skb->ip_summed == CHECKSUM_HW) {
                skb->ip_summed = CHECKSUM_UNNECESSARY;
                if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
                                  &skb->nh.ipv6h->daddr, skb->csum))
                        return 0;
                NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
        }
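        /* For short packets it is cheaper to verify the full checksum
         * right away; longer ones only get the pseudo-header folded in
         * here and are completed later by tcp_checksum_complete().
         */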
        if (skb->len <= 76) {
                if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
                                 &skb->nh.ipv6h->daddr, skb_checksum(skb, 0, skb->len, 0)))
                        return -1;
                skb->ip_summed = CHECKSUM_UNNECESSARY;
        } else {
                skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
                                          &skb->nh.ipv6h->daddr, 0);
        }
        return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
        struct sk_buff *opt_skb = NULL;

        /* Imagine: the socket is IPv6, but an IPv4 packet arrives,
           goes to the IPv4 receive handler and is backlogged.
           From the backlog it always comes here.  Kerboom...
           Fortunately, tcp_rcv_established and rcv_established
           handle them correctly, but that is not the case with
           tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
         */

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_do_rcv(sk, skb);

        /*
         *      socket locking is here for SMP purposes as backlog rcv
         *      is currently called with bh processing disabled.
         */

        IP6_INC_STATS_BH(Ip6InDelivers);

        /* Do Stevens' IPV6_PKTOPTIONS.

           Yes, guys, it is the only place in our code where we
           can make this change without affecting IPv4.
           The rest of the code is protocol independent,
           and I do not like the idea of uglifying IPv4.

           Actually, the whole idea behind IPV6_PKTOPTIONS does
           not look very well thought out.  For now we latch the
           options received in the last packet enqueued by tcp.
           Feel free to propose a better solution. --ANK (980728)
         */
        if (sk->net_pinfo.af_inet6.rxopt.all)
                opt_skb = skb_clone(skb, GFP_ATOMIC);

        if (sk->state == TCP_ESTABLISHED) { /* Fast path */
                TCP_CHECK_TIMER(sk);
                if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
                        goto reset;
                TCP_CHECK_TIMER(sk);
                if (opt_skb)
                        goto ipv6_pktoptions;
                return 0;
        }

        if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
                goto csum_err;

        if (sk->state == TCP_LISTEN) {
                struct sock *nsk = tcp_v6_hnd_req(sk, skb);
                if (!nsk)
                        goto discard;

                /*
                 * Queue it on the new socket if the new socket is active,
                 * otherwise we just shortcircuit this and continue with
                 * the new socket..
                 */
                if(nsk != sk) {
                        if (tcp_child_process(sk, nsk, skb))
                                goto reset;
                        if (opt_skb)
                                __kfree_skb(opt_skb);
                        return 0;
                }
        }

        TCP_CHECK_TIMER(sk);
        if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
                goto reset;
        TCP_CHECK_TIMER(sk);
        if (opt_skb)
                goto ipv6_pktoptions;
        return 0;

reset:
        tcp_v6_send_reset(skb);
discard:
        if (opt_skb)
                __kfree_skb(opt_skb);
        kfree_skb(skb);
        return 0;
csum_err:
        TCP_INC_STATS_BH(TcpInErrs);
        goto discard;


ipv6_pktoptions:
        /* You may ask, what is going on here?

           1. skb was enqueued by tcp.
           2. skb is added to the tail of the read queue, not out of order.
           3. The socket is not in a passive state.
           4. Finally, it really contains options, which the user wants
              to receive.
         */
        if (TCP_SKB_CB(opt_skb)->end_seq == sk->tp_pinfo.af_tcp.rcv_nxt &&
            !((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))) {
                if (sk->net_pinfo.af_inet6.rxopt.bits.rxinfo)
                        sk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(opt_skb);
                if (sk->net_pinfo.af_inet6.rxopt.bits.rxhlim)
                        sk->net_pinfo.af_inet6.mcast_hops = opt_skb->nh.ipv6h->hop_limit;
                if (ipv6_opt_accepted(sk, opt_skb)) {
                        skb_set_owner_r(opt_skb, sk);
                        opt_skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, opt_skb);
                } else {
                        __kfree_skb(opt_skb);
                        opt_skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, NULL);
                }
        }

        if (opt_skb)
                kfree_skb(opt_skb);
        return 0;
}

int tcp_v6_rcv(struct sk_buff *skb)
{
        struct tcphdr *th;
        struct sock *sk;
        int ret;

        if (skb->pkt_type != PACKET_HOST)
                goto discard_it;

        /*
         *      Count it even if it's bad.
         */
        TCP_INC_STATS_BH(TcpInSegs);

        if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
                goto discard_it;

        th = skb->h.th;

        if (th->doff < sizeof(struct tcphdr)/4)
                goto bad_packet;
        if (!pskb_may_pull(skb, th->doff*4))
                goto discard_it;

        if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
             tcp_v6_checksum_init(skb) < 0))
                goto bad_packet;

        th = skb->h.th;
        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
        TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
                                    skb->len - th->doff*4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->when = 0;
        TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
        TCP_SKB_CB(skb)->sacked = 0;

        sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
                             &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));

        if (!sk)
                goto no_tcp_socket;

process:
        if(!ipsec_sk_policy(sk, skb))
                goto discard_and_relse;
        if(sk->state == TCP_TIME_WAIT)
                goto do_time_wait;

        if (sk_filter(sk, skb, 0))
                goto discard_and_relse;

        skb->dev = NULL;

        bh_lock_sock(sk);
        ret = 0;
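        /* Three-way dispatch: process in softirq context, hand the
         * segment to a reader sleeping on the prequeue, or park it on
         * the backlog while a user context owns the socket.
         */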
        if (!sk->lock.users) {
                if (!tcp_prequeue(sk, skb))
                        ret = tcp_v6_do_rcv(sk, skb);
        } else
                sk_add_backlog(sk, skb);
        bh_unlock_sock(sk);

        sock_put(sk);
        return ret;

no_tcp_socket:
        if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
                TCP_INC_STATS_BH(TcpInErrs);
        } else {
                tcp_v6_send_reset(skb);
        }

discard_it:

        /*
         *      Discard frame
         */

        kfree_skb(skb);
        return 0;

discard_and_relse:
        sock_put(sk);
        goto discard_it;

do_time_wait:
        if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
                TCP_INC_STATS_BH(TcpInErrs);
                tcp_tw_put((struct tcp_tw_bucket *) sk);
                goto discard_it;
        }

        switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
                                          skb, th, skb->len)) {
        case TCP_TW_SYN:
        {
                struct sock *sk2;

                sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
                if (sk2 != NULL) {
                        tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
                        tcp_timewait_kill((struct tcp_tw_bucket *)sk);
                        tcp_tw_put((struct tcp_tw_bucket *)sk);
                        sk = sk2;
                        goto process;
                }
                /* Fall through to ACK */
        }
        case TCP_TW_ACK:
                tcp_v6_timewait_ack(sk, skb);
                break;
        case TCP_TW_RST:
                goto no_tcp_socket;
        case TCP_TW_SUCCESS:;
        }
        goto discard_it;
}

static int tcp_v6_rebuild_header(struct sock *sk)
{
        int err;
        struct dst_entry *dst;
        struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;

        dst = __sk_dst_check(sk, np->dst_cookie);

        if (dst == NULL) {
                struct flowi fl;

                fl.proto = IPPROTO_TCP;
                fl.nl_u.ip6_u.daddr = &np->daddr;
                fl.nl_u.ip6_u.saddr = &np->saddr;
                fl.fl6_flowlabel = np->flow_label;
                fl.oif = sk->bound_dev_if;
                fl.uli_u.ports.dport = sk->dport;
                fl.uli_u.ports.sport = sk->sport;

                if (np->opt && np->opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
                        fl.nl_u.ip6_u.daddr = rt0->addr;
                }

                dst = ip6_route_output(sk, &fl);

                if (dst->error) {
                        err = dst->error;
                        dst_release(dst);
                        sk->route_caps = 0;
                        return err;
                }

                ip6_dst_store(sk, dst, NULL);
                sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
        }

        return 0;
}

static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
        struct sock *sk = skb->sk;
        struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
        struct flowi fl;
        struct dst_entry *dst;

        fl.proto = IPPROTO_TCP;
        fl.fl6_dst = &np->daddr;
        fl.fl6_src = &np->saddr;
        fl.fl6_flowlabel = np->flow_label;
        IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
        fl.oif = sk->bound_dev_if;
        fl.uli_u.ports.sport = sk->sport;
        fl.uli_u.ports.dport = sk->dport;

        if (np->opt && np->opt->srcrt) {
                struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
                fl.nl_u.ip6_u.daddr = rt0->addr;
        }

        dst = __sk_dst_check(sk, np->dst_cookie);

        if (dst == NULL) {
                dst = ip6_route_output(sk, &fl);

                if (dst->error) {
                        sk->err_soft = -dst->error;
                        dst_release(dst);
                        return -sk->err_soft;
                }

                ip6_dst_store(sk, dst, NULL);
        }

        skb->dst = dst_clone(dst);

        /* Restore final destination back after routing done */
        fl.nl_u.ip6_u.daddr = &np->daddr;

        return ip6_xmit(sk, skb, &fl, np->opt);
}

static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
        struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

        sin6->sin6_family = AF_INET6;
        memcpy(&sin6->sin6_addr, &np->daddr, sizeof(struct in6_addr));
        sin6->sin6_port = sk->dport;
        /* We do not store received flowlabel for TCP */
        sin6->sin6_flowinfo = 0;
        sin6->sin6_scope_id = 0;
        if (sk->bound_dev_if && ipv6_addr_type(&sin6->sin6_addr)&IPV6_ADDR_LINKLOCAL)
                sin6->sin6_scope_id = sk->bound_dev_if;
}

static int tcp_v6_remember_stamp(struct sock *sk)
{
        /* Alas, not yet... */
        return 0;
}

static struct tcp_func ipv6_specific = {
        tcp_v6_xmit,
        tcp_v6_send_check,
        tcp_v6_rebuild_header,
        tcp_v6_conn_request,
        tcp_v6_syn_recv_sock,
        tcp_v6_remember_stamp,
        sizeof(struct ipv6hdr),

        ipv6_setsockopt,
        ipv6_getsockopt,
        v6_addr2sockaddr,
        sizeof(struct sockaddr_in6)
};

/*
 *      TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
        ip_queue_xmit,
        tcp_v4_send_check,
        tcp_v4_rebuild_header,
        tcp_v6_conn_request,
        tcp_v6_syn_recv_sock,
        tcp_v4_remember_stamp,
        sizeof(struct iphdr),

        ipv6_setsockopt,
        ipv6_getsockopt,
        v6_addr2sockaddr,
        sizeof(struct sockaddr_in6)
};



/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        skb_queue_head_init(&tp->out_of_order_queue);
        tcp_init_xmit_timers(sk);
        tcp_prequeue_init(tp);

        tp->rto = TCP_TIMEOUT_INIT;
        tp->mdev = TCP_TIMEOUT_INIT;

        /* So many TCP implementations out there (incorrectly) count the
         * initial SYN frame in their delayed-ACK and congestion control
         * algorithms that we must have the following bandaid to talk
         * efficiently to them.  -DaveM
         */
        tp->snd_cwnd = 2;

        /* See draft-stevens-tcpca-spec-01 for discussion of the
         * initialization of these values.
         */
        tp->snd_ssthresh = 0x7fffffff;
        tp->snd_cwnd_clamp = ~0;
        tp->mss_cache = 536;

        tp->reordering = sysctl_tcp_reordering;

        sk->state = TCP_CLOSE;

        sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;

        sk->write_space = tcp_write_space;
        sk->use_write_queue = 1;

        sk->sndbuf = sysctl_tcp_wmem[1];
        sk->rcvbuf = sysctl_tcp_rmem[1];

        atomic_inc(&tcp_sockets_allocated);

        return 0;
}

static int tcp_v6_destroy_sock(struct sock *sk)
{
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

        tcp_clear_xmit_timers(sk);

        /* Clean up the write buffer. */
        tcp_writequeue_purge(sk);

        /* Cleans up our, hopefully empty, out_of_order_queue. */
        __skb_queue_purge(&tp->out_of_order_queue);

        /* Clean up the prequeue; it really must be empty by now */
        __skb_queue_purge(&tp->ucopy.prequeue);

        /* Clean up a referenced TCP bind bucket. */
        if(sk->prev != NULL)
                tcp_put_port(sk);

        /* If a sendmsg cached page exists, toss it. */
        if (tp->sndmsg_page != NULL)
                __free_page(tp->sndmsg_page);

        atomic_dec(&tcp_sockets_allocated);

        return inet6_destroy_sock(sk);
}

/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf, int i, int uid)
{
        struct in6_addr *dest, *src;
        int ttd = req->expires - jiffies;

        if (ttd < 0)
                ttd = 0;

        src = &req->af.v6_req.loc_addr;
        dest = &req->af.v6_req.rmt_addr;
        sprintf(tmpbuf,
                "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
                "%02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
                i,
                src->s6_addr32[0], src->s6_addr32[1],
                src->s6_addr32[2], src->s6_addr32[3],
                ntohs(sk->sport),
                dest->s6_addr32[0], dest->s6_addr32[1],
                dest->s6_addr32[2], dest->s6_addr32[3],
                ntohs(req->rmt_port),
                TCP_SYN_RECV,
                0, 0, /* could print option size, but that is af dependent. */
                1,    /* timers active (only the expire timer) */
                ttd,
                req->retrans,
                uid,
                0,    /* non standard timer */
                0,    /* open_requests have no inode */
                0, req);
}

static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i)
{
        struct in6_addr *dest, *src;
        __u16 destp, srcp;
        int timer_active;
        unsigned long timer_expires;
        struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;

        dest = &sp->net_pinfo.af_inet6.daddr;
        src = &sp->net_pinfo.af_inet6.rcv_saddr;
        destp = ntohs(sp->dport);
        srcp = ntohs(sp->sport);
        if (tp->pending == TCP_TIME_RETRANS) {
                timer_active = 1;
                timer_expires = tp->timeout;
        } else if (tp->pending == TCP_TIME_PROBE0) {
                timer_active = 4;
                timer_expires = tp->timeout;
        } else if (timer_pending(&sp->timer)) {
                timer_active = 2;
                timer_expires = sp->timer.expires;
        } else {
                timer_active = 0;
                timer_expires = jiffies;
        }

        sprintf(tmpbuf,
                "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
                "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d",
                i,
                src->s6_addr32[0], src->s6_addr32[1],
                src->s6_addr32[2], src->s6_addr32[3], srcp,
                dest->s6_addr32[0], dest->s6_addr32[1],
                dest->s6_addr32[2], dest->s6_addr32[3], destp,
                sp->state,
                tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
                timer_active, timer_expires-jiffies,
                tp->retransmits,
                sock_i_uid(sp),
                tp->probes_out,
                sock_i_ino(sp),
                atomic_read(&sp->refcnt), sp,
                tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
                tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
                );
}

static void get_timewait6_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
{
        struct in6_addr *dest, *src;
        __u16 destp, srcp;
        int ttd = tw->ttd - jiffies;

        if (ttd < 0)
                ttd = 0;

        dest = &tw->v6_daddr;
        src = &tw->v6_rcv_saddr;
        destp = ntohs(tw->dport);
        srcp = ntohs(tw->sport);

        sprintf(tmpbuf,
                "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
                "%02X %08X:%08X %02X:%08X %08X %5d %8d %d %d %p",
                i,
                src->s6_addr32[0], src->s6_addr32[1],
                src->s6_addr32[2], src->s6_addr32[3], srcp,
                dest->s6_addr32[0], dest->s6_addr32[1],
                dest->s6_addr32[2], dest->s6_addr32[3], destp,
                tw->substate, 0, 0,
                3, ttd, 0, 0, 0, 0,
                atomic_read(&tw->refcnt), tw);
}

#define LINE_LEN 190
#define LINE_FMT "%-190s\n"

int tcp6_get_info(char *buffer, char **start, off_t offset, int length)
{
        int len = 0, num = 0, i;
        off_t begin, pos = 0;
        char tmpbuf[LINE_LEN+2];

        if (offset < LINE_LEN+1)
                len += sprintf(buffer, LINE_FMT,
                               " sl  "                                          /* 6 */
                               "local_address                         "        /* 38 */
                               "remote_address                        "        /* 38 */
                               "st tx_queue rx_queue tr tm->when retrnsmt"     /* 41 */
                               "   uid  timeout inode");                        /* 21 */
                                                                                /*----*/
                                                                                /*144 */

        pos = LINE_LEN+1;
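        /* Each formatted entry is padded to exactly LINE_LEN+1 bytes,
         * so `pos' can track the virtual file offset without having to
         * format the entries that fall before `offset'.
         */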

        /* First, walk listening socket table. */
        tcp_listen_lock();
        for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
                struct sock *sk = tcp_listening_hash[i];
                struct tcp_listen_opt *lopt;
                int k;

                for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
                        struct open_request *req;
                        int uid;
                        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

                        if (sk->family != PF_INET6)
                                continue;
                        pos += LINE_LEN+1;
                        if (pos >= offset) {
                                get_tcp6_sock(sk, tmpbuf, num);
                                len += sprintf(buffer+len, LINE_FMT, tmpbuf);
                                if (pos >= offset + length) {
                                        tcp_listen_unlock();
                                        goto out_no_bh;
                                }
                        }

                        uid = sock_i_uid(sk);
                        read_lock_bh(&tp->syn_wait_lock);
                        lopt = tp->listen_opt;
                        if (lopt && lopt->qlen != 0) {
                                for (k = 0; k < TCP_SYNQ_HSIZE; k++) {
                                        for (req = lopt->syn_table[k]; req; req = req->dl_next, num++) {
                                                if (req->class->family != PF_INET6)
                                                        continue;
                                                pos += LINE_LEN+1;
                                                if (pos <= offset)
                                                        continue;
                                                get_openreq6(sk, req, tmpbuf, num, uid);
                                                len += sprintf(buffer+len, LINE_FMT, tmpbuf);
                                                if (pos >= offset + length) {
                                                        read_unlock_bh(&tp->syn_wait_lock);
                                                        tcp_listen_unlock();
                                                        goto out_no_bh;
                                                }
                                        }
                                }
                        }
                        read_unlock_bh(&tp->syn_wait_lock);

                        /* Completed requests are in normal socket hash table */
                }
        }
        tcp_listen_unlock();

        local_bh_disable();

        /* Next, walk established hash chain. */
        for (i = 0; i < tcp_ehash_size; i++) {
                struct tcp_ehash_bucket *head = &tcp_ehash[i];
                struct sock *sk;
                struct tcp_tw_bucket *tw;

                read_lock(&head->lock);
                for(sk = head->chain; sk; sk = sk->next, num++) {
                        if (sk->family != PF_INET6)
                                continue;
                        pos += LINE_LEN+1;
                        if (pos <= offset)
                                continue;
                        get_tcp6_sock(sk, tmpbuf, num);
                        len += sprintf(buffer+len, LINE_FMT, tmpbuf);
                        if (pos >= offset + length) {
                                read_unlock(&head->lock);
                                goto out;
                        }
                }
                for (tw = (struct tcp_tw_bucket *)tcp_ehash[i+tcp_ehash_size].chain;
                     tw != NULL;
                     tw = (struct tcp_tw_bucket *)tw->next, num++) {
                        if (tw->family != PF_INET6)
                                continue;
                        pos += LINE_LEN+1;
                        if (pos <= offset)
                                continue;
                        get_timewait6_sock(tw, tmpbuf, num);
                        len += sprintf(buffer+len, LINE_FMT, tmpbuf);
                        if (pos >= offset + length) {
                                read_unlock(&head->lock);
                                goto out;
                        }
                }
                read_unlock(&head->lock);
        }

out:
        local_bh_enable();
out_no_bh:

        begin = len - (pos - offset);
        *start = buffer + begin;
        len -= begin;
        if (len > length)
                len = length;
        if (len < 0)
                len = 0;
        return len;
}

struct proto tcpv6_prot = {
        name:           "TCPv6",
        close:          tcp_close,
        connect:        tcp_v6_connect,
        disconnect:     tcp_disconnect,
        accept:         tcp_accept,
        ioctl:          tcp_ioctl,
        init:           tcp_v6_init_sock,
        destroy:        tcp_v6_destroy_sock,
        shutdown:       tcp_shutdown,
        setsockopt:     tcp_setsockopt,
        getsockopt:     tcp_getsockopt,
        sendmsg:        tcp_sendmsg,
        recvmsg:        tcp_recvmsg,
        backlog_rcv:    tcp_v6_do_rcv,
        hash:           tcp_v6_hash,
        unhash:         tcp_unhash,
        get_port:       tcp_v6_get_port,
};

static struct inet6_protocol tcpv6_protocol =
{
        tcp_v6_rcv,             /* TCP handler          */
        tcp_v6_err,             /* TCP error control    */
        NULL,                   /* next                 */
        IPPROTO_TCP,            /* protocol ID          */
        0,                      /* copy                 */
        NULL,                   /* data                 */
        "TCPv6"                 /* name                 */
};

extern struct proto_ops inet6_stream_ops;

static struct inet_protosw tcpv6_protosw = {
        type:           SOCK_STREAM,
        protocol:       IPPROTO_TCP,
        prot:           &tcpv6_prot,
        ops:            &inet6_stream_ops,
        capability:     -1,
        no_check:       0,
        flags:          INET_PROTOSW_PERMANENT,
};

void __init tcpv6_init(void)
{
        /* register inet6 protocol */
        inet6_add_protocol(&tcpv6_protocol);
        inet6_register_protosw(&tcpv6_protosw);
}
