/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	: 	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

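/*
 * __ip6_local_out() fixes up the payload length of a locally generated
 * packet (zero if it exceeds IPV6_MAXPLEN; a jumbogram carries the real
 * length in a hop-by-hop option instead) and then runs the
 * NF_INET_LOCAL_OUT netfilter hook.  A return value of 1 means the hook
 * accepted the packet and the caller must still hand it to dst_output().
 */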
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
}

int ip6_local_out(struct sk_buff *skb)
{
	int err;

	err = __ip6_local_out(skb);
	if (likely(err == 1))
		err = dst_output(skb);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_local_out);

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
	skb_reset_mac_header(newskb);
	__skb_pull(newskb, skb_network_offset(newskb));
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(newskb));

	netif_rx_ni(newskb);
	return 0;
}

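/*
 * Final output step: if the destination is multicast and the sending
 * socket wants to hear its own traffic (sk_mc_loop()), a clone of the
 * packet is looped back through NF_INET_POST_ROUTING before the original
 * goes on the wire.  Delivery to the link then goes through the cached
 * hardware header (dst->hh) or the neighbour's output function.
 */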
static int ip6_finish_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
		    ((mroute6_socket(dev_net(dev), skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
	}

	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

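/*
 * Fragment before final transmission when the packet exceeds the path
 * MTU and is not GSO (the stack segments GSO packets further down), or
 * when the route demands a fragment header on every packet
 * (dst_allfrag(), typically set after a Packet Too Big reported an MTU
 * below IPV6_MIN_MTU).
 */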
static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(dev_net(dev), idev,
			      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

/*
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now);
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

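	/*
	 * The first 32 bits of the header pack version, traffic class and
	 * flow label: 0x60000000 puts version 6 in the top nibble, the
	 * 8-bit traffic class sits at bits 20-27, and fl6->flowlabel
	 * (already in network byte order) supplies the flow label bits.
	 */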
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *	To avoid extra problems ND packets are sent through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = htonl(0x60000000);

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

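/*
 * Deliver a forwarded packet carrying a Router Alert option to every raw
 * socket that registered interest in that alert value (via the
 * IPV6_ROUTER_ALERT socket option).  The last matching socket receives
 * the original skb; earlier ones get clones.  Returns 1 if a listener
 * consumed the packet, 0 if forwarding should continue.
 */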
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

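/*
 * Decide what to do with a packet whose destination we answer proxy NDP
 * for: 1 means hand it to local input (unicast neighbour discovery aimed
 * at the proxied address), 0 means forward it normally, and -1 means it
 * must be dropped (e.g. destined to a link-local address, which a
 * proxying router cannot forward).
 */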
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}

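/*
 * The forwarding path: check that forwarding is enabled and allowed by
 * xfrm policy, honour Router Alert options, enforce and decrement the
 * hop limit, possibly emit an ICMPv6 redirect when the packet leaves on
 * the interface it arrived on, validate the source address, check the
 * outgoing MTU, and finally pass the packet through NF_INET_FORWARD.
 */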
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake; RA packets
	 *	cannot be fragmented, because there is no guarantee
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
		u8 *ptr = skb_network_header(skb) + opt->ra;
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
	    !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr *)&n->primary_key;
		else
			target = &hdr->daddr;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
			ndisc_send_redirect(skb, n, target);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (skb->len > mtu && !skb_is_gso(skb)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

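/*
 * Find the offset at which a fragment header must be inserted: after the
 * per-fragment headers (hop-by-hop, routing, and destination options
 * that either precede a routing header or carry a Home Address option).
 * *nexthdr is left pointing at the nexthdr byte that will be patched to
 * NEXTHDR_FRAGMENT.
 */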
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default:
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

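/*
 * Fragment an IPv6 packet and feed the pieces to @output.  When the skb
 * already carries a suitably laid-out frag_list (each fragment a
 * multiple of 8 bytes with enough headroom, nothing shared or cloned),
 * the fast path reuses those buffers in place; otherwise the slow path
 * allocates a fresh skb per fragment and copies the payload into it.
 */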
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (!skb->local_df && skb->len > mtu) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before the previous one goes down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->dst);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      LL_ALLOCATED_SPACE(rt->dst.dev),
				      GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

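/*
 * Returns nonzero when a cached route cannot be trusted for this flow:
 * it is neither a host route (plen == 128) to the flow's address nor was
 * it last validated against that address (addr_cache).
 */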
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (!dst)
		goto out;

	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

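/*
 * Common tail of the dst lookup helpers below: resolve the route if none
 * was passed in, pick a source address when the flow leaves it
 * unspecified, and (with optimistic DAD) fall back to the default
 * router's dst when the chosen source address is still optimistic and
 * the next hop's neighbour entry is not yet valid.
 */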
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl6->daddr,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst,
				      bool can_sleep)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@can_sleep: we are in a sleepable context
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool can_sleep)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;

	dst = ip6_sk_dst_check(sk, dst, fl6);

	err = ip6_dst_lookup_tail(sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		ipv6_addr_copy(&fl6->daddr, final_dst);
	if (can_sleep)
		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

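/*
 * UDP fragmentation offload: instead of building MTU-sized fragments in
 * software, queue one large skb and record the 8-byte-aligned fragment
 * size in gso_size so the device (or the GSO layer) can split it later.
 */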
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu, unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
	}

	err = skb_append_datato_frags(sk, skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		ipv6_select_ident(&fhdr);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support to do UDP LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}


static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

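/*
 * Append data to the per-socket write queue ("corking").  The first call
 * on an empty queue snapshots the options, flow, route and MTU into the
 * cork; later calls reuse that state.  Datagram senders (e.g.
 * udpv6_sendmsg, raw and ICMPv6 sockets) call this one or more times,
 * then either ip6_push_pending_frames() to build the header and
 * transmit, or ip6_flush_pending_frames() to abort.
 */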
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
	struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;
	__u8 tx_flags = 0;

	if (flags & MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa */
		}
		dst_hold(&rt->dst);
		inet->cork.dst = &rt->dst;
		inet->cork.fl.u.ip6 = *fl6;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		rt = (struct rt6_info *)inet->cork.dst;
		fl6 = &inet->cork.fl.u.ip6;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl6, mtu - exthdrlen);
			return -EMSGSIZE;
		}
	}

	/* For UDP, check if TX timestamp is enabled */
	if (sk->sk_type == SOCK_DGRAM) {
		err = sock_tx_timestamp(sk, &tx_flags);
		if (err)
			goto error;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)) {
			ipv6_local_rxpmtu(sk, fl6, mtu - exthdrlen);
			return -EMSGSIZE;
		}

		if (proto == IPPROTO_UDP &&
		    (rt->dst.dev->features & NETIF_F_UFO)) {

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags);
			if (err)
				goto error;
			return 0;
		}
	}

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features & NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
				else {
					/* Only the initial fragment
					 * is time stamped.
					 */
					tx_flags = 0;
				}
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

			if (sk->sk_type == SOCK_DGRAM)
				skb_shinfo(skb)->tx_flags = tx_flags;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features & NETIF_F_SG)) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if (i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page) + frag->page_offset + frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
	if (np->cork.opt) {
		kfree(np->cork.opt->dst0opt);
		kfree(np->cork.opt->dst1opt);
		kfree(np->cork.opt->hopopt);
		kfree(np->cork.opt->srcrt);
		kfree(np->cork.opt);
		np->cork.opt = NULL;
	}

	if (inet->cork.dst) {
		dst_release(inet->cork.dst);
		inet->cork.dst = NULL;
		inet->cork.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

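/*
 * Collapse the queued skbs into one packet (the first skb's frag_list
 * carries the rest), prepend the extension headers and the IPv6 header
 * saved in the cork, update the stats, and send it via ip6_local_out().
 * Always releases the cork, even on error.
 */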
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	ipv6_addr_copy(final_dst, &fl6->daddr);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	*(__be32 *)hdr = fl6->flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}