1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/export.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/mroute6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <linux/slab.h>
45 #include <net/net_namespace.h>
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56 #include <net/netevent.h>
57 #include <net/netlink.h>
58 
59 #include <asm/uaccess.h>
60 
61 #ifdef CONFIG_SYSCTL
62 #include <linux/sysctl.h>
63 #endif
64 
65 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
66 				    const struct in6_addr *dest);
67 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
68 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
69 static unsigned int	 ip6_mtu(const struct dst_entry *dst);
70 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71 static void		ip6_dst_destroy(struct dst_entry *);
72 static void		ip6_dst_ifdown(struct dst_entry *,
73 				       struct net_device *dev, int how);
74 static int		 ip6_dst_gc(struct dst_ops *ops);
75 
76 static int		ip6_pkt_discard(struct sk_buff *skb);
77 static int		ip6_pkt_discard_out(struct sk_buff *skb);
78 static void		ip6_link_failure(struct sk_buff *skb);
79 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80 
81 #ifdef CONFIG_IPV6_ROUTE_INFO
82 static struct rt6_info *rt6_add_route_info(struct net *net,
83 					   const struct in6_addr *prefix, int prefixlen,
84 					   const struct in6_addr *gwaddr, int ifindex,
85 					   unsigned pref);
86 static struct rt6_info *rt6_get_route_info(struct net *net,
87 					   const struct in6_addr *prefix, int prefixlen,
88 					   const struct in6_addr *gwaddr, int ifindex);
89 #endif
90 
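/*
 * Copy-on-write of dst metrics: a host route keeps the shared read-only
 * metrics until the first write, at which point they are copied into the
 * inet_peer entry bound to this route.
 */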
91 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92 {
93 	struct rt6_info *rt = (struct rt6_info *) dst;
94 	struct inet_peer *peer;
95 	u32 *p = NULL;
96 
97 	if (!(rt->dst.flags & DST_HOST))
98 		return NULL;
99 
100 	if (!rt->rt6i_peer)
101 		rt6_bind_peer(rt, 1);
102 
103 	peer = rt->rt6i_peer;
104 	if (peer) {
105 		u32 *old_p = __DST_METRICS_PTR(old);
106 		unsigned long prev, new;
107 
108 		p = peer->metrics;
109 		if (inet_metrics_new(peer))
110 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111 
112 		new = (unsigned long) p;
113 		prev = cmpxchg(&dst->_metrics, old, new);
114 
115 		if (prev != old) {
116 			p = __DST_METRICS_PTR(prev);
117 			if (prev & DST_METRICS_READ_ONLY)
118 				p = NULL;
119 		}
120 	}
121 	return p;
122 }
123 
124 static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125 {
126 	struct in6_addr *p = &rt->rt6i_gateway;
127 
128 	if (!ipv6_addr_any(p))
129 		return (const void *) p;
130 	return daddr;
131 }
132 
133 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134 {
135 	struct rt6_info *rt = (struct rt6_info *) dst;
136 	struct neighbour *n;
137 
138 	daddr = choose_neigh_daddr(rt, daddr);
139 	n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
140 	if (n)
141 		return n;
142 	return neigh_create(&nd_tbl, daddr, dst->dev);
143 }
144 
145 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
146 {
147 	struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 	if (!n) {
149 		n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 		if (IS_ERR(n))
151 			return PTR_ERR(n);
152 	}
153 	dst_set_neighbour(&rt->dst, n);
154 
155 	return 0;
156 }
157 
158 static struct dst_ops ip6_dst_ops_template = {
159 	.family			=	AF_INET6,
160 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
161 	.gc			=	ip6_dst_gc,
162 	.gc_thresh		=	1024,
163 	.check			=	ip6_dst_check,
164 	.default_advmss		=	ip6_default_advmss,
165 	.mtu			=	ip6_mtu,
166 	.cow_metrics		=	ipv6_cow_metrics,
167 	.destroy		=	ip6_dst_destroy,
168 	.ifdown			=	ip6_dst_ifdown,
169 	.negative_advice	=	ip6_negative_advice,
170 	.link_failure		=	ip6_link_failure,
171 	.update_pmtu		=	ip6_rt_update_pmtu,
172 	.local_out		=	__ip6_local_out,
173 	.neigh_lookup		=	ip6_neigh_lookup,
174 };
175 
176 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
177 {
178 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179 
180 	return mtu ? : dst->dev->mtu;
181 }
182 
183 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184 {
185 }
186 
187 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 					 unsigned long old)
189 {
190 	return NULL;
191 }
192 
193 static struct dst_ops ip6_dst_blackhole_ops = {
194 	.family			=	AF_INET6,
195 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
196 	.destroy		=	ip6_dst_destroy,
197 	.check			=	ip6_dst_check,
198 	.mtu			=	ip6_blackhole_mtu,
199 	.default_advmss		=	ip6_default_advmss,
200 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
201 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
202 	.neigh_lookup		=	ip6_neigh_lookup,
203 };
204 
205 static const u32 ip6_template_metrics[RTAX_MAX] = {
206 	[RTAX_HOPLIMIT - 1] = 0,
207 };
208 
209 static struct rt6_info ip6_null_entry_template = {
210 	.dst = {
211 		.__refcnt	= ATOMIC_INIT(1),
212 		.__use		= 1,
213 		.obsolete	= -1,
214 		.error		= -ENETUNREACH,
215 		.input		= ip6_pkt_discard,
216 		.output		= ip6_pkt_discard_out,
217 	},
218 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
219 	.rt6i_protocol  = RTPROT_KERNEL,
220 	.rt6i_metric	= ~(u32) 0,
221 	.rt6i_ref	= ATOMIC_INIT(1),
222 };
223 
224 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
225 
226 static int ip6_pkt_prohibit(struct sk_buff *skb);
227 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
228 
229 static struct rt6_info ip6_prohibit_entry_template = {
230 	.dst = {
231 		.__refcnt	= ATOMIC_INIT(1),
232 		.__use		= 1,
233 		.obsolete	= -1,
234 		.error		= -EACCES,
235 		.input		= ip6_pkt_prohibit,
236 		.output		= ip6_pkt_prohibit_out,
237 	},
238 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
239 	.rt6i_protocol  = RTPROT_KERNEL,
240 	.rt6i_metric	= ~(u32) 0,
241 	.rt6i_ref	= ATOMIC_INIT(1),
242 };
243 
244 static struct rt6_info ip6_blk_hole_entry_template = {
245 	.dst = {
246 		.__refcnt	= ATOMIC_INIT(1),
247 		.__use		= 1,
248 		.obsolete	= -1,
249 		.error		= -EINVAL,
250 		.input		= dst_discard,
251 		.output		= dst_discard,
252 	},
253 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
254 	.rt6i_protocol  = RTPROT_KERNEL,
255 	.rt6i_metric	= ~(u32) 0,
256 	.rt6i_ref	= ATOMIC_INIT(1),
257 };
258 
259 #endif
260 
261 /* allocate dst with ip6_dst_ops */
262 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
263 					     struct net_device *dev,
264 					     int flags)
265 {
266 	struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
267 
268 	if (rt)
269 		memset(&rt->rt6i_table, 0,
270 		       sizeof(*rt) - sizeof(struct dst_entry));
271 
272 	return rt;
273 }
274 
275 static void ip6_dst_destroy(struct dst_entry *dst)
276 {
277 	struct rt6_info *rt = (struct rt6_info *)dst;
278 	struct inet6_dev *idev = rt->rt6i_idev;
279 	struct inet_peer *peer = rt->rt6i_peer;
280 
281 	if (!(rt->dst.flags & DST_HOST))
282 		dst_destroy_metrics_generic(dst);
283 
284 	if (idev) {
285 		rt->rt6i_idev = NULL;
286 		in6_dev_put(idev);
287 	}
288 
289 	if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
290 		dst_release(dst->from);
291 
292 	if (peer) {
293 		rt->rt6i_peer = NULL;
294 		inet_putpeer(peer);
295 	}
296 }
297 
298 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
299 
300 static u32 rt6_peer_genid(void)
301 {
302 	return atomic_read(&__rt6_peer_genid);
303 }
304 
305 void rt6_bind_peer(struct rt6_info *rt, int create)
306 {
307 	struct inet_peer *peer;
308 
309 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
310 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
311 		inet_putpeer(peer);
312 	else
313 		rt->rt6i_peer_genid = rt6_peer_genid();
314 }
315 
316 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
317 			   int how)
318 {
319 	struct rt6_info *rt = (struct rt6_info *)dst;
320 	struct inet6_dev *idev = rt->rt6i_idev;
321 	struct net_device *loopback_dev =
322 		dev_net(dev)->loopback_dev;
323 
324 	if (dev != loopback_dev && idev && idev->dev == dev) {
325 		struct inet6_dev *loopback_idev =
326 			in6_dev_get(loopback_dev);
327 		if (loopback_idev) {
328 			rt->rt6i_idev = loopback_idev;
329 			in6_dev_put(idev);
330 		}
331 	}
332 }
333 
334 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
335 {
336 	struct rt6_info *ort = NULL;
337 
338 	if (rt->rt6i_flags & RTF_EXPIRES) {
339 		if (time_after(jiffies, rt->dst.expires))
340 			return 1;
341 	} else if (rt->dst.from) {
342 		ort = (struct rt6_info *) rt->dst.from;
343 		return (ort->rt6i_flags & RTF_EXPIRES) &&
344 			time_after(jiffies, ort->dst.expires);
345 	}
346 	return 0;
347 }
348 
349 static inline int rt6_need_strict(const struct in6_addr *daddr)
350 {
351 	return ipv6_addr_type(daddr) &
352 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
353 }
354 
355 /*
356  *	Route lookup. Any table->tb6_lock is implied.
357  */
358 
359 static inline struct rt6_info *rt6_device_match(struct net *net,
360 						    struct rt6_info *rt,
361 						    const struct in6_addr *saddr,
362 						    int oif,
363 						    int flags)
364 {
365 	struct rt6_info *local = NULL;
366 	struct rt6_info *sprt;
367 
368 	if (!oif && ipv6_addr_any(saddr))
369 		goto out;
370 
371 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
372 		struct net_device *dev = sprt->dst.dev;
373 
374 		if (oif) {
375 			if (dev->ifindex == oif)
376 				return sprt;
377 			if (dev->flags & IFF_LOOPBACK) {
378 				if (!sprt->rt6i_idev ||
379 				    sprt->rt6i_idev->dev->ifindex != oif) {
380 					if (flags & RT6_LOOKUP_F_IFACE && oif)
381 						continue;
382 					if (local && (!oif ||
383 						      local->rt6i_idev->dev->ifindex == oif))
384 						continue;
385 				}
386 				local = sprt;
387 			}
388 		} else {
389 			if (ipv6_chk_addr(net, saddr, dev,
390 					  flags & RT6_LOOKUP_F_IFACE))
391 				return sprt;
392 		}
393 	}
394 
395 	if (oif) {
396 		if (local)
397 			return local;
398 
399 		if (flags & RT6_LOOKUP_F_IFACE)
400 			return net->ipv6.ip6_null_entry;
401 	}
402 out:
403 	return rt;
404 }
405 
406 #ifdef CONFIG_IPV6_ROUTER_PREF
407 static void rt6_probe(struct rt6_info *rt)
408 {
409 	struct neighbour *neigh;
410 	/*
411 	 * Okay, this does not seem to be appropriate
412 	 * for now, however, we need to check if it
413 	 * is really so; aka Router Reachability Probing.
414 	 *
415 	 * Router Reachability Probe MUST be rate-limited
416 	 * to no more than one per minute.
417 	 */
418 	rcu_read_lock();
419 	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
420 	if (!neigh || (neigh->nud_state & NUD_VALID))
421 		goto out;
422 	read_lock_bh(&neigh->lock);
423 	if (!(neigh->nud_state & NUD_VALID) &&
424 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
425 		struct in6_addr mcaddr;
426 		struct in6_addr *target;
427 
428 		neigh->updated = jiffies;
429 		read_unlock_bh(&neigh->lock);
430 
431 		target = (struct in6_addr *)&neigh->primary_key;
432 		addrconf_addr_solict_mult(target, &mcaddr);
433 		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
434 	} else {
435 		read_unlock_bh(&neigh->lock);
436 	}
437 out:
438 	rcu_read_unlock();
439 }
440 #else
441 static inline void rt6_probe(struct rt6_info *rt)
442 {
443 }
444 #endif
445 
446 /*
447  * Default Router Selection (RFC 2461 6.3.6)
448  */
449 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
450 {
451 	struct net_device *dev = rt->dst.dev;
452 	if (!oif || dev->ifindex == oif)
453 		return 2;
454 	if ((dev->flags & IFF_LOOPBACK) &&
455 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
456 		return 1;
457 	return 0;
458 }
459 
460 static inline int rt6_check_neigh(struct rt6_info *rt)
461 {
462 	struct neighbour *neigh;
463 	int m;
464 
465 	rcu_read_lock();
466 	neigh = dst_get_neighbour_noref(&rt->dst);
467 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
468 	    !(rt->rt6i_flags & RTF_GATEWAY))
469 		m = 1;
470 	else if (neigh) {
471 		read_lock_bh(&neigh->lock);
472 		if (neigh->nud_state & NUD_VALID)
473 			m = 2;
474 #ifdef CONFIG_IPV6_ROUTER_PREF
475 		else if (neigh->nud_state & NUD_FAILED)
476 			m = 0;
477 #endif
478 		else
479 			m = 1;
480 		read_unlock_bh(&neigh->lock);
481 	} else
482 		m = 0;
483 	rcu_read_unlock();
484 	return m;
485 }
486 
487 static int rt6_score_route(struct rt6_info *rt, int oif,
488 			   int strict)
489 {
490 	int m, n;
491 
492 	m = rt6_check_dev(rt, oif);
493 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
494 		return -1;
495 #ifdef CONFIG_IPV6_ROUTER_PREF
496 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
497 #endif
498 	n = rt6_check_neigh(rt);
499 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
500 		return -1;
501 	return m;
502 }
503 
504 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
505 				   int *mpri, struct rt6_info *match)
506 {
507 	int m;
508 
509 	if (rt6_check_expired(rt))
510 		goto out;
511 
512 	m = rt6_score_route(rt, oif, strict);
513 	if (m < 0)
514 		goto out;
515 
516 	if (m > *mpri) {
517 		if (strict & RT6_LOOKUP_F_REACHABLE)
518 			rt6_probe(match);
519 		*mpri = m;
520 		match = rt;
521 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
522 		rt6_probe(rt);
523 	}
524 
525 out:
526 	return match;
527 }
528 
529 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
530 				     struct rt6_info *rr_head,
531 				     u32 metric, int oif, int strict)
532 {
533 	struct rt6_info *rt, *match;
534 	int mpri = -1;
535 
536 	match = NULL;
537 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
538 	     rt = rt->dst.rt6_next)
539 		match = find_match(rt, oif, strict, &mpri, match);
540 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
541 	     rt = rt->dst.rt6_next)
542 		match = find_match(rt, oif, strict, &mpri, match);
543 
544 	return match;
545 }
546 
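/*
 * Pick the best route among the entries with the same metric at this node,
 * starting from the round-robin pointer.  If nothing (probably) reachable
 * is found, advance the pointer so the next lookup tries another router.
 */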
547 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
548 {
549 	struct rt6_info *match, *rt0;
550 	struct net *net;
551 
552 	rt0 = fn->rr_ptr;
553 	if (!rt0)
554 		fn->rr_ptr = rt0 = fn->leaf;
555 
556 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
557 
558 	if (!match &&
559 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
560 		struct rt6_info *next = rt0->dst.rt6_next;
561 
562 		/* no entries matched; do round-robin */
563 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
564 			next = fn->leaf;
565 
566 		if (next != rt0)
567 			fn->rr_ptr = next;
568 	}
569 
570 	net = dev_net(rt0->dst.dev);
571 	return match ? match : net->ipv6.ip6_null_entry;
572 }
573 
574 #ifdef CONFIG_IPV6_ROUTE_INFO
575 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
576 		  const struct in6_addr *gwaddr)
577 {
578 	struct net *net = dev_net(dev);
579 	struct route_info *rinfo = (struct route_info *) opt;
580 	struct in6_addr prefix_buf, *prefix;
581 	unsigned int pref;
582 	unsigned long lifetime;
583 	struct rt6_info *rt;
584 
585 	if (len < sizeof(struct route_info)) {
586 		return -EINVAL;
587 	}
588 
589 	/* Sanity check for prefix_len and length */
590 	if (rinfo->length > 3) {
591 		return -EINVAL;
592 	} else if (rinfo->prefix_len > 128) {
593 		return -EINVAL;
594 	} else if (rinfo->prefix_len > 64) {
595 		if (rinfo->length < 2) {
596 			return -EINVAL;
597 		}
598 	} else if (rinfo->prefix_len > 0) {
599 		if (rinfo->length < 1) {
600 			return -EINVAL;
601 		}
602 	}
603 
604 	pref = rinfo->route_pref;
605 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
606 		return -EINVAL;
607 
608 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
609 
610 	if (rinfo->length == 3)
611 		prefix = (struct in6_addr *)rinfo->prefix;
612 	else {
613 		/* this function is safe */
614 		ipv6_addr_prefix(&prefix_buf,
615 				 (struct in6_addr *)rinfo->prefix,
616 				 rinfo->prefix_len);
617 		prefix = &prefix_buf;
618 	}
619 
620 	if (rinfo->prefix_len == 0)
621 		rt = rt6_get_dflt_router(gwaddr, dev);
622 	else
623 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
624 					gwaddr, dev->ifindex);
625 
626 	if (rt && !lifetime) {
627 		ip6_del_rt(rt);
628 		rt = NULL;
629 	}
630 
631 	if (!rt && lifetime)
632 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
633 					pref);
634 	else if (rt)
635 		rt->rt6i_flags = RTF_ROUTEINFO |
636 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
637 
638 	if (rt) {
639 		if (!addrconf_finite_timeout(lifetime))
640 			rt6_clean_expires(rt);
641 		else
642 			rt6_set_expires(rt, jiffies + HZ * lifetime);
643 
644 		dst_release(&rt->dst);
645 	}
646 	return 0;
647 }
648 #endif
649 
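/*
 * If the lookup resolved to the null entry, walk back up the fib6 tree
 * (descending into source-address subtrees along the way) until a node
 * carrying route info is found, then restart the lookup from there.
 * Give up once the tree root is reached.
 */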
650 #define BACKTRACK(__net, saddr)			\
651 do { \
652 	if (rt == __net->ipv6.ip6_null_entry) {	\
653 		struct fib6_node *pn; \
654 		while (1) { \
655 			if (fn->fn_flags & RTN_TL_ROOT) \
656 				goto out; \
657 			pn = fn->parent; \
658 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
659 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
660 			else \
661 				fn = pn; \
662 			if (fn->fn_flags & RTN_RTINFO) \
663 				goto restart; \
664 		} \
665 	} \
666 } while (0)
667 
668 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
669 					     struct fib6_table *table,
670 					     struct flowi6 *fl6, int flags)
671 {
672 	struct fib6_node *fn;
673 	struct rt6_info *rt;
674 
675 	read_lock_bh(&table->tb6_lock);
676 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
677 restart:
678 	rt = fn->leaf;
679 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
680 	BACKTRACK(net, &fl6->saddr);
681 out:
682 	dst_use(&rt->dst, jiffies);
683 	read_unlock_bh(&table->tb6_lock);
684 	return rt;
685 
686 }
687 
688 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
689 				    int flags)
690 {
691 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
692 }
693 EXPORT_SYMBOL_GPL(ip6_route_lookup);
694 
695 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
696 			    const struct in6_addr *saddr, int oif, int strict)
697 {
698 	struct flowi6 fl6 = {
699 		.flowi6_oif = oif,
700 		.daddr = *daddr,
701 	};
702 	struct dst_entry *dst;
703 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
704 
705 	if (saddr) {
706 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
707 		flags |= RT6_LOOKUP_F_HAS_SADDR;
708 	}
709 
710 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
711 	if (dst->error == 0)
712 		return (struct rt6_info *) dst;
713 
714 	dst_release(dst);
715 
716 	return NULL;
717 }
718 
719 EXPORT_SYMBOL(rt6_lookup);
720 
721 /* ip6_ins_rt is called with FREE table->tb6_lock.
722    It takes a new route entry; if the addition fails for any reason, the
723    route is freed. In any case, if the caller does not hold a reference to
724    it, it may be destroyed.
725  */
726 
727 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
728 {
729 	int err;
730 	struct fib6_table *table;
731 
732 	table = rt->rt6i_table;
733 	write_lock_bh(&table->tb6_lock);
734 	err = fib6_add(&table->tb6_root, rt, info);
735 	write_unlock_bh(&table->tb6_lock);
736 
737 	return err;
738 }
739 
740 int ip6_ins_rt(struct rt6_info *rt)
741 {
742 	struct nl_info info = {
743 		.nl_net = dev_net(rt->dst.dev),
744 	};
745 	return __ip6_ins_rt(rt, &info);
746 }
747 
748 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
749 				      const struct in6_addr *daddr,
750 				      const struct in6_addr *saddr)
751 {
752 	struct rt6_info *rt;
753 
754 	/*
755 	 *	Clone the route.
756 	 */
757 
758 	rt = ip6_rt_copy(ort, daddr);
759 
760 	if (rt) {
761 		int attempts = !in_softirq();
762 
763 		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
764 			if (ort->rt6i_dst.plen != 128 &&
765 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
766 				rt->rt6i_flags |= RTF_ANYCAST;
767 			rt->rt6i_gateway = *daddr;
768 		}
769 
770 		rt->rt6i_flags |= RTF_CACHE;
771 
772 #ifdef CONFIG_IPV6_SUBTREES
773 		if (rt->rt6i_src.plen && saddr) {
774 			rt->rt6i_src.addr = *saddr;
775 			rt->rt6i_src.plen = 128;
776 		}
777 #endif
778 
779 	retry:
780 		if (rt6_bind_neighbour(rt, rt->dst.dev)) {
781 			struct net *net = dev_net(rt->dst.dev);
782 			int saved_rt_min_interval =
783 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
784 			int saved_rt_elasticity =
785 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
786 
787 			if (attempts-- > 0) {
788 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
789 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
790 
791 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
792 
793 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
794 					saved_rt_elasticity;
795 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
796 					saved_rt_min_interval;
797 				goto retry;
798 			}
799 
800 			if (net_ratelimit())
801 				printk(KERN_WARNING
802 				       "ipv6: Neighbour table overflow.\n");
803 			dst_free(&rt->dst);
804 			return NULL;
805 		}
806 	}
807 
808 	return rt;
809 }
810 
811 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
812 					const struct in6_addr *daddr)
813 {
814 	struct rt6_info *rt = ip6_rt_copy(ort, daddr);
815 
816 	if (rt) {
817 		rt->rt6i_flags |= RTF_CACHE;
818 		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
819 	}
820 	return rt;
821 }
822 
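/*
 * Core routing lookup used for both the input and output paths: select a
 * route with rt6_select() and, unless it is already a cached host entry,
 * create and insert a per-destination copy so later lookups hit the cache.
 */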
823 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
824 				      struct flowi6 *fl6, int flags, bool input)
825 {
826 	struct fib6_node *fn;
827 	struct rt6_info *rt, *nrt;
828 	int strict = 0;
829 	int attempts = 3;
830 	int err;
831 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
832 	int local = RTF_NONEXTHOP;
833 
834 	strict |= flags & RT6_LOOKUP_F_IFACE;
835 	if (input)
836 		local |= RTF_LOCAL;
837 
838 relookup:
839 	read_lock_bh(&table->tb6_lock);
840 
841 restart_2:
842 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
843 
844 restart:
845 	rt = rt6_select(fn, oif, strict | reachable);
846 
847 	BACKTRACK(net, &fl6->saddr);
848 	if (rt == net->ipv6.ip6_null_entry ||
849 	    rt->rt6i_flags & RTF_CACHE)
850 		goto out;
851 
852 	dst_hold(&rt->dst);
853 	read_unlock_bh(&table->tb6_lock);
854 
855 	if (!dst_get_neighbour_noref_raw(&rt->dst) &&
856 	    !(rt->rt6i_flags & local))
857 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
858 	else if (!(rt->dst.flags & DST_HOST))
859 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
860 	else
861 		goto out2;
862 
863 	dst_release(&rt->dst);
864 	rt = nrt ? : net->ipv6.ip6_null_entry;
865 
866 	dst_hold(&rt->dst);
867 	if (nrt) {
868 		err = ip6_ins_rt(nrt);
869 		if (!err)
870 			goto out2;
871 	}
872 
873 	if (--attempts <= 0)
874 		goto out2;
875 
876 	/*
877 	 * Race condition! In the gap, when table->tb6_lock was
878 	 * released someone could insert this route.  Relookup.
879 	 */
880 	dst_release(&rt->dst);
881 	goto relookup;
882 
883 out:
884 	if (reachable) {
885 		reachable = 0;
886 		goto restart_2;
887 	}
888 	dst_hold(&rt->dst);
889 	read_unlock_bh(&table->tb6_lock);
890 out2:
891 	rt->dst.lastuse = jiffies;
892 	rt->dst.__use++;
893 
894 	return rt;
895 }
896 
897 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
898 					    struct flowi6 *fl6, int flags)
899 {
900 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags, true);
901 }
902 
903 static struct dst_entry *ip6_route_input_lookup(struct net *net,
904 						struct net_device *dev,
905 						struct flowi6 *fl6, int flags)
906 {
907 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
908 		flags |= RT6_LOOKUP_F_IFACE;
909 
910 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
911 }
912 
913 void ip6_route_input(struct sk_buff *skb)
914 {
915 	const struct ipv6hdr *iph = ipv6_hdr(skb);
916 	struct net *net = dev_net(skb->dev);
917 	int flags = RT6_LOOKUP_F_HAS_SADDR;
918 	struct flowi6 fl6 = {
919 		.flowi6_iif = skb->dev->ifindex,
920 		.daddr = iph->daddr,
921 		.saddr = iph->saddr,
922 		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
923 		.flowi6_mark = skb->mark,
924 		.flowi6_proto = iph->nexthdr,
925 	};
926 
927 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
928 }
929 
930 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
931 					     struct flowi6 *fl6, int flags)
932 {
933 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags, false);
934 }
935 
936 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
937 				    struct flowi6 *fl6)
938 {
939 	int flags = 0;
940 
941 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
942 		flags |= RT6_LOOKUP_F_IFACE;
943 
944 	if (!ipv6_addr_any(&fl6->saddr))
945 		flags |= RT6_LOOKUP_F_HAS_SADDR;
946 	else if (sk)
947 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
948 
949 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
950 }
951 
952 EXPORT_SYMBOL(ip6_route_output);
953 
954 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
955 {
956 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
957 	struct dst_entry *new = NULL;
958 
959 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
960 	if (rt) {
961 		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
962 
963 		new = &rt->dst;
964 
965 		new->__use = 1;
966 		new->input = dst_discard;
967 		new->output = dst_discard;
968 
969 		if (dst_metrics_read_only(&ort->dst))
970 			new->_metrics = ort->dst._metrics;
971 		else
972 			dst_copy_metrics(new, &ort->dst);
973 		rt->rt6i_idev = ort->rt6i_idev;
974 		if (rt->rt6i_idev)
975 			in6_dev_hold(rt->rt6i_idev);
976 
977 		rt->rt6i_gateway = ort->rt6i_gateway;
978 		rt->rt6i_flags = ort->rt6i_flags;
979 		rt6_clean_expires(rt);
980 		rt->rt6i_metric = 0;
981 
982 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
983 #ifdef CONFIG_IPV6_SUBTREES
984 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
985 #endif
986 
987 		dst_free(new);
988 	}
989 
990 	dst_release(dst_orig);
991 	return new ? new : ERR_PTR(-ENOMEM);
992 }
993 
994 /*
995  *	Destination cache support functions
996  */
997 
998 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
999 {
1000 	struct rt6_info *rt;
1001 
1002 	rt = (struct rt6_info *) dst;
1003 
1004 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1005 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1006 			if (!rt->rt6i_peer)
1007 				rt6_bind_peer(rt, 0);
1008 			rt->rt6i_peer_genid = rt6_peer_genid();
1009 		}
1010 		return dst;
1011 	}
1012 	return NULL;
1013 }
1014 
1015 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1016 {
1017 	struct rt6_info *rt = (struct rt6_info *) dst;
1018 
1019 	if (rt) {
1020 		if (rt->rt6i_flags & RTF_CACHE) {
1021 			if (rt6_check_expired(rt)) {
1022 				ip6_del_rt(rt);
1023 				dst = NULL;
1024 			}
1025 		} else {
1026 			dst_release(dst);
1027 			dst = NULL;
1028 		}
1029 	}
1030 	return dst;
1031 }
1032 
1033 static void ip6_link_failure(struct sk_buff *skb)
1034 {
1035 	struct rt6_info *rt;
1036 
1037 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1038 
1039 	rt = (struct rt6_info *) skb_dst(skb);
1040 	if (rt) {
1041 		if (rt->rt6i_flags & RTF_CACHE) {
1042 			dst_hold(&rt->dst);
1043 			if (ip6_del_rt(rt))
1044 				dst_free(&rt->dst);
1045 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1046 			rt->rt6i_node->fn_sernum = -1;
1047 		}
1048 	}
1049 }
1050 
1051 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1052 {
1053 	struct rt6_info *rt6 = (struct rt6_info*)dst;
1054 
1055 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1056 		rt6->rt6i_flags |= RTF_MODIFIED;
1057 		if (mtu < IPV6_MIN_MTU) {
1058 			u32 features = dst_metric(dst, RTAX_FEATURES);
1059 			mtu = IPV6_MIN_MTU;
1060 			features |= RTAX_FEATURE_ALLFRAG;
1061 			dst_metric_set(dst, RTAX_FEATURES, features);
1062 		}
1063 		dst_metric_set(dst, RTAX_MTU, mtu);
1064 	}
1065 }
1066 
1067 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1068 {
1069 	struct net_device *dev = dst->dev;
1070 	unsigned int mtu = dst_mtu(dst);
1071 	struct net *net = dev_net(dev);
1072 
1073 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1074 
1075 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1076 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1077 
1078 	/*
1079 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1080 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1081 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1082 	 * rely only on pmtu discovery"
1083 	 */
1084 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1085 		mtu = IPV6_MAXPLEN;
1086 	return mtu;
1087 }
1088 
1089 static unsigned int ip6_mtu(const struct dst_entry *dst)
1090 {
1091 	struct inet6_dev *idev;
1092 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1093 
1094 	if (mtu)
1095 		goto out;
1096 
1097 	mtu = IPV6_MIN_MTU;
1098 
1099 	rcu_read_lock();
1100 	idev = __in6_dev_get(dst->dev);
1101 	if (idev)
1102 		mtu = idev->cnf.mtu6;
1103 	rcu_read_unlock();
1104 
1105 out:
1106 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1107 }
1108 
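/*
 * dst entries allocated for ICMPv6/ndisc are never inserted into the FIB;
 * they are kept on this private list, protected by icmp6_dst_lock, and
 * reclaimed by icmp6_dst_gc() once their refcount drops to zero.
 */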
1109 static struct dst_entry *icmp6_dst_gc_list;
1110 static DEFINE_SPINLOCK(icmp6_dst_lock);
1111 
1112 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1113 				  struct neighbour *neigh,
1114 				  struct flowi6 *fl6)
1115 {
1116 	struct dst_entry *dst;
1117 	struct rt6_info *rt;
1118 	struct inet6_dev *idev = in6_dev_get(dev);
1119 	struct net *net = dev_net(dev);
1120 
1121 	if (unlikely(!idev))
1122 		return ERR_PTR(-ENODEV);
1123 
1124 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1125 	if (unlikely(!rt)) {
1126 		in6_dev_put(idev);
1127 		dst = ERR_PTR(-ENOMEM);
1128 		goto out;
1129 	}
1130 
1131 	if (neigh)
1132 		neigh_hold(neigh);
1133 	else {
1134 		neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
1135 		if (IS_ERR(neigh)) {
1136 			in6_dev_put(idev);
1137 			dst_free(&rt->dst);
1138 			return ERR_CAST(neigh);
1139 		}
1140 	}
1141 
1142 	rt->dst.flags |= DST_HOST;
1143 	rt->dst.output  = ip6_output;
1144 	dst_set_neighbour(&rt->dst, neigh);
1145 	atomic_set(&rt->dst.__refcnt, 1);
1146 	rt->rt6i_dst.addr = fl6->daddr;
1147 	rt->rt6i_dst.plen = 128;
1148 	rt->rt6i_idev     = idev;
1149 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1150 
1151 	spin_lock_bh(&icmp6_dst_lock);
1152 	rt->dst.next = icmp6_dst_gc_list;
1153 	icmp6_dst_gc_list = &rt->dst;
1154 	spin_unlock_bh(&icmp6_dst_lock);
1155 
1156 	fib6_force_start_gc(net);
1157 
1158 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1159 
1160 out:
1161 	return dst;
1162 }
1163 
1164 int icmp6_dst_gc(void)
1165 {
1166 	struct dst_entry *dst, **pprev;
1167 	int more = 0;
1168 
1169 	spin_lock_bh(&icmp6_dst_lock);
1170 	pprev = &icmp6_dst_gc_list;
1171 
1172 	while ((dst = *pprev) != NULL) {
1173 		if (!atomic_read(&dst->__refcnt)) {
1174 			*pprev = dst->next;
1175 			dst_free(dst);
1176 		} else {
1177 			pprev = &dst->next;
1178 			++more;
1179 		}
1180 	}
1181 
1182 	spin_unlock_bh(&icmp6_dst_lock);
1183 
1184 	return more;
1185 }
1186 
1187 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1188 			    void *arg)
1189 {
1190 	struct dst_entry *dst, **pprev;
1191 
1192 	spin_lock_bh(&icmp6_dst_lock);
1193 	pprev = &icmp6_dst_gc_list;
1194 	while ((dst = *pprev) != NULL) {
1195 		struct rt6_info *rt = (struct rt6_info *) dst;
1196 		if (func(rt, arg)) {
1197 			*pprev = dst->next;
1198 			dst_free(dst);
1199 		} else {
1200 			pprev = &dst->next;
1201 		}
1202 	}
1203 	spin_unlock_bh(&icmp6_dst_lock);
1204 }
1205 
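/*
 * dst garbage collector, invoked from dst_alloc() when the number of
 * cached entries exceeds gc_thresh; runs fib6_run_gc() with an adaptive
 * expiry interval.
 */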
1206 static int ip6_dst_gc(struct dst_ops *ops)
1207 {
1208 	unsigned long now = jiffies;
1209 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1210 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1211 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1212 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1213 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1214 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1215 	int entries;
1216 
1217 	entries = dst_entries_get_fast(ops);
1218 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1219 	    entries <= rt_max_size)
1220 		goto out;
1221 
1222 	net->ipv6.ip6_rt_gc_expire++;
1223 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1224 	net->ipv6.ip6_rt_last_gc = now;
1225 	entries = dst_entries_get_slow(ops);
1226 	if (entries < ops->gc_thresh)
1227 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1228 out:
1229 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1230 	return entries > rt_max_size;
1231 }
1232 
1233 /* Clean host part of a prefix. Not necessary in radix tree,
1234    but results in cleaner routing tables.
1235 
1236    Remove it only when all the things will work!
1237  */
1238 
1239 int ip6_dst_hoplimit(struct dst_entry *dst)
1240 {
1241 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1242 	if (hoplimit == 0) {
1243 		struct net_device *dev = dst->dev;
1244 		struct inet6_dev *idev;
1245 
1246 		rcu_read_lock();
1247 		idev = __in6_dev_get(dev);
1248 		if (idev)
1249 			hoplimit = idev->cnf.hop_limit;
1250 		else
1251 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1252 		rcu_read_unlock();
1253 	}
1254 	return hoplimit;
1255 }
1256 EXPORT_SYMBOL(ip6_dst_hoplimit);
1257 
1258 /*
1259  *
1260  */
1261 
1262 int ip6_route_add(struct fib6_config *cfg)
1263 {
1264 	int err;
1265 	struct net *net = cfg->fc_nlinfo.nl_net;
1266 	struct rt6_info *rt = NULL;
1267 	struct net_device *dev = NULL;
1268 	struct inet6_dev *idev = NULL;
1269 	struct fib6_table *table;
1270 	int addr_type;
1271 
1272 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1273 		return -EINVAL;
1274 #ifndef CONFIG_IPV6_SUBTREES
1275 	if (cfg->fc_src_len)
1276 		return -EINVAL;
1277 #endif
1278 	if (cfg->fc_ifindex) {
1279 		err = -ENODEV;
1280 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1281 		if (!dev)
1282 			goto out;
1283 		idev = in6_dev_get(dev);
1284 		if (!idev)
1285 			goto out;
1286 	}
1287 
1288 	if (cfg->fc_metric == 0)
1289 		cfg->fc_metric = IP6_RT_PRIO_USER;
1290 
1291 	err = -ENOBUFS;
1292 	if (cfg->fc_nlinfo.nlh &&
1293 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1294 		table = fib6_get_table(net, cfg->fc_table);
1295 		if (!table) {
1296 			printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1297 			table = fib6_new_table(net, cfg->fc_table);
1298 		}
1299 	} else {
1300 		table = fib6_new_table(net, cfg->fc_table);
1301 	}
1302 
1303 	if (!table)
1304 		goto out;
1305 
1306 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1307 
1308 	if (!rt) {
1309 		err = -ENOMEM;
1310 		goto out;
1311 	}
1312 
1313 	rt->dst.obsolete = -1;
1314 
1315 	if (cfg->fc_flags & RTF_EXPIRES)
1316 		rt6_set_expires(rt, jiffies +
1317 				clock_t_to_jiffies(cfg->fc_expires));
1318 	else
1319 		rt6_clean_expires(rt);
1320 
1321 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1322 		cfg->fc_protocol = RTPROT_BOOT;
1323 	rt->rt6i_protocol = cfg->fc_protocol;
1324 
1325 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1326 
1327 	if (addr_type & IPV6_ADDR_MULTICAST)
1328 		rt->dst.input = ip6_mc_input;
1329 	else if (cfg->fc_flags & RTF_LOCAL)
1330 		rt->dst.input = ip6_input;
1331 	else
1332 		rt->dst.input = ip6_forward;
1333 
1334 	rt->dst.output = ip6_output;
1335 
1336 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1337 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1338 	if (rt->rt6i_dst.plen == 128)
1339 	       rt->dst.flags |= DST_HOST;
1340 
1341 	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1342 		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1343 		if (!metrics) {
1344 			err = -ENOMEM;
1345 			goto out;
1346 		}
1347 		dst_init_metrics(&rt->dst, metrics, 0);
1348 	}
1349 #ifdef CONFIG_IPV6_SUBTREES
1350 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1351 	rt->rt6i_src.plen = cfg->fc_src_len;
1352 #endif
1353 
1354 	rt->rt6i_metric = cfg->fc_metric;
1355 
1356 	/* We cannot add true routes via loopback here,
1357 	   they would result in kernel looping; promote them to reject routes
1358 	 */
1359 	if ((cfg->fc_flags & RTF_REJECT) ||
1360 	    (dev && (dev->flags & IFF_LOOPBACK) &&
1361 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
1362 	     !(cfg->fc_flags & RTF_LOCAL))) {
1363 		/* hold loopback dev/idev if we haven't done so. */
1364 		if (dev != net->loopback_dev) {
1365 			if (dev) {
1366 				dev_put(dev);
1367 				in6_dev_put(idev);
1368 			}
1369 			dev = net->loopback_dev;
1370 			dev_hold(dev);
1371 			idev = in6_dev_get(dev);
1372 			if (!idev) {
1373 				err = -ENODEV;
1374 				goto out;
1375 			}
1376 		}
1377 		rt->dst.output = ip6_pkt_discard_out;
1378 		rt->dst.input = ip6_pkt_discard;
1379 		rt->dst.error = -ENETUNREACH;
1380 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1381 		goto install_route;
1382 	}
1383 
1384 	if (cfg->fc_flags & RTF_GATEWAY) {
1385 		const struct in6_addr *gw_addr;
1386 		int gwa_type;
1387 
1388 		gw_addr = &cfg->fc_gateway;
1389 		rt->rt6i_gateway = *gw_addr;
1390 		gwa_type = ipv6_addr_type(gw_addr);
1391 
1392 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1393 			struct rt6_info *grt;
1394 
1395 			/* IPv6 strictly forbids using non-link-local
1396 			   addresses as the nexthop address.
1397 			   Otherwise, the router will not be able to send redirects.
1398 			   It is very good, but in some (rare!) circumstances
1399 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1400 			   some exceptions. --ANK
1401 			 */
1402 			err = -EINVAL;
1403 			if (!(gwa_type & IPV6_ADDR_UNICAST))
1404 				goto out;
1405 
1406 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1407 
1408 			err = -EHOSTUNREACH;
1409 			if (!grt)
1410 				goto out;
1411 			if (dev) {
1412 				if (dev != grt->dst.dev) {
1413 					dst_release(&grt->dst);
1414 					goto out;
1415 				}
1416 			} else {
1417 				dev = grt->dst.dev;
1418 				idev = grt->rt6i_idev;
1419 				dev_hold(dev);
1420 				in6_dev_hold(grt->rt6i_idev);
1421 			}
1422 			if (!(grt->rt6i_flags & RTF_GATEWAY))
1423 				err = 0;
1424 			dst_release(&grt->dst);
1425 
1426 			if (err)
1427 				goto out;
1428 		}
1429 		err = -EINVAL;
1430 		if (!dev || (dev->flags & IFF_LOOPBACK))
1431 			goto out;
1432 	}
1433 
1434 	err = -ENODEV;
1435 	if (!dev)
1436 		goto out;
1437 
1438 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1439 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1440 			err = -EINVAL;
1441 			goto out;
1442 		}
1443 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1444 		rt->rt6i_prefsrc.plen = 128;
1445 	} else
1446 		rt->rt6i_prefsrc.plen = 0;
1447 
1448 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1449 		err = rt6_bind_neighbour(rt, dev);
1450 		if (err)
1451 			goto out;
1452 	}
1453 
1454 	rt->rt6i_flags = cfg->fc_flags;
1455 
1456 install_route:
1457 	if (cfg->fc_mx) {
1458 		struct nlattr *nla;
1459 		int remaining;
1460 
1461 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1462 			int type = nla_type(nla);
1463 
1464 			if (type) {
1465 				if (type > RTAX_MAX) {
1466 					err = -EINVAL;
1467 					goto out;
1468 				}
1469 
1470 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1471 			}
1472 		}
1473 	}
1474 
1475 	rt->dst.dev = dev;
1476 	rt->rt6i_idev = idev;
1477 	rt->rt6i_table = table;
1478 
1479 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1480 
1481 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1482 
1483 out:
1484 	if (dev)
1485 		dev_put(dev);
1486 	if (idev)
1487 		in6_dev_put(idev);
1488 	if (rt)
1489 		dst_free(&rt->dst);
1490 	return err;
1491 }
1492 
1493 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1494 {
1495 	int err;
1496 	struct fib6_table *table;
1497 	struct net *net = dev_net(rt->dst.dev);
1498 
1499 	if (rt == net->ipv6.ip6_null_entry) {
1500 		err = -ENOENT;
1501 		goto out;
1502 	}
1503 
1504 	table = rt->rt6i_table;
1505 	write_lock_bh(&table->tb6_lock);
1506 	err = fib6_del(rt, info);
1507 	write_unlock_bh(&table->tb6_lock);
1508 
1509 out:
1510 	dst_release(&rt->dst);
1511 	return err;
1512 }
1513 
1514 int ip6_del_rt(struct rt6_info *rt)
1515 {
1516 	struct nl_info info = {
1517 		.nl_net = dev_net(rt->dst.dev),
1518 	};
1519 	return __ip6_del_rt(rt, &info);
1520 }
1521 
1522 static int ip6_route_del(struct fib6_config *cfg)
1523 {
1524 	struct fib6_table *table;
1525 	struct fib6_node *fn;
1526 	struct rt6_info *rt;
1527 	int err = -ESRCH;
1528 
1529 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1530 	if (!table)
1531 		return err;
1532 
1533 	read_lock_bh(&table->tb6_lock);
1534 
1535 	fn = fib6_locate(&table->tb6_root,
1536 			 &cfg->fc_dst, cfg->fc_dst_len,
1537 			 &cfg->fc_src, cfg->fc_src_len);
1538 
1539 	if (fn) {
1540 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1541 			if (cfg->fc_ifindex &&
1542 			    (!rt->dst.dev ||
1543 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
1544 				continue;
1545 			if (cfg->fc_flags & RTF_GATEWAY &&
1546 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1547 				continue;
1548 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1549 				continue;
1550 			dst_hold(&rt->dst);
1551 			read_unlock_bh(&table->tb6_lock);
1552 
1553 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1554 		}
1555 	}
1556 	read_unlock_bh(&table->tb6_lock);
1557 
1558 	return err;
1559 }
1560 
1561 /*
1562  *	Handle redirects
1563  */
1564 struct ip6rd_flowi {
1565 	struct flowi6 fl6;
1566 	struct in6_addr gateway;
1567 };
1568 
1569 static struct rt6_info *__ip6_route_redirect(struct net *net,
1570 					     struct fib6_table *table,
1571 					     struct flowi6 *fl6,
1572 					     int flags)
1573 {
1574 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1575 	struct rt6_info *rt;
1576 	struct fib6_node *fn;
1577 
1578 	/*
1579 	 * Get the "current" route for this destination and
1580 	 * check if the redirect has come from an appropriate router.
1581 	 *
1582 	 * RFC 2461 specifies that redirects should only be
1583 	 * accepted if they come from the nexthop to the target.
1584 	 * Due to the way the routes are chosen, this notion
1585 	 * is a bit fuzzy and one might need to check all possible
1586 	 * routes.
1587 	 */
1588 
1589 	read_lock_bh(&table->tb6_lock);
1590 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1591 restart:
1592 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1593 		/*
1594 		 * Current route is on-link; redirect is always invalid.
1595 		 *
1596 		 * It seems the previous statement is not true. It could
1597 		 * be a node which regards us as on-link (e.g. proxy ndisc).
1598 		 * But then the router serving it might decide that we should
1599 		 * know the truth. 8)8) --ANK (980726).
1600 		 */
1601 		if (rt6_check_expired(rt))
1602 			continue;
1603 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1604 			continue;
1605 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1606 			continue;
1607 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1608 			continue;
1609 		break;
1610 	}
1611 
1612 	if (!rt)
1613 		rt = net->ipv6.ip6_null_entry;
1614 	BACKTRACK(net, &fl6->saddr);
1615 out:
1616 	dst_hold(&rt->dst);
1617 
1618 	read_unlock_bh(&table->tb6_lock);
1619 
1620 	return rt;
1621 };
1622 
1623 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1624 					   const struct in6_addr *src,
1625 					   const struct in6_addr *gateway,
1626 					   struct net_device *dev)
1627 {
1628 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1629 	struct net *net = dev_net(dev);
1630 	struct ip6rd_flowi rdfl = {
1631 		.fl6 = {
1632 			.flowi6_oif = dev->ifindex,
1633 			.daddr = *dest,
1634 			.saddr = *src,
1635 		},
1636 	};
1637 
1638 	rdfl.gateway = *gateway;
1639 
1640 	if (rt6_need_strict(dest))
1641 		flags |= RT6_LOOKUP_F_IFACE;
1642 
1643 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1644 						   flags, __ip6_route_redirect);
1645 }
1646 
1647 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1648 		  const struct in6_addr *saddr,
1649 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1650 {
1651 	struct rt6_info *rt, *nrt = NULL;
1652 	struct netevent_redirect netevent;
1653 	struct net *net = dev_net(neigh->dev);
1654 
1655 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1656 
1657 	if (rt == net->ipv6.ip6_null_entry) {
1658 		if (net_ratelimit())
1659 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1660 			       "for redirect target\n");
1661 		goto out;
1662 	}
1663 
1664 	/*
1665 	 *	We have finally decided to accept it.
1666 	 */
1667 
1668 	neigh_update(neigh, lladdr, NUD_STALE,
1669 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1670 		     NEIGH_UPDATE_F_OVERRIDE|
1671 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1672 				     NEIGH_UPDATE_F_ISROUTER))
1673 		     );
1674 
1675 	/*
1676 	 * Redirect received -> path was valid.
1677 	 * Look, redirects are sent only in response to data packets,
1678 	 * so that this nexthop apparently is reachable. --ANK
1679 	 */
1680 	dst_confirm(&rt->dst);
1681 
1682 	/* Duplicate redirect: silently ignore. */
1683 	if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1684 		goto out;
1685 
1686 	nrt = ip6_rt_copy(rt, dest);
1687 	if (!nrt)
1688 		goto out;
1689 
1690 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1691 	if (on_link)
1692 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1693 
1694 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1695 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1696 
1697 	if (ip6_ins_rt(nrt))
1698 		goto out;
1699 
1700 	netevent.old = &rt->dst;
1701 	netevent.new = &nrt->dst;
1702 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1703 
1704 	if (rt->rt6i_flags & RTF_CACHE) {
1705 		ip6_del_rt(rt);
1706 		return;
1707 	}
1708 
1709 out:
1710 	dst_release(&rt->dst);
1711 }
1712 
1713 /*
1714  *	Handle ICMP "packet too big" messages
1715  *	i.e. Path MTU discovery
1716  */
1717 
1718 static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1719 			     struct net *net, u32 pmtu, int ifindex)
1720 {
1721 	struct rt6_info *rt, *nrt;
1722 	int allfrag = 0;
1723 again:
1724 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1725 	if (!rt)
1726 		return;
1727 
1728 	if (rt6_check_expired(rt)) {
1729 		ip6_del_rt(rt);
1730 		goto again;
1731 	}
1732 
1733 	if (pmtu >= dst_mtu(&rt->dst))
1734 		goto out;
1735 
1736 	if (pmtu < IPV6_MIN_MTU) {
1737 		/*
1738 		 * According to RFC 2460, when a node receives a Packet Too Big
1739 		 * message reporting a PMTU less than the IPv6 Minimum Link MTU
1740 		 * (1280), the PMTU is set to that minimum and a fragment header
1741 		 * must be included in all subsequent packets on the path.
1742 		 */
1743 		pmtu = IPV6_MIN_MTU;
1744 		allfrag = 1;
1745 	}
1746 
1747 	/* New mtu received -> path was valid.
1748 	   They are sent only in response to data packets,
1749 	   so that this nexthop apparently is reachable. --ANK
1750 	 */
1751 	dst_confirm(&rt->dst);
1752 
1753 	/* Host route. If it is static, it would be better
1754 	   not to override it, but to add a new one, so that
1755 	   when the cache entry expires, the old pmtu
1756 	   is restored automatically.
1757 	 */
1758 	if (rt->rt6i_flags & RTF_CACHE) {
1759 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1760 		if (allfrag) {
1761 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1762 			features |= RTAX_FEATURE_ALLFRAG;
1763 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1764 		}
1765 		rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1766 		rt->rt6i_flags |= RTF_MODIFIED;
1767 		goto out;
1768 	}
1769 
1770 	/* Network route.
1771 	   Two cases are possible:
1772 	   1. It is a connected route. Action: COW.
1773 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1774 	 */
1775 	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1776 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1777 	else
1778 		nrt = rt6_alloc_clone(rt, daddr);
1779 
1780 	if (nrt) {
1781 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1782 		if (allfrag) {
1783 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1784 			features |= RTAX_FEATURE_ALLFRAG;
1785 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1786 		}
1787 
1788 		/* According to RFC 1981, probing for a PMTU increase shouldn't
1789 		 * happen within 5 minutes; the recommended timer is 10 minutes.
1790 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1791 		 * which is 10 minutes. After that the decreased pmtu expires
1792 		 * and detection of a PMTU increase happens automatically.
1793 		 */
1794 		rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1795 		nrt->rt6i_flags |= RTF_DYNAMIC;
1796 		ip6_ins_rt(nrt);
1797 	}
1798 out:
1799 	dst_release(&rt->dst);
1800 }
1801 
1802 void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1803 			struct net_device *dev, u32 pmtu)
1804 {
1805 	struct net *net = dev_net(dev);
1806 
1807 	/*
1808 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1809 	 * is sending along the path" that caused the Packet Too Big message.
1810 	 * Since it's not possible in the general case to determine which
1811 	 * interface was used to send the original packet, we update the MTU
1812 	 * on the interface that will be used to send future packets. We also
1813 	 * update the MTU on the interface that received the Packet Too Big in
1814 	 * case the original packet was forced out that interface with
1815 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1816 	 * correct behaviour, which would be to update the MTU on all
1817 	 * interfaces.
1818 	 */
1819 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1820 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1821 }
1822 
1823 /*
1824  *	Misc support functions
1825  */
1826 
1827 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1828 				    const struct in6_addr *dest)
1829 {
1830 	struct net *net = dev_net(ort->dst.dev);
1831 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1832 					    ort->dst.dev, 0);
1833 
1834 	if (rt) {
1835 		rt->dst.input = ort->dst.input;
1836 		rt->dst.output = ort->dst.output;
1837 		rt->dst.flags |= DST_HOST;
1838 
1839 		rt->rt6i_dst.addr = *dest;
1840 		rt->rt6i_dst.plen = 128;
1841 		dst_copy_metrics(&rt->dst, &ort->dst);
1842 		rt->dst.error = ort->dst.error;
1843 		rt->rt6i_idev = ort->rt6i_idev;
1844 		if (rt->rt6i_idev)
1845 			in6_dev_hold(rt->rt6i_idev);
1846 		rt->dst.lastuse = jiffies;
1847 
1848 		rt->rt6i_gateway = ort->rt6i_gateway;
1849 		rt->rt6i_flags = ort->rt6i_flags;
1850 		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1851 		    (RTF_DEFAULT | RTF_ADDRCONF))
1852 			rt6_set_from(rt, ort);
1853 		else
1854 			rt6_clean_expires(rt);
1855 		rt->rt6i_metric = 0;
1856 
1857 #ifdef CONFIG_IPV6_SUBTREES
1858 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1859 #endif
1860 		memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1861 		rt->rt6i_table = ort->rt6i_table;
1862 	}
1863 	return rt;
1864 }
1865 
1866 #ifdef CONFIG_IPV6_ROUTE_INFO
1867 static struct rt6_info *rt6_get_route_info(struct net *net,
1868 					   const struct in6_addr *prefix, int prefixlen,
1869 					   const struct in6_addr *gwaddr, int ifindex)
1870 {
1871 	struct fib6_node *fn;
1872 	struct rt6_info *rt = NULL;
1873 	struct fib6_table *table;
1874 
1875 	table = fib6_get_table(net, RT6_TABLE_INFO);
1876 	if (!table)
1877 		return NULL;
1878 
1879 	write_lock_bh(&table->tb6_lock);
1880 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1881 	if (!fn)
1882 		goto out;
1883 
1884 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1885 		if (rt->dst.dev->ifindex != ifindex)
1886 			continue;
1887 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1888 			continue;
1889 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1890 			continue;
1891 		dst_hold(&rt->dst);
1892 		break;
1893 	}
1894 out:
1895 	write_unlock_bh(&table->tb6_lock);
1896 	return rt;
1897 }
1898 
1899 static struct rt6_info *rt6_add_route_info(struct net *net,
1900 					   const struct in6_addr *prefix, int prefixlen,
1901 					   const struct in6_addr *gwaddr, int ifindex,
1902 					   unsigned pref)
1903 {
1904 	struct fib6_config cfg = {
1905 		.fc_table	= RT6_TABLE_INFO,
1906 		.fc_metric	= IP6_RT_PRIO_USER,
1907 		.fc_ifindex	= ifindex,
1908 		.fc_dst_len	= prefixlen,
1909 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1910 				  RTF_UP | RTF_PREF(pref),
1911 		.fc_nlinfo.pid = 0,
1912 		.fc_nlinfo.nlh = NULL,
1913 		.fc_nlinfo.nl_net = net,
1914 	};
1915 
1916 	cfg.fc_dst = *prefix;
1917 	cfg.fc_gateway = *gwaddr;
1918 
1919 	/* We should treat it as a default route if prefix length is 0. */
1920 	if (!prefixlen)
1921 		cfg.fc_flags |= RTF_DEFAULT;
1922 
1923 	ip6_route_add(&cfg);
1924 
1925 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1926 }
1927 #endif
1928 
1929 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1930 {
1931 	struct rt6_info *rt;
1932 	struct fib6_table *table;
1933 
1934 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1935 	if (!table)
1936 		return NULL;
1937 
1938 	write_lock_bh(&table->tb6_lock);
1939 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1940 		if (dev == rt->dst.dev &&
1941 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1942 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1943 			break;
1944 	}
1945 	if (rt)
1946 		dst_hold(&rt->dst);
1947 	write_unlock_bh(&table->tb6_lock);
1948 	return rt;
1949 }
1950 
1951 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1952 				     struct net_device *dev,
1953 				     unsigned int pref)
1954 {
1955 	struct fib6_config cfg = {
1956 		.fc_table	= RT6_TABLE_DFLT,
1957 		.fc_metric	= IP6_RT_PRIO_USER,
1958 		.fc_ifindex	= dev->ifindex,
1959 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1960 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1961 		.fc_nlinfo.pid = 0,
1962 		.fc_nlinfo.nlh = NULL,
1963 		.fc_nlinfo.nl_net = dev_net(dev),
1964 	};
1965 
1966 	cfg.fc_gateway = *gwaddr;
1967 
1968 	ip6_route_add(&cfg);
1969 
1970 	return rt6_get_dflt_router(gwaddr, dev);
1971 }
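
/* Illustrative sketch (hypothetical, condensed): Router Advertisement
 * processing typically looks up an existing default route for the announcing
 * router and creates one if the advertised router lifetime is non-zero:
 *
 *	rt = rt6_get_dflt_router(&ra_saddr, dev);
 *	if (!rt && lifetime)
 *		rt = rt6_add_dflt_router(&ra_saddr, dev, pref);
 *	if (rt)
 *		rt6_set_expires(rt, jiffies + (HZ * lifetime));
 *
 * ra_saddr, lifetime and pref stand for values parsed from the RA; the
 * expiry helper used to refresh the route lifetime may differ by kernel
 * version.
 */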
1972 
1973 void rt6_purge_dflt_routers(struct net *net)
1974 {
1975 	struct rt6_info *rt;
1976 	struct fib6_table *table;
1977 
1978 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1979 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1980 	if (!table)
1981 		return;
1982 
1983 restart:
1984 	read_lock_bh(&table->tb6_lock);
1985 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1986 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
1987 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
1988 			dst_hold(&rt->dst);
1989 			read_unlock_bh(&table->tb6_lock);
1990 			ip6_del_rt(rt);
1991 			goto restart;
1992 		}
1993 	}
1994 	read_unlock_bh(&table->tb6_lock);
1995 }
1996 
1997 static void rtmsg_to_fib6_config(struct net *net,
1998 				 struct in6_rtmsg *rtmsg,
1999 				 struct fib6_config *cfg)
2000 {
2001 	memset(cfg, 0, sizeof(*cfg));
2002 
2003 	cfg->fc_table = RT6_TABLE_MAIN;
2004 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2005 	cfg->fc_metric = rtmsg->rtmsg_metric;
2006 	cfg->fc_expires = rtmsg->rtmsg_info;
2007 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2008 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
2009 	cfg->fc_flags = rtmsg->rtmsg_flags;
2010 
2011 	cfg->fc_nlinfo.nl_net = net;
2012 
2013 	cfg->fc_dst = rtmsg->rtmsg_dst;
2014 	cfg->fc_src = rtmsg->rtmsg_src;
2015 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
2016 }
2017 
2018 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2019 {
2020 	struct fib6_config cfg;
2021 	struct in6_rtmsg rtmsg;
2022 	int err;
2023 
2024 	switch (cmd) {
2025 	case SIOCADDRT:		/* Add a route */
2026 	case SIOCDELRT:		/* Delete a route */
2027 		if (!capable(CAP_NET_ADMIN))
2028 			return -EPERM;
2029 		err = copy_from_user(&rtmsg, arg,
2030 				     sizeof(struct in6_rtmsg));
2031 		if (err)
2032 			return -EFAULT;
2033 
2034 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2035 
2036 		rtnl_lock();
2037 		switch (cmd) {
2038 		case SIOCADDRT:
2039 			err = ip6_route_add(&cfg);
2040 			break;
2041 		case SIOCDELRT:
2042 			err = ip6_route_del(&cfg);
2043 			break;
2044 		default:
2045 			err = -EINVAL;
2046 		}
2047 		rtnl_unlock();
2048 
2049 		return err;
2050 	}
2051 
2052 	return -EINVAL;
2053 }
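
/* Illustrative userspace sketch (not part of this file): the legacy
 * SIOCADDRT/SIOCDELRT interface handled above takes a struct in6_rtmsg
 * (from <linux/ipv6_route.h>) via ioctl() on an AF_INET6 socket.  All values
 * below are examples only:
 *
 *	struct in6_rtmsg rtmsg = {0};
 *	int fd;
 *
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	inet_pton(AF_INET6, "fe80::1", &rtmsg.rtmsg_gateway);
 *	rtmsg.rtmsg_dst_len = 64;
 *	rtmsg.rtmsg_metric  = 1;
 *	rtmsg.rtmsg_flags   = RTF_UP | RTF_GATEWAY;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *
 *	fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	ioctl(fd, SIOCADDRT, &rtmsg);	// requires CAP_NET_ADMIN
 */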
2054 
2055 /*
2056  *	Drop the packet on the floor
2057  */
2058 
2059 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2060 {
2061 	int type;
2062 	struct dst_entry *dst = skb_dst(skb);
2063 	switch (ipstats_mib_noroutes) {
2064 	case IPSTATS_MIB_INNOROUTES:
2065 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2066 		if (type == IPV6_ADDR_ANY) {
2067 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2068 				      IPSTATS_MIB_INADDRERRORS);
2069 			break;
2070 		}
2071 		/* FALLTHROUGH */
2072 	case IPSTATS_MIB_OUTNOROUTES:
2073 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2074 			      ipstats_mib_noroutes);
2075 		break;
2076 	}
2077 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2078 	kfree_skb(skb);
2079 	return 0;
2080 }
2081 
2082 static int ip6_pkt_discard(struct sk_buff *skb)
2083 {
2084 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2085 }
2086 
2087 static int ip6_pkt_discard_out(struct sk_buff *skb)
2088 {
2089 	skb->dev = skb_dst(skb)->dev;
2090 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2091 }
2092 
2093 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2094 
2095 static int ip6_pkt_prohibit(struct sk_buff *skb)
2096 {
2097 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2098 }
2099 
2100 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2101 {
2102 	skb->dev = skb_dst(skb)->dev;
2103 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2104 }
2105 
2106 #endif
2107 
2108 /*
2109  *	Allocate a dst for local (unicast / anycast) address.
2110  */
2111 
2112 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2113 				    const struct in6_addr *addr,
2114 				    bool anycast)
2115 {
2116 	struct net *net = dev_net(idev->dev);
2117 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2118 					    net->loopback_dev, DST_NOCOUNT);
2119 	int err;
2120 
2121 	if (!rt)
2122 		return ERR_PTR(-ENOMEM);
2123 
2124 	in6_dev_hold(idev);
2125 
2126 	rt->dst.flags |= DST_HOST;
2127 	rt->dst.input = ip6_input;
2128 	rt->dst.output = ip6_output;
2129 	rt->rt6i_idev = idev;
2130 	rt->dst.obsolete = -1;
2131 
2132 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2133 	if (anycast)
2134 		rt->rt6i_flags |= RTF_ANYCAST;
2135 	else
2136 		rt->rt6i_flags |= RTF_LOCAL;
2137 	err = rt6_bind_neighbour(rt, rt->dst.dev);
2138 	if (err) {
2139 		dst_free(&rt->dst);
2140 		return ERR_PTR(err);
2141 	}
2142 
2143 	rt->rt6i_dst.addr = *addr;
2144 	rt->rt6i_dst.plen = 128;
2145 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2146 
2147 	atomic_set(&rt->dst.__refcnt, 1);
2148 
2149 	return rt;
2150 }
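
/* Illustrative sketch (hypothetical caller): addrconf-style code allocates a
 * local or anycast host route with addrconf_dst_alloc() and then inserts it
 * into the FIB, roughly:
 *
 *	struct rt6_info *rt = addrconf_dst_alloc(idev, addr, anycast);
 *
 *	if (!IS_ERR(rt))
 *		ip6_ins_rt(rt);
 *
 * Error handling and reference counting in the real callers are more
 * involved than shown here.
 */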
2151 
2152 int ip6_route_get_saddr(struct net *net,
2153 			struct rt6_info *rt,
2154 			const struct in6_addr *daddr,
2155 			unsigned int prefs,
2156 			struct in6_addr *saddr)
2157 {
2158 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2159 	int err = 0;
2160 	if (rt->rt6i_prefsrc.plen)
2161 		*saddr = rt->rt6i_prefsrc.addr;
2162 	else
2163 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2164 					 daddr, prefs, saddr);
2165 	return err;
2166 }
2167 
2168 /* remove the deleted address from prefsrc entries */
2169 struct arg_dev_net_ip {
2170 	struct net_device *dev;
2171 	struct net *net;
2172 	struct in6_addr *addr;
2173 };
2174 
2175 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2176 {
2177 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2178 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2179 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2180 
2181 	if (((void *)rt->dst.dev == dev || !dev) &&
2182 	    rt != net->ipv6.ip6_null_entry &&
2183 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2184 		/* remove prefsrc entry */
2185 		rt->rt6i_prefsrc.plen = 0;
2186 	}
2187 	return 0;
2188 }
2189 
2190 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2191 {
2192 	struct net *net = dev_net(ifp->idev->dev);
2193 	struct arg_dev_net_ip adni = {
2194 		.dev = ifp->idev->dev,
2195 		.net = net,
2196 		.addr = &ifp->addr,
2197 	};
2198 	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2199 }
2200 
2201 struct arg_dev_net {
2202 	struct net_device *dev;
2203 	struct net *net;
2204 };
2205 
2206 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2207 {
2208 	const struct arg_dev_net *adn = arg;
2209 	const struct net_device *dev = adn->dev;
2210 
2211 	if ((rt->dst.dev == dev || !dev) &&
2212 	    rt != adn->net->ipv6.ip6_null_entry)
2213 		return -1;
2214 
2215 	return 0;
2216 }
2217 
2218 void rt6_ifdown(struct net *net, struct net_device *dev)
2219 {
2220 	struct arg_dev_net adn = {
2221 		.dev = dev,
2222 		.net = net,
2223 	};
2224 
2225 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2226 	icmp6_clean_all(fib6_ifdown, &adn);
2227 }
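
/* Illustrative sketch of the fib6_clean_all() walker pattern used by
 * fib6_remove_prefsrc() and fib6_ifdown() above: the callback is invoked for
 * every rt6_info with an opaque argument; returning a negative value asks
 * the walker to delete the route (as fib6_ifdown() does), returning 0 keeps
 * it.  The callback below is hypothetical and merely counts routes bound to
 * a device:
 *
 *	struct example_count_arg { struct net_device *dev; int count; };
 *
 *	static int example_count_route(struct rt6_info *rt, void *arg)
 *	{
 *		struct example_count_arg *a = arg;
 *
 *		if (rt->dst.dev == a->dev)
 *			a->count++;
 *		return 0;
 *	}
 *
 *	// fib6_clean_all(net, example_count_route, 0, &count_arg);
 */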
2228 
2229 struct rt6_mtu_change_arg
2230 {
2231 	struct net_device *dev;
2232 	unsigned mtu;
2233 };
2234 
2235 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2236 {
2237 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2238 	struct inet6_dev *idev;
2239 
2240 	/* In IPv6, PMTU discovery is not optional, so locking
2241 	   RTAX_MTU cannot be used to disable it.  We still honour
2242 	   the lock here to block MTU changes coming from
2243 	   addrconf/ndisc.
2244 	*/
2245 
2246 	idev = __in6_dev_get(arg->dev);
2247 	if (!idev)
2248 		return 0;
2249 
2250 	/* An administrative MTU increase cannot be discovered by IPv6
2251 	   PMTU discovery, so the PMTU must be updated here.  Since
2252 	   RFC 1981 does not cover administrative MTU changes, updating
2253 	   the PMTU on such an increase (e.g. jumbo frames) is a MUST.
2254 	 */
2255 	/*
2256 	   If the new MTU is less than the route PMTU, the new MTU will be
2257 	   the lowest MTU on the path; update the route PMTU to reflect
2258 	   the decrease.  If the new MTU is greater than the route PMTU,
2259 	   and the old MTU was the lowest MTU on the path, update the
2260 	   route PMTU to reflect the increase.  If another node on the
2261 	   path still has the lowest MTU, its Packet Too Big message will
2262 	   trigger PMTU discovery again.
2263 	 */
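	/*
	   Worked example (illustrative numbers only): suppose a route on
	   arg->dev has dst_mtu == idev->cnf.mtu6 == 1500 and the
	   administrator raises the device MTU to 9000.  The old MTU was the
	   path minimum, so the condition below updates RTAX_MTU to 9000.
	   If instead the MTU is lowered to 1280, dst_mtu (1500) >= arg->mtu
	   matches and the route PMTU drops to 1280.
	 */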
2264 	if (rt->dst.dev == arg->dev &&
2265 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2266 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2267 	     (dst_mtu(&rt->dst) < arg->mtu &&
2268 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2269 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2270 	}
2271 	return 0;
2272 }
2273 
2274 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2275 {
2276 	struct rt6_mtu_change_arg arg = {
2277 		.dev = dev,
2278 		.mtu = mtu,
2279 	};
2280 
2281 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2282 }
2283 
2284 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2285 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2286 	[RTA_OIF]               = { .type = NLA_U32 },
2287 	[RTA_IIF]		= { .type = NLA_U32 },
2288 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2289 	[RTA_METRICS]           = { .type = NLA_NESTED },
2290 };
2291 
2292 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2293 			      struct fib6_config *cfg)
2294 {
2295 	struct rtmsg *rtm;
2296 	struct nlattr *tb[RTA_MAX+1];
2297 	int err;
2298 
2299 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2300 	if (err < 0)
2301 		goto errout;
2302 
2303 	err = -EINVAL;
2304 	rtm = nlmsg_data(nlh);
2305 	memset(cfg, 0, sizeof(*cfg));
2306 
2307 	cfg->fc_table = rtm->rtm_table;
2308 	cfg->fc_dst_len = rtm->rtm_dst_len;
2309 	cfg->fc_src_len = rtm->rtm_src_len;
2310 	cfg->fc_flags = RTF_UP;
2311 	cfg->fc_protocol = rtm->rtm_protocol;
2312 
2313 	if (rtm->rtm_type == RTN_UNREACHABLE)
2314 		cfg->fc_flags |= RTF_REJECT;
2315 
2316 	if (rtm->rtm_type == RTN_LOCAL)
2317 		cfg->fc_flags |= RTF_LOCAL;
2318 
2319 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2320 	cfg->fc_nlinfo.nlh = nlh;
2321 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2322 
2323 	if (tb[RTA_GATEWAY]) {
2324 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2325 		cfg->fc_flags |= RTF_GATEWAY;
2326 	}
2327 
2328 	if (tb[RTA_DST]) {
2329 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2330 
2331 		if (nla_len(tb[RTA_DST]) < plen)
2332 			goto errout;
2333 
2334 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2335 	}
2336 
2337 	if (tb[RTA_SRC]) {
2338 		int plen = (rtm->rtm_src_len + 7) >> 3;
2339 
2340 		if (nla_len(tb[RTA_SRC]) < plen)
2341 			goto errout;
2342 
2343 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2344 	}
2345 
2346 	if (tb[RTA_PREFSRC])
2347 		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2348 
2349 	if (tb[RTA_OIF])
2350 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2351 
2352 	if (tb[RTA_PRIORITY])
2353 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2354 
2355 	if (tb[RTA_METRICS]) {
2356 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2357 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2358 	}
2359 
2360 	if (tb[RTA_TABLE])
2361 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2362 
2363 	err = 0;
2364 errout:
2365 	return err;
2366 }
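
/* Illustrative example (userspace side, values arbitrary): an iproute2
 * command such as
 *
 *	ip -6 route add 2001:db8::/64 via fe80::1 dev eth0 metric 1024
 *
 * arrives here as an RTM_NEWROUTE message whose rtm_dst_len, RTA_DST,
 * RTA_GATEWAY, RTA_OIF and RTA_PRIORITY end up in cfg->fc_dst_len,
 * cfg->fc_dst, cfg->fc_gateway (plus RTF_GATEWAY), cfg->fc_ifindex and
 * cfg->fc_metric respectively.
 */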
2367 
2368 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2369 {
2370 	struct fib6_config cfg;
2371 	int err;
2372 
2373 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2374 	if (err < 0)
2375 		return err;
2376 
2377 	return ip6_route_del(&cfg);
2378 }
2379 
2380 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2381 {
2382 	struct fib6_config cfg;
2383 	int err;
2384 
2385 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2386 	if (err < 0)
2387 		return err;
2388 
2389 	return ip6_route_add(&cfg);
2390 }
2391 
2392 static inline size_t rt6_nlmsg_size(void)
2393 {
2394 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2395 	       + nla_total_size(16) /* RTA_SRC */
2396 	       + nla_total_size(16) /* RTA_DST */
2397 	       + nla_total_size(16) /* RTA_GATEWAY */
2398 	       + nla_total_size(16) /* RTA_PREFSRC */
2399 	       + nla_total_size(4) /* RTA_TABLE */
2400 	       + nla_total_size(4) /* RTA_IIF */
2401 	       + nla_total_size(4) /* RTA_OIF */
2402 	       + nla_total_size(4) /* RTA_PRIORITY */
2403 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2404 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2405 }
2406 
2407 static int rt6_fill_node(struct net *net,
2408 			 struct sk_buff *skb, struct rt6_info *rt,
2409 			 struct in6_addr *dst, struct in6_addr *src,
2410 			 int iif, int type, u32 pid, u32 seq,
2411 			 int prefix, int nowait, unsigned int flags)
2412 {
2413 	const struct inet_peer *peer;
2414 	struct rtmsg *rtm;
2415 	struct nlmsghdr *nlh;
2416 	long expires;
2417 	u32 table;
2418 	struct neighbour *n;
2419 	u32 ts, tsage;
2420 
2421 	if (prefix) {	/* user wants prefix routes only */
2422 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2423 			/* success since this is not a prefix route */
2424 			return 1;
2425 		}
2426 	}
2427 
2428 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2429 	if (!nlh)
2430 		return -EMSGSIZE;
2431 
2432 	rtm = nlmsg_data(nlh);
2433 	rtm->rtm_family = AF_INET6;
2434 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2435 	rtm->rtm_src_len = rt->rt6i_src.plen;
2436 	rtm->rtm_tos = 0;
2437 	if (rt->rt6i_table)
2438 		table = rt->rt6i_table->tb6_id;
2439 	else
2440 		table = RT6_TABLE_UNSPEC;
2441 	rtm->rtm_table = table;
2442 	NLA_PUT_U32(skb, RTA_TABLE, table);
2443 	if (rt->rt6i_flags & RTF_REJECT)
2444 		rtm->rtm_type = RTN_UNREACHABLE;
2445 	else if (rt->rt6i_flags & RTF_LOCAL)
2446 		rtm->rtm_type = RTN_LOCAL;
2447 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2448 		rtm->rtm_type = RTN_LOCAL;
2449 	else
2450 		rtm->rtm_type = RTN_UNICAST;
2451 	rtm->rtm_flags = 0;
2452 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2453 	rtm->rtm_protocol = rt->rt6i_protocol;
2454 	if (rt->rt6i_flags & RTF_DYNAMIC)
2455 		rtm->rtm_protocol = RTPROT_REDIRECT;
2456 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2457 		rtm->rtm_protocol = RTPROT_KERNEL;
2458 	else if (rt->rt6i_flags & RTF_DEFAULT)
2459 		rtm->rtm_protocol = RTPROT_RA;
2460 
2461 	if (rt->rt6i_flags & RTF_CACHE)
2462 		rtm->rtm_flags |= RTM_F_CLONED;
2463 
2464 	if (dst) {
2465 		NLA_PUT(skb, RTA_DST, 16, dst);
2466 		rtm->rtm_dst_len = 128;
2467 	} else if (rtm->rtm_dst_len)
2468 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2469 #ifdef CONFIG_IPV6_SUBTREES
2470 	if (src) {
2471 		NLA_PUT(skb, RTA_SRC, 16, src);
2472 		rtm->rtm_src_len = 128;
2473 	} else if (rtm->rtm_src_len)
2474 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2475 #endif
2476 	if (iif) {
2477 #ifdef CONFIG_IPV6_MROUTE
2478 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2479 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2480 			if (err <= 0) {
2481 				if (!nowait) {
2482 					if (err == 0)
2483 						return 0;
2484 					goto nla_put_failure;
2485 				} else {
2486 					if (err == -EMSGSIZE)
2487 						goto nla_put_failure;
2488 				}
2489 			}
2490 		} else
2491 #endif
2492 			NLA_PUT_U32(skb, RTA_IIF, iif);
2493 	} else if (dst) {
2494 		struct in6_addr saddr_buf;
2495 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2496 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2497 	}
2498 
2499 	if (rt->rt6i_prefsrc.plen) {
2500 		struct in6_addr saddr_buf;
2501 		saddr_buf = rt->rt6i_prefsrc.addr;
2502 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2503 	}
2504 
2505 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2506 		goto nla_put_failure;
2507 
2508 	rcu_read_lock();
2509 	n = dst_get_neighbour_noref(&rt->dst);
2510 	if (n) {
2511 		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2512 			rcu_read_unlock();
2513 			goto nla_put_failure;
2514 		}
2515 	}
2516 	rcu_read_unlock();
2517 
2518 	if (rt->dst.dev)
2519 		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2520 
2521 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2522 
2523 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2524 		expires = 0;
2525 	else if (rt->dst.expires - jiffies < INT_MAX)
2526 		expires = rt->dst.expires - jiffies;
2527 	else
2528 		expires = INT_MAX;
2529 
2530 	peer = rt->rt6i_peer;
2531 	ts = tsage = 0;
2532 	if (peer && peer->tcp_ts_stamp) {
2533 		ts = peer->tcp_ts;
2534 		tsage = get_seconds() - peer->tcp_ts_stamp;
2535 	}
2536 
2537 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
2538 			       expires, rt->dst.error) < 0)
2539 		goto nla_put_failure;
2540 
2541 	return nlmsg_end(skb, nlh);
2542 
2543 nla_put_failure:
2544 	nlmsg_cancel(skb, nlh);
2545 	return -EMSGSIZE;
2546 }
2547 
2548 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2549 {
2550 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2551 	int prefix;
2552 
2553 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2554 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2555 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2556 	} else
2557 		prefix = 0;
2558 
2559 	return rt6_fill_node(arg->net,
2560 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2561 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2562 		     prefix, 0, NLM_F_MULTI);
2563 }
2564 
2565 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2566 {
2567 	struct net *net = sock_net(in_skb->sk);
2568 	struct nlattr *tb[RTA_MAX+1];
2569 	struct rt6_info *rt;
2570 	struct sk_buff *skb;
2571 	struct rtmsg *rtm;
2572 	struct flowi6 fl6;
2573 	int err, iif = 0, oif = 0;
2574 
2575 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2576 	if (err < 0)
2577 		goto errout;
2578 
2579 	err = -EINVAL;
2580 	memset(&fl6, 0, sizeof(fl6));
2581 
2582 	if (tb[RTA_SRC]) {
2583 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2584 			goto errout;
2585 
2586 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2587 	}
2588 
2589 	if (tb[RTA_DST]) {
2590 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2591 			goto errout;
2592 
2593 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2594 	}
2595 
2596 	if (tb[RTA_IIF])
2597 		iif = nla_get_u32(tb[RTA_IIF]);
2598 
2599 	if (tb[RTA_OIF])
2600 		oif = nla_get_u32(tb[RTA_OIF]);
2601 
2602 	if (iif) {
2603 		struct net_device *dev;
2604 		int flags = 0;
2605 
2606 		dev = __dev_get_by_index(net, iif);
2607 		if (!dev) {
2608 			err = -ENODEV;
2609 			goto errout;
2610 		}
2611 
2612 		fl6.flowi6_iif = iif;
2613 
2614 		if (!ipv6_addr_any(&fl6.saddr))
2615 			flags |= RT6_LOOKUP_F_HAS_SADDR;
2616 
2617 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2618 							       flags);
2619 	} else {
2620 		fl6.flowi6_oif = oif;
2621 
2622 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2623 	}
2624 
2625 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2626 	if (!skb) {
2627 		err = -ENOBUFS;
2628 		goto errout;
2629 	}
2630 
2631 	/* Reserve room for dummy headers; this skb can pass
2632 	   through a good chunk of the routing engine.
2633 	 */
2634 	skb_reset_mac_header(skb);
2635 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2636 
2637 	skb_dst_set(skb, &rt->dst);
2638 
2639 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2640 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2641 			    nlh->nlmsg_seq, 0, 0, 0);
2642 	if (err < 0) {
2643 		kfree_skb(skb);
2644 		goto errout;
2645 	}
2646 
2647 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2648 errout:
2649 	return err;
2650 }
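
/* Illustrative example: "ip -6 route get 2001:db8::1" issues an RTM_GETROUTE
 * request that lands in inet6_rtm_getroute() above with RTA_DST set; the
 * reply is a single RTM_NEWROUTE message built by rt6_fill_node() and
 * unicast back to the requesting socket.
 */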
2651 
2652 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2653 {
2654 	struct sk_buff *skb;
2655 	struct net *net = info->nl_net;
2656 	u32 seq;
2657 	int err;
2658 
2659 	err = -ENOBUFS;
2660 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2661 
2662 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2663 	if (!skb)
2664 		goto errout;
2665 
2666 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2667 				event, info->pid, seq, 0, 0, 0);
2668 	if (err < 0) {
2669 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2670 		WARN_ON(err == -EMSGSIZE);
2671 		kfree_skb(skb);
2672 		goto errout;
2673 	}
2674 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2675 		    info->nlh, gfp_any());
2676 	return;
2677 errout:
2678 	if (err < 0)
2679 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2680 }
2681 
2682 static int ip6_route_dev_notify(struct notifier_block *this,
2683 				unsigned long event, void *data)
2684 {
2685 	struct net_device *dev = (struct net_device *)data;
2686 	struct net *net = dev_net(dev);
2687 
2688 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2689 		net->ipv6.ip6_null_entry->dst.dev = dev;
2690 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2691 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2692 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2693 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2694 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2695 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2696 #endif
2697 	}
2698 
2699 	return NOTIFY_OK;
2700 }
2701 
2702 /*
2703  *	/proc
2704  */
2705 
2706 #ifdef CONFIG_PROC_FS
2707 
2708 struct rt6_proc_arg
2709 {
2710 	char *buffer;
2711 	int offset;
2712 	int length;
2713 	int skip;
2714 	int len;
2715 };
2716 
2717 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2718 {
2719 	struct seq_file *m = p_arg;
2720 	struct neighbour *n;
2721 
2722 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2723 
2724 #ifdef CONFIG_IPV6_SUBTREES
2725 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2726 #else
2727 	seq_puts(m, "00000000000000000000000000000000 00 ");
2728 #endif
2729 	rcu_read_lock();
2730 	n = dst_get_neighbour_noref(&rt->dst);
2731 	if (n) {
2732 		seq_printf(m, "%pi6", n->primary_key);
2733 	} else {
2734 		seq_puts(m, "00000000000000000000000000000000");
2735 	}
2736 	rcu_read_unlock();
2737 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2738 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2739 		   rt->dst.__use, rt->rt6i_flags,
2740 		   rt->dst.dev ? rt->dst.dev->name : "");
2741 	return 0;
2742 }
2743 
2744 static int ipv6_route_show(struct seq_file *m, void *v)
2745 {
2746 	struct net *net = (struct net *)m->private;
2747 	fib6_clean_all_ro(net, rt6_info_route, 0, m);
2748 	return 0;
2749 }
2750 
2751 static int ipv6_route_open(struct inode *inode, struct file *file)
2752 {
2753 	return single_open_net(inode, file, ipv6_route_show);
2754 }
2755 
2756 static const struct file_operations ipv6_route_proc_fops = {
2757 	.owner		= THIS_MODULE,
2758 	.open		= ipv6_route_open,
2759 	.read		= seq_read,
2760 	.llseek		= seq_lseek,
2761 	.release	= single_release_net,
2762 };
2763 
2764 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2765 {
2766 	struct net *net = (struct net *)seq->private;
2767 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2768 		   net->ipv6.rt6_stats->fib_nodes,
2769 		   net->ipv6.rt6_stats->fib_route_nodes,
2770 		   net->ipv6.rt6_stats->fib_rt_alloc,
2771 		   net->ipv6.rt6_stats->fib_rt_entries,
2772 		   net->ipv6.rt6_stats->fib_rt_cache,
2773 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2774 		   net->ipv6.rt6_stats->fib_discarded_routes);
2775 
2776 	return 0;
2777 }
2778 
2779 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2780 {
2781 	return single_open_net(inode, file, rt6_stats_seq_show);
2782 }
2783 
2784 static const struct file_operations rt6_stats_seq_fops = {
2785 	.owner	 = THIS_MODULE,
2786 	.open	 = rt6_stats_seq_open,
2787 	.read	 = seq_read,
2788 	.llseek	 = seq_lseek,
2789 	.release = single_release_net,
2790 };
2791 #endif	/* CONFIG_PROC_FS */
2792 
2793 #ifdef CONFIG_SYSCTL
2794 
2795 static
2796 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2797 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2798 {
2799 	struct net *net;
2800 	int delay;
2801 	if (!write)
2802 		return -EINVAL;
2803 
2804 	net = (struct net *)ctl->extra1;
2805 	delay = net->ipv6.sysctl.flush_delay;
2806 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2807 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2808 	return 0;
2809 }
2810 
2811 ctl_table ipv6_route_table_template[] = {
2812 	{
2813 		.procname	=	"flush",
2814 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2815 		.maxlen		=	sizeof(int),
2816 		.mode		=	0200,
2817 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2818 	},
2819 	{
2820 		.procname	=	"gc_thresh",
2821 		.data		=	&ip6_dst_ops_template.gc_thresh,
2822 		.maxlen		=	sizeof(int),
2823 		.mode		=	0644,
2824 		.proc_handler	=	proc_dointvec,
2825 	},
2826 	{
2827 		.procname	=	"max_size",
2828 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2829 		.maxlen		=	sizeof(int),
2830 		.mode		=	0644,
2831 		.proc_handler	=	proc_dointvec,
2832 	},
2833 	{
2834 		.procname	=	"gc_min_interval",
2835 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2836 		.maxlen		=	sizeof(int),
2837 		.mode		=	0644,
2838 		.proc_handler	=	proc_dointvec_jiffies,
2839 	},
2840 	{
2841 		.procname	=	"gc_timeout",
2842 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2843 		.maxlen		=	sizeof(int),
2844 		.mode		=	0644,
2845 		.proc_handler	=	proc_dointvec_jiffies,
2846 	},
2847 	{
2848 		.procname	=	"gc_interval",
2849 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2850 		.maxlen		=	sizeof(int),
2851 		.mode		=	0644,
2852 		.proc_handler	=	proc_dointvec_jiffies,
2853 	},
2854 	{
2855 		.procname	=	"gc_elasticity",
2856 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2857 		.maxlen		=	sizeof(int),
2858 		.mode		=	0644,
2859 		.proc_handler	=	proc_dointvec,
2860 	},
2861 	{
2862 		.procname	=	"mtu_expires",
2863 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2864 		.maxlen		=	sizeof(int),
2865 		.mode		=	0644,
2866 		.proc_handler	=	proc_dointvec_jiffies,
2867 	},
2868 	{
2869 		.procname	=	"min_adv_mss",
2870 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2871 		.maxlen		=	sizeof(int),
2872 		.mode		=	0644,
2873 		.proc_handler	=	proc_dointvec,
2874 	},
2875 	{
2876 		.procname	=	"gc_min_interval_ms",
2877 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2878 		.maxlen		=	sizeof(int),
2879 		.mode		=	0644,
2880 		.proc_handler	=	proc_dointvec_ms_jiffies,
2881 	},
2882 	{ }
2883 };
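
/* These entries typically appear under /proc/sys/net/ipv6/route/ (the table
 * is registered under that path elsewhere).  Illustrative usage, assuming
 * the standard sysctl path; values are examples only:
 *
 *	# writing to "flush" triggers a garbage-collection pass
 *	# (see ipv6_sysctl_rtcache_flush() above)
 *	echo 1 > /proc/sys/net/ipv6/route/flush
 *
 *	# raise the PMTU expiry from the default 600 seconds
 *	sysctl -w net.ipv6.route.mtu_expires=1200
 */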
2884 
2885 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2886 {
2887 	struct ctl_table *table;
2888 
2889 	table = kmemdup(ipv6_route_table_template,
2890 			sizeof(ipv6_route_table_template),
2891 			GFP_KERNEL);
2892 
2893 	if (table) {
2894 		table[0].data = &net->ipv6.sysctl.flush_delay;
2895 		table[0].extra1 = net;
2896 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2897 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2898 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2899 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2900 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2901 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2902 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2903 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2904 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2905 	}
2906 
2907 	return table;
2908 }
2909 #endif
2910 
2911 static int __net_init ip6_route_net_init(struct net *net)
2912 {
2913 	int ret = -ENOMEM;
2914 
2915 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2916 	       sizeof(net->ipv6.ip6_dst_ops));
2917 
2918 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2919 		goto out_ip6_dst_ops;
2920 
2921 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2922 					   sizeof(*net->ipv6.ip6_null_entry),
2923 					   GFP_KERNEL);
2924 	if (!net->ipv6.ip6_null_entry)
2925 		goto out_ip6_dst_entries;
2926 	net->ipv6.ip6_null_entry->dst.path =
2927 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2928 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2929 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2930 			 ip6_template_metrics, true);
2931 
2932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2933 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2934 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2935 					       GFP_KERNEL);
2936 	if (!net->ipv6.ip6_prohibit_entry)
2937 		goto out_ip6_null_entry;
2938 	net->ipv6.ip6_prohibit_entry->dst.path =
2939 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2940 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2941 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2942 			 ip6_template_metrics, true);
2943 
2944 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2945 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2946 					       GFP_KERNEL);
2947 	if (!net->ipv6.ip6_blk_hole_entry)
2948 		goto out_ip6_prohibit_entry;
2949 	net->ipv6.ip6_blk_hole_entry->dst.path =
2950 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2951 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2952 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2953 			 ip6_template_metrics, true);
2954 #endif
2955 
2956 	net->ipv6.sysctl.flush_delay = 0;
2957 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2958 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2959 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2960 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2961 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2962 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2963 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2964 
2965 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2966 
2967 	ret = 0;
2968 out:
2969 	return ret;
2970 
2971 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2972 out_ip6_prohibit_entry:
2973 	kfree(net->ipv6.ip6_prohibit_entry);
2974 out_ip6_null_entry:
2975 	kfree(net->ipv6.ip6_null_entry);
2976 #endif
2977 out_ip6_dst_entries:
2978 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2979 out_ip6_dst_ops:
2980 	goto out;
2981 }
2982 
2983 static void __net_exit ip6_route_net_exit(struct net *net)
2984 {
2985 	kfree(net->ipv6.ip6_null_entry);
2986 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2987 	kfree(net->ipv6.ip6_prohibit_entry);
2988 	kfree(net->ipv6.ip6_blk_hole_entry);
2989 #endif
2990 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2991 }
2992 
2993 static int __net_init ip6_route_net_init_late(struct net *net)
2994 {
2995 #ifdef CONFIG_PROC_FS
2996 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2997 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2998 #endif
2999 	return 0;
3000 }
3001 
3002 static void __net_exit ip6_route_net_exit_late(struct net *net)
3003 {
3004 #ifdef CONFIG_PROC_FS
3005 	proc_net_remove(net, "ipv6_route");
3006 	proc_net_remove(net, "rt6_stats");
3007 #endif
3008 }
3009 
3010 static struct pernet_operations ip6_route_net_ops = {
3011 	.init = ip6_route_net_init,
3012 	.exit = ip6_route_net_exit,
3013 };
3014 
3015 static struct pernet_operations ip6_route_net_late_ops = {
3016 	.init = ip6_route_net_init_late,
3017 	.exit = ip6_route_net_exit_late,
3018 };
3019 
3020 static struct notifier_block ip6_route_dev_notifier = {
3021 	.notifier_call = ip6_route_dev_notify,
3022 	.priority = 0,
3023 };
3024 
3025 int __init ip6_route_init(void)
3026 {
3027 	int ret;
3028 
3029 	ret = -ENOMEM;
3030 	ip6_dst_ops_template.kmem_cachep =
3031 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3032 				  SLAB_HWCACHE_ALIGN, NULL);
3033 	if (!ip6_dst_ops_template.kmem_cachep)
3034 		goto out;
3035 
3036 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
3037 	if (ret)
3038 		goto out_kmem_cache;
3039 
3040 	ret = register_pernet_subsys(&ip6_route_net_ops);
3041 	if (ret)
3042 		goto out_dst_entries;
3043 
3044 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3045 
3046 	/* The loopback device is registered before this portion of code
3047 	 * runs, so the loopback reference in rt6_info is not taken
3048 	 * automatically; take it manually for init_net. */
3049 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3050 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3051 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3052 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3053 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3054 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3055 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3056 #endif
3057 	ret = fib6_init();
3058 	if (ret)
3059 		goto out_register_subsys;
3060 
3061 	ret = xfrm6_init();
3062 	if (ret)
3063 		goto out_fib6_init;
3064 
3065 	ret = fib6_rules_init();
3066 	if (ret)
3067 		goto xfrm6_init;
3068 
3069 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3070 	if (ret)
3071 		goto fib6_rules_init;
3072 
3073 	ret = -ENOBUFS;
3074 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3075 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3076 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3077 		goto out_register_late_subsys;
3078 
3079 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3080 	if (ret)
3081 		goto out_register_late_subsys;
3082 
3083 out:
3084 	return ret;
3085 
3086 out_register_late_subsys:
3087 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3088 fib6_rules_init:
3089 	fib6_rules_cleanup();
3090 xfrm6_init:
3091 	xfrm6_fini();
3092 out_fib6_init:
3093 	fib6_gc_cleanup();
3094 out_register_subsys:
3095 	unregister_pernet_subsys(&ip6_route_net_ops);
3096 out_dst_entries:
3097 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3098 out_kmem_cache:
3099 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3100 	goto out;
3101 }
3102 
3103 void ip6_route_cleanup(void)
3104 {
3105 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3106 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3107 	fib6_rules_cleanup();
3108 	xfrm6_fini();
3109 	fib6_gc_cleanup();
3110 	unregister_pernet_subsys(&ip6_route_net_ops);
3111 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3112 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3113 }
3114