1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<roque@di.fc.ul.pt>
7  *
8  *	This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  */
13 
14 /*	Changes:
15  *
16  *	YOSHIFUJI Hideaki @USAGI
17  *		reworked default router selection.
18  *		- respect outgoing interface
19  *		- select from (probably) reachable routers (i.e.
20  *		routers in REACHABLE, STALE, DELAY or PROBE states).
21  *		- always select the same router if it is (probably)
22  *		reachable.  otherwise, round-robin the list.
23  *	Ville Nuorvala
24  *		Fixed routing subtrees.
25  */
26 
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57 
58 #include <asm/uaccess.h>
59 
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63 
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66 
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74 
75 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void		ip6_dst_destroy(struct dst_entry *);
81 static void		ip6_dst_ifdown(struct dst_entry *,
82 				       struct net_device *dev, int how);
83 static int		 ip6_dst_gc(struct dst_ops *ops);
84 
85 static int		ip6_pkt_discard(struct sk_buff *skb);
86 static int		ip6_pkt_discard_out(struct sk_buff *skb);
87 static void		ip6_link_failure(struct sk_buff *skb);
88 static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89 
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 					   struct in6_addr *prefix, int prefixlen,
93 					   struct in6_addr *gwaddr, int ifindex,
94 					   unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 					   struct in6_addr *prefix, int prefixlen,
97 					   struct in6_addr *gwaddr, int ifindex);
98 #endif
99 
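/*
 * Copy-on-write for dst metrics: routes start out sharing a read-only
 * metrics array.  On the first write the route is bound to its inet_peer
 * entry, the old metrics are copied into peer->metrics, and dst->_metrics
 * is switched over with cmpxchg().  If another CPU won the race we use
 * whatever pointer it installed, or give up if that one is read-only too.
 */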
100 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101 {
102 	struct rt6_info *rt = (struct rt6_info *) dst;
103 	struct inet_peer *peer;
104 	u32 *p = NULL;
105 
106 	if (!rt->rt6i_peer)
107 		rt6_bind_peer(rt, 1);
108 
109 	peer = rt->rt6i_peer;
110 	if (peer) {
111 		u32 *old_p = __DST_METRICS_PTR(old);
112 		unsigned long prev, new;
113 
114 		p = peer->metrics;
115 		if (inet_metrics_new(peer))
116 			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117 
118 		new = (unsigned long) p;
119 		prev = cmpxchg(&dst->_metrics, old, new);
120 
121 		if (prev != old) {
122 			p = __DST_METRICS_PTR(prev);
123 			if (prev & DST_METRICS_READ_ONLY)
124 				p = NULL;
125 		}
126 	}
127 	return p;
128 }
129 
130 static struct dst_ops ip6_dst_ops_template = {
131 	.family			=	AF_INET6,
132 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
133 	.gc			=	ip6_dst_gc,
134 	.gc_thresh		=	1024,
135 	.check			=	ip6_dst_check,
136 	.default_advmss		=	ip6_default_advmss,
137 	.default_mtu		=	ip6_default_mtu,
138 	.cow_metrics		=	ipv6_cow_metrics,
139 	.destroy		=	ip6_dst_destroy,
140 	.ifdown			=	ip6_dst_ifdown,
141 	.negative_advice	=	ip6_negative_advice,
142 	.link_failure		=	ip6_link_failure,
143 	.update_pmtu		=	ip6_rt_update_pmtu,
144 	.local_out		=	__ip6_local_out,
145 };
146 
147 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
148 {
149 	return 0;
150 }
151 
152 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153 {
154 }
155 
156 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
157 					 unsigned long old)
158 {
159 	return NULL;
160 }
161 
162 static struct dst_ops ip6_dst_blackhole_ops = {
163 	.family			=	AF_INET6,
164 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
165 	.destroy		=	ip6_dst_destroy,
166 	.check			=	ip6_dst_check,
167 	.default_mtu		=	ip6_blackhole_default_mtu,
168 	.default_advmss		=	ip6_default_advmss,
169 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
170 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
171 };
172 
173 static const u32 ip6_template_metrics[RTAX_MAX] = {
174 	[RTAX_HOPLIMIT - 1] = 255,
175 };
176 
177 static struct rt6_info ip6_null_entry_template = {
178 	.dst = {
179 		.__refcnt	= ATOMIC_INIT(1),
180 		.__use		= 1,
181 		.obsolete	= -1,
182 		.error		= -ENETUNREACH,
183 		.input		= ip6_pkt_discard,
184 		.output		= ip6_pkt_discard_out,
185 	},
186 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
187 	.rt6i_protocol  = RTPROT_KERNEL,
188 	.rt6i_metric	= ~(u32) 0,
189 	.rt6i_ref	= ATOMIC_INIT(1),
190 };
191 
192 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
193 
194 static int ip6_pkt_prohibit(struct sk_buff *skb);
195 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
196 
197 static struct rt6_info ip6_prohibit_entry_template = {
198 	.dst = {
199 		.__refcnt	= ATOMIC_INIT(1),
200 		.__use		= 1,
201 		.obsolete	= -1,
202 		.error		= -EACCES,
203 		.input		= ip6_pkt_prohibit,
204 		.output		= ip6_pkt_prohibit_out,
205 	},
206 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
207 	.rt6i_protocol  = RTPROT_KERNEL,
208 	.rt6i_metric	= ~(u32) 0,
209 	.rt6i_ref	= ATOMIC_INIT(1),
210 };
211 
212 static struct rt6_info ip6_blk_hole_entry_template = {
213 	.dst = {
214 		.__refcnt	= ATOMIC_INIT(1),
215 		.__use		= 1,
216 		.obsolete	= -1,
217 		.error		= -EINVAL,
218 		.input		= dst_discard,
219 		.output		= dst_discard,
220 	},
221 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
222 	.rt6i_protocol  = RTPROT_KERNEL,
223 	.rt6i_metric	= ~(u32) 0,
224 	.rt6i_ref	= ATOMIC_INIT(1),
225 };
226 
227 #endif
228 
229 /* allocate dst with ip6_dst_ops */
230 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
231 {
232 	return (struct rt6_info *)dst_alloc(ops, 0);
233 }
234 
235 static void ip6_dst_destroy(struct dst_entry *dst)
236 {
237 	struct rt6_info *rt = (struct rt6_info *)dst;
238 	struct inet6_dev *idev = rt->rt6i_idev;
239 	struct inet_peer *peer = rt->rt6i_peer;
240 
241 	if (idev != NULL) {
242 		rt->rt6i_idev = NULL;
243 		in6_dev_put(idev);
244 	}
245 	if (peer) {
246 		rt->rt6i_peer = NULL;
247 		inet_putpeer(peer);
248 	}
249 }
250 
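/*
 * Generation counter for inet_peer bindings.  A cached route records the
 * current value in rt6i_peer_genid when it binds a peer; ip6_dst_check()
 * re-binds the peer whenever the global counter has moved on.
 */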
251 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
252 
253 static u32 rt6_peer_genid(void)
254 {
255 	return atomic_read(&__rt6_peer_genid);
256 }
257 
258 void rt6_bind_peer(struct rt6_info *rt, int create)
259 {
260 	struct inet_peer *peer;
261 
262 	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
263 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
264 		inet_putpeer(peer);
265 	else
266 		rt->rt6i_peer_genid = rt6_peer_genid();
267 }
268 
269 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
270 			   int how)
271 {
272 	struct rt6_info *rt = (struct rt6_info *)dst;
273 	struct inet6_dev *idev = rt->rt6i_idev;
274 	struct net_device *loopback_dev =
275 		dev_net(dev)->loopback_dev;
276 
277 	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
278 		struct inet6_dev *loopback_idev =
279 			in6_dev_get(loopback_dev);
280 		if (loopback_idev != NULL) {
281 			rt->rt6i_idev = loopback_idev;
282 			in6_dev_put(idev);
283 		}
284 	}
285 }
286 
287 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
288 {
289 	return (rt->rt6i_flags & RTF_EXPIRES) &&
290 		time_after(jiffies, rt->rt6i_expires);
291 }
292 
293 static inline int rt6_need_strict(struct in6_addr *daddr)
294 {
295 	return ipv6_addr_type(daddr) &
296 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
297 }
298 
299 /*
300  *	Route lookup. The relevant table->tb6_lock is assumed to be held.
301  */
302 
303 static inline struct rt6_info *rt6_device_match(struct net *net,
304 						    struct rt6_info *rt,
305 						    struct in6_addr *saddr,
306 						    int oif,
307 						    int flags)
308 {
309 	struct rt6_info *local = NULL;
310 	struct rt6_info *sprt;
311 
312 	if (!oif && ipv6_addr_any(saddr))
313 		goto out;
314 
315 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
316 		struct net_device *dev = sprt->rt6i_dev;
317 
318 		if (oif) {
319 			if (dev->ifindex == oif)
320 				return sprt;
321 			if (dev->flags & IFF_LOOPBACK) {
322 				if (sprt->rt6i_idev == NULL ||
323 				    sprt->rt6i_idev->dev->ifindex != oif) {
324 					if (flags & RT6_LOOKUP_F_IFACE && oif)
325 						continue;
326 					if (local && (!oif ||
327 						      local->rt6i_idev->dev->ifindex == oif))
328 						continue;
329 				}
330 				local = sprt;
331 			}
332 		} else {
333 			if (ipv6_chk_addr(net, saddr, dev,
334 					  flags & RT6_LOOKUP_F_IFACE))
335 				return sprt;
336 		}
337 	}
338 
339 	if (oif) {
340 		if (local)
341 			return local;
342 
343 		if (flags & RT6_LOOKUP_F_IFACE)
344 			return net->ipv6.ip6_null_entry;
345 	}
346 out:
347 	return rt;
348 }
349 
350 #ifdef CONFIG_IPV6_ROUTER_PREF
351 static void rt6_probe(struct rt6_info *rt)
352 {
353 	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
354 	/*
355 	 * Okay, this does not seem to be appropriate
356 	 * for now, however, we need to check if it
357 	 * is really so; aka Router Reachability Probing.
358 	 *
359 	 * Router Reachability Probe MUST be rate-limited
360 	 * to no more than one per minute.
361 	 */
362 	if (!neigh || (neigh->nud_state & NUD_VALID))
363 		return;
364 	read_lock_bh(&neigh->lock);
365 	if (!(neigh->nud_state & NUD_VALID) &&
366 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
367 		struct in6_addr mcaddr;
368 		struct in6_addr *target;
369 
370 		neigh->updated = jiffies;
371 		read_unlock_bh(&neigh->lock);
372 
373 		target = (struct in6_addr *)&neigh->primary_key;
374 		addrconf_addr_solict_mult(target, &mcaddr);
375 		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
376 	} else
377 		read_unlock_bh(&neigh->lock);
378 }
379 #else
380 static inline void rt6_probe(struct rt6_info *rt)
381 {
382 }
383 #endif
384 
385 /*
386  * Default Router Selection (RFC 2461 6.3.6)
387  */
388 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
389 {
390 	struct net_device *dev = rt->rt6i_dev;
391 	if (!oif || dev->ifindex == oif)
392 		return 2;
393 	if ((dev->flags & IFF_LOOPBACK) &&
394 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
395 		return 1;
396 	return 0;
397 }
398 
399 static inline int rt6_check_neigh(struct rt6_info *rt)
400 {
401 	struct neighbour *neigh = rt->rt6i_nexthop;
402 	int m;
403 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
404 	    !(rt->rt6i_flags & RTF_GATEWAY))
405 		m = 1;
406 	else if (neigh) {
407 		read_lock_bh(&neigh->lock);
408 		if (neigh->nud_state & NUD_VALID)
409 			m = 2;
410 #ifdef CONFIG_IPV6_ROUTER_PREF
411 		else if (neigh->nud_state & NUD_FAILED)
412 			m = 0;
413 #endif
414 		else
415 			m = 1;
416 		read_unlock_bh(&neigh->lock);
417 	} else
418 		m = 0;
419 	return m;
420 }
421 
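/*
 * Score a candidate default router: bits 0-1 come from the interface match
 * (rt6_check_dev), bits 2-3 from the RA router preference when
 * CONFIG_IPV6_ROUTER_PREF is enabled.  Neighbour reachability acts as a
 * gate rather than a score: with RT6_LOOKUP_F_REACHABLE an unreachable
 * nexthop disqualifies the route (-1) instead of merely ranking lower.
 */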
422 static int rt6_score_route(struct rt6_info *rt, int oif,
423 			   int strict)
424 {
425 	int m, n;
426 
427 	m = rt6_check_dev(rt, oif);
428 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
429 		return -1;
430 #ifdef CONFIG_IPV6_ROUTER_PREF
431 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
432 #endif
433 	n = rt6_check_neigh(rt);
434 	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
435 		return -1;
436 	return m;
437 }
438 
439 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
440 				   int *mpri, struct rt6_info *match)
441 {
442 	int m;
443 
444 	if (rt6_check_expired(rt))
445 		goto out;
446 
447 	m = rt6_score_route(rt, oif, strict);
448 	if (m < 0)
449 		goto out;
450 
451 	if (m > *mpri) {
452 		if (strict & RT6_LOOKUP_F_REACHABLE)
453 			rt6_probe(match);
454 		*mpri = m;
455 		match = rt;
456 	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
457 		rt6_probe(rt);
458 	}
459 
460 out:
461 	return match;
462 }
463 
464 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
465 				     struct rt6_info *rr_head,
466 				     u32 metric, int oif, int strict)
467 {
468 	struct rt6_info *rt, *match;
469 	int mpri = -1;
470 
471 	match = NULL;
472 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
473 	     rt = rt->dst.rt6_next)
474 		match = find_match(rt, oif, strict, &mpri, match);
475 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
476 	     rt = rt->dst.rt6_next)
477 		match = find_match(rt, oif, strict, &mpri, match);
478 
479 	return match;
480 }
481 
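/*
 * Select among sibling routes sharing the best metric.  fn->rr_ptr records
 * where the previous selection started, so when no (probably) reachable
 * router is found we round-robin through the candidates rather than
 * always returning the same one.
 */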
482 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
483 {
484 	struct rt6_info *match, *rt0;
485 	struct net *net;
486 
487 	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
488 		  __func__, fn->leaf, oif);
489 
490 	rt0 = fn->rr_ptr;
491 	if (!rt0)
492 		fn->rr_ptr = rt0 = fn->leaf;
493 
494 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
495 
496 	if (!match &&
497 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
498 		struct rt6_info *next = rt0->dst.rt6_next;
499 
500 		/* no entries matched; do round-robin */
501 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
502 			next = fn->leaf;
503 
504 		if (next != rt0)
505 			fn->rr_ptr = next;
506 	}
507 
508 	RT6_TRACE("%s() => %p\n",
509 		  __func__, match);
510 
511 	net = dev_net(rt0->rt6i_dev);
512 	return match ? match : net->ipv6.ip6_null_entry;
513 }
514 
515 #ifdef CONFIG_IPV6_ROUTE_INFO
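/*
 * Handle a Route Information option received in a Router Advertisement
 * (RFC 4191): add, refresh or delete the corresponding RTF_ROUTEINFO
 * route depending on the advertised lifetime.
 */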
516 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
517 		  struct in6_addr *gwaddr)
518 {
519 	struct net *net = dev_net(dev);
520 	struct route_info *rinfo = (struct route_info *) opt;
521 	struct in6_addr prefix_buf, *prefix;
522 	unsigned int pref;
523 	unsigned long lifetime;
524 	struct rt6_info *rt;
525 
526 	if (len < sizeof(struct route_info)) {
527 		return -EINVAL;
528 	}
529 
530 	/* Sanity check for prefix_len and length */
531 	if (rinfo->length > 3) {
532 		return -EINVAL;
533 	} else if (rinfo->prefix_len > 128) {
534 		return -EINVAL;
535 	} else if (rinfo->prefix_len > 64) {
536 		if (rinfo->length < 2) {
537 			return -EINVAL;
538 		}
539 	} else if (rinfo->prefix_len > 0) {
540 		if (rinfo->length < 1) {
541 			return -EINVAL;
542 		}
543 	}
544 
545 	pref = rinfo->route_pref;
546 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
547 		return -EINVAL;
548 
549 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
550 
551 	if (rinfo->length == 3)
552 		prefix = (struct in6_addr *)rinfo->prefix;
553 	else {
554 		/* this function is safe */
555 		ipv6_addr_prefix(&prefix_buf,
556 				 (struct in6_addr *)rinfo->prefix,
557 				 rinfo->prefix_len);
558 		prefix = &prefix_buf;
559 	}
560 
561 	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
562 				dev->ifindex);
563 
564 	if (rt && !lifetime) {
565 		ip6_del_rt(rt);
566 		rt = NULL;
567 	}
568 
569 	if (!rt && lifetime)
570 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
571 					pref);
572 	else if (rt)
573 		rt->rt6i_flags = RTF_ROUTEINFO |
574 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
575 
576 	if (rt) {
577 		if (!addrconf_finite_timeout(lifetime)) {
578 			rt->rt6i_flags &= ~RTF_EXPIRES;
579 		} else {
580 			rt->rt6i_expires = jiffies + HZ * lifetime;
581 			rt->rt6i_flags |= RTF_EXPIRES;
582 		}
583 		dst_release(&rt->dst);
584 	}
585 	return 0;
586 }
587 #endif
588 
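/*
 * When the lookup resolved to the null entry, walk back up the fib6 tree
 * towards the root, descending into a parent's source-address subtree
 * (FIB6_SUBTREE) where one exists, until a node carrying route information
 * (RTN_RTINFO) is found or the tree root is reached.  The macro then jumps
 * back to the caller's "restart" label with the new fn.
 */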
589 #define BACKTRACK(__net, saddr)			\
590 do { \
591 	if (rt == __net->ipv6.ip6_null_entry) {	\
592 		struct fib6_node *pn; \
593 		while (1) { \
594 			if (fn->fn_flags & RTN_TL_ROOT) \
595 				goto out; \
596 			pn = fn->parent; \
597 			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
598 				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
599 			else \
600 				fn = pn; \
601 			if (fn->fn_flags & RTN_RTINFO) \
602 				goto restart; \
603 		} \
604 	} \
605 } while(0)
606 
607 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
608 					     struct fib6_table *table,
609 					     struct flowi6 *fl6, int flags)
610 {
611 	struct fib6_node *fn;
612 	struct rt6_info *rt;
613 
614 	read_lock_bh(&table->tb6_lock);
615 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
616 restart:
617 	rt = fn->leaf;
618 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
619 	BACKTRACK(net, &fl6->saddr);
620 out:
621 	dst_use(&rt->dst, jiffies);
622 	read_unlock_bh(&table->tb6_lock);
623 	return rt;
624 
625 }
626 
627 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
628 			    const struct in6_addr *saddr, int oif, int strict)
629 {
630 	struct flowi6 fl6 = {
631 		.flowi6_oif = oif,
632 		.daddr = *daddr,
633 	};
634 	struct dst_entry *dst;
635 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
636 
637 	if (saddr) {
638 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
639 		flags |= RT6_LOOKUP_F_HAS_SADDR;
640 	}
641 
642 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
643 	if (dst->error == 0)
644 		return (struct rt6_info *) dst;
645 
646 	dst_release(dst);
647 
648 	return NULL;
649 }
650 
651 EXPORT_SYMBOL(rt6_lookup);
652 
653 /* ip6_ins_rt is called with table->tb6_lock NOT held (it takes it itself).
654    It takes ownership of the new route entry: if the addition fails for
655    any reason, the route is freed.  In any case, unless the caller holds
656    a reference of its own, the route may be destroyed.
657  */
658 
659 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
660 {
661 	int err;
662 	struct fib6_table *table;
663 
664 	table = rt->rt6i_table;
665 	write_lock_bh(&table->tb6_lock);
666 	err = fib6_add(&table->tb6_root, rt, info);
667 	write_unlock_bh(&table->tb6_lock);
668 
669 	return err;
670 }
671 
672 int ip6_ins_rt(struct rt6_info *rt)
673 {
674 	struct nl_info info = {
675 		.nl_net = dev_net(rt->rt6i_dev),
676 	};
677 	return __ip6_ins_rt(rt, &info);
678 }
679 
680 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
681 				      struct in6_addr *saddr)
682 {
683 	struct rt6_info *rt;
684 
685 	/*
686 	 *	Clone the route.
687 	 */
688 
689 	rt = ip6_rt_copy(ort);
690 
691 	if (rt) {
692 		struct neighbour *neigh;
693 		int attempts = !in_softirq();
694 
695 		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
696 			if (rt->rt6i_dst.plen != 128 &&
697 			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
698 				rt->rt6i_flags |= RTF_ANYCAST;
699 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
700 		}
701 
702 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
703 		rt->rt6i_dst.plen = 128;
704 		rt->rt6i_flags |= RTF_CACHE;
705 		rt->dst.flags |= DST_HOST;
706 
707 #ifdef CONFIG_IPV6_SUBTREES
708 		if (rt->rt6i_src.plen && saddr) {
709 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
710 			rt->rt6i_src.plen = 128;
711 		}
712 #endif
713 
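		/*
		 * If the neighbour table is full, temporarily relax the dst GC
		 * limits, force a garbage-collection pass and retry once
		 * (attempts is non-zero only when not running in softirq).
		 */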
714 	retry:
715 		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
716 		if (IS_ERR(neigh)) {
717 			struct net *net = dev_net(rt->rt6i_dev);
718 			int saved_rt_min_interval =
719 				net->ipv6.sysctl.ip6_rt_gc_min_interval;
720 			int saved_rt_elasticity =
721 				net->ipv6.sysctl.ip6_rt_gc_elasticity;
722 
723 			if (attempts-- > 0) {
724 				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
725 				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
726 
727 				ip6_dst_gc(&net->ipv6.ip6_dst_ops);
728 
729 				net->ipv6.sysctl.ip6_rt_gc_elasticity =
730 					saved_rt_elasticity;
731 				net->ipv6.sysctl.ip6_rt_gc_min_interval =
732 					saved_rt_min_interval;
733 				goto retry;
734 			}
735 
736 			if (net_ratelimit())
737 				printk(KERN_WARNING
738 				       "ipv6: Neighbour table overflow.\n");
739 			dst_free(&rt->dst);
740 			return NULL;
741 		}
742 		rt->rt6i_nexthop = neigh;
743 
744 	}
745 
746 	return rt;
747 }
748 
749 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
750 {
751 	struct rt6_info *rt = ip6_rt_copy(ort);
752 	if (rt) {
753 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
754 		rt->rt6i_dst.plen = 128;
755 		rt->rt6i_flags |= RTF_CACHE;
756 		rt->dst.flags |= DST_HOST;
757 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
758 	}
759 	return rt;
760 }
761 
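/*
 * Core route resolver shared by the input and output paths.  Under
 * tb6_lock the best route is selected; unless it is already a cached host
 * route (or the null entry) the lock is dropped and the route is COWed or
 * cloned into a /128 RTF_CACHE entry which is then inserted into the
 * table.  If the insert loses a race with another CPU, the lookup is
 * simply repeated (up to three attempts).
 */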
762 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
763 				      struct flowi6 *fl6, int flags)
764 {
765 	struct fib6_node *fn;
766 	struct rt6_info *rt, *nrt;
767 	int strict = 0;
768 	int attempts = 3;
769 	int err;
770 	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
771 
772 	strict |= flags & RT6_LOOKUP_F_IFACE;
773 
774 relookup:
775 	read_lock_bh(&table->tb6_lock);
776 
777 restart_2:
778 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
779 
780 restart:
781 	rt = rt6_select(fn, oif, strict | reachable);
782 
783 	BACKTRACK(net, &fl6->saddr);
784 	if (rt == net->ipv6.ip6_null_entry ||
785 	    rt->rt6i_flags & RTF_CACHE)
786 		goto out;
787 
788 	dst_hold(&rt->dst);
789 	read_unlock_bh(&table->tb6_lock);
790 
791 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
792 		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
793 	else if (!(rt->dst.flags & DST_HOST))
794 		nrt = rt6_alloc_clone(rt, &fl6->daddr);
795 	else
796 		goto out2;
797 
798 	dst_release(&rt->dst);
799 	rt = nrt ? : net->ipv6.ip6_null_entry;
800 
801 	dst_hold(&rt->dst);
802 	if (nrt) {
803 		err = ip6_ins_rt(nrt);
804 		if (!err)
805 			goto out2;
806 	}
807 
808 	if (--attempts <= 0)
809 		goto out2;
810 
811 	/*
812 	 * Race condition! In the gap, when table->tb6_lock was
813 	 * released someone could insert this route.  Relookup.
814 	 */
815 	dst_release(&rt->dst);
816 	goto relookup;
817 
818 out:
819 	if (reachable) {
820 		reachable = 0;
821 		goto restart_2;
822 	}
823 	dst_hold(&rt->dst);
824 	read_unlock_bh(&table->tb6_lock);
825 out2:
826 	rt->dst.lastuse = jiffies;
827 	rt->dst.__use++;
828 
829 	return rt;
830 }
831 
832 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
833 					    struct flowi6 *fl6, int flags)
834 {
835 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
836 }
837 
838 void ip6_route_input(struct sk_buff *skb)
839 {
840 	struct ipv6hdr *iph = ipv6_hdr(skb);
841 	struct net *net = dev_net(skb->dev);
842 	int flags = RT6_LOOKUP_F_HAS_SADDR;
843 	struct flowi6 fl6 = {
844 		.flowi6_iif = skb->dev->ifindex,
845 		.daddr = iph->daddr,
846 		.saddr = iph->saddr,
847 		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
848 		.flowi6_mark = skb->mark,
849 		.flowi6_proto = iph->nexthdr,
850 	};
851 
852 	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
853 		flags |= RT6_LOOKUP_F_IFACE;
854 
855 	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
856 }
857 
858 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
859 					     struct flowi6 *fl6, int flags)
860 {
861 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
862 }
863 
864 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
865 				    struct flowi6 *fl6)
866 {
867 	int flags = 0;
868 
869 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
870 		flags |= RT6_LOOKUP_F_IFACE;
871 
872 	if (!ipv6_addr_any(&fl6->saddr))
873 		flags |= RT6_LOOKUP_F_HAS_SADDR;
874 	else if (sk)
875 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
876 
877 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
878 }
879 
880 EXPORT_SYMBOL(ip6_route_output);
881 
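/*
 * Wrap an existing route in a blackhole dst: input and output silently
 * discard packets while the metrics, device and addresses of the original
 * route are preserved.  The reference on dst_orig is released.
 */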
882 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
883 {
884 	struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
885 	struct rt6_info *ort = (struct rt6_info *) dst_orig;
886 	struct dst_entry *new = NULL;
887 
888 	if (rt) {
889 		new = &rt->dst;
890 
891 		new->__use = 1;
892 		new->input = dst_discard;
893 		new->output = dst_discard;
894 
895 		dst_copy_metrics(new, &ort->dst);
896 		new->dev = ort->dst.dev;
897 		if (new->dev)
898 			dev_hold(new->dev);
899 		rt->rt6i_idev = ort->rt6i_idev;
900 		if (rt->rt6i_idev)
901 			in6_dev_hold(rt->rt6i_idev);
902 		rt->rt6i_expires = 0;
903 
904 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
905 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
906 		rt->rt6i_metric = 0;
907 
908 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
909 #ifdef CONFIG_IPV6_SUBTREES
910 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
911 #endif
912 
913 		dst_free(new);
914 	}
915 
916 	dst_release(dst_orig);
917 	return new ? new : ERR_PTR(-ENOMEM);
918 }
919 
920 /*
921  *	Destination cache support functions
922  */
923 
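/*
 * A cached dst remains valid only while the fib6 node it was taken from
 * still carries the serial number recorded in the caller's cookie; any
 * change to the tree bumps fn_sernum and forces a fresh route lookup.
 */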
924 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
925 {
926 	struct rt6_info *rt;
927 
928 	rt = (struct rt6_info *) dst;
929 
930 	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
931 		if (rt->rt6i_peer_genid != rt6_peer_genid()) {
932 			if (!rt->rt6i_peer)
933 				rt6_bind_peer(rt, 0);
934 			rt->rt6i_peer_genid = rt6_peer_genid();
935 		}
936 		return dst;
937 	}
938 	return NULL;
939 }
940 
941 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
942 {
943 	struct rt6_info *rt = (struct rt6_info *) dst;
944 
945 	if (rt) {
946 		if (rt->rt6i_flags & RTF_CACHE) {
947 			if (rt6_check_expired(rt)) {
948 				ip6_del_rt(rt);
949 				dst = NULL;
950 			}
951 		} else {
952 			dst_release(dst);
953 			dst = NULL;
954 		}
955 	}
956 	return dst;
957 }
958 
959 static void ip6_link_failure(struct sk_buff *skb)
960 {
961 	struct rt6_info *rt;
962 
963 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
964 
965 	rt = (struct rt6_info *) skb_dst(skb);
966 	if (rt) {
967 		if (rt->rt6i_flags&RTF_CACHE) {
968 			dst_set_expires(&rt->dst, 0);
969 			rt->rt6i_flags |= RTF_EXPIRES;
970 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
971 			rt->rt6i_node->fn_sernum = -1;
972 	}
973 }
974 
975 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
976 {
977 	struct rt6_info *rt6 = (struct rt6_info*)dst;
978 
979 	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
980 		rt6->rt6i_flags |= RTF_MODIFIED;
981 		if (mtu < IPV6_MIN_MTU) {
982 			u32 features = dst_metric(dst, RTAX_FEATURES);
983 			mtu = IPV6_MIN_MTU;
984 			features |= RTAX_FEATURE_ALLFRAG;
985 			dst_metric_set(dst, RTAX_FEATURES, features);
986 		}
987 		dst_metric_set(dst, RTAX_MTU, mtu);
988 	}
989 }
990 
991 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
992 {
993 	struct net_device *dev = dst->dev;
994 	unsigned int mtu = dst_mtu(dst);
995 	struct net *net = dev_net(dev);
996 
997 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
998 
999 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1000 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1001 
1002 	/*
1003 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1004 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1005 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
1006 	 * rely only on pmtu discovery"
1007 	 */
1008 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1009 		mtu = IPV6_MAXPLEN;
1010 	return mtu;
1011 }
1012 
1013 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1014 {
1015 	unsigned int mtu = IPV6_MIN_MTU;
1016 	struct inet6_dev *idev;
1017 
1018 	rcu_read_lock();
1019 	idev = __in6_dev_get(dst->dev);
1020 	if (idev)
1021 		mtu = idev->cnf.mtu6;
1022 	rcu_read_unlock();
1023 
1024 	return mtu;
1025 }
1026 
1027 static struct dst_entry *icmp6_dst_gc_list;
1028 static DEFINE_SPINLOCK(icmp6_dst_lock);
1029 
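/*
 * dst entries allocated for ndisc replies are never inserted into the
 * FIB.  They are chained on icmp6_dst_gc_list instead and reclaimed by
 * icmp6_dst_gc() once their refcount drops to zero.
 */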
1030 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1031 				  struct neighbour *neigh,
1032 				  const struct in6_addr *addr)
1033 {
1034 	struct rt6_info *rt;
1035 	struct inet6_dev *idev = in6_dev_get(dev);
1036 	struct net *net = dev_net(dev);
1037 
1038 	if (unlikely(idev == NULL))
1039 		return NULL;
1040 
1041 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1042 	if (unlikely(rt == NULL)) {
1043 		in6_dev_put(idev);
1044 		goto out;
1045 	}
1046 
1047 	dev_hold(dev);
1048 	if (neigh)
1049 		neigh_hold(neigh);
1050 	else {
1051 		neigh = ndisc_get_neigh(dev, addr);
1052 		if (IS_ERR(neigh))
1053 			neigh = NULL;
1054 	}
1055 
1056 	rt->rt6i_dev	  = dev;
1057 	rt->rt6i_idev     = idev;
1058 	rt->rt6i_nexthop  = neigh;
1059 	atomic_set(&rt->dst.__refcnt, 1);
1060 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1061 	rt->dst.output  = ip6_output;
1062 
1063 #if 0	/* there's no chance to use these for ndisc */
1064 	rt->dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1065 				? DST_HOST
1066 				: 0;
1067 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1068 	rt->rt6i_dst.plen = 128;
1069 #endif
1070 
1071 	spin_lock_bh(&icmp6_dst_lock);
1072 	rt->dst.next = icmp6_dst_gc_list;
1073 	icmp6_dst_gc_list = &rt->dst;
1074 	spin_unlock_bh(&icmp6_dst_lock);
1075 
1076 	fib6_force_start_gc(net);
1077 
1078 out:
1079 	return &rt->dst;
1080 }
1081 
1082 int icmp6_dst_gc(void)
1083 {
1084 	struct dst_entry *dst, **pprev;
1085 	int more = 0;
1086 
1087 	spin_lock_bh(&icmp6_dst_lock);
1088 	pprev = &icmp6_dst_gc_list;
1089 
1090 	while ((dst = *pprev) != NULL) {
1091 		if (!atomic_read(&dst->__refcnt)) {
1092 			*pprev = dst->next;
1093 			dst_free(dst);
1094 		} else {
1095 			pprev = &dst->next;
1096 			++more;
1097 		}
1098 	}
1099 
1100 	spin_unlock_bh(&icmp6_dst_lock);
1101 
1102 	return more;
1103 }
1104 
1105 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1106 			    void *arg)
1107 {
1108 	struct dst_entry *dst, **pprev;
1109 
1110 	spin_lock_bh(&icmp6_dst_lock);
1111 	pprev = &icmp6_dst_gc_list;
1112 	while ((dst = *pprev) != NULL) {
1113 		struct rt6_info *rt = (struct rt6_info *) dst;
1114 		if (func(rt, arg)) {
1115 			*pprev = dst->next;
1116 			dst_free(dst);
1117 		} else {
1118 			pprev = &dst->next;
1119 		}
1120 	}
1121 	spin_unlock_bh(&icmp6_dst_lock);
1122 }
1123 
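/*
 * dst garbage collection, rate-limited by ip6_rt_gc_min_interval unless
 * the entry count already exceeds ip6_rt_max_size.  ip6_rt_gc_expire is
 * bumped on every forced run and decayed by (expire >> elasticity)
 * afterwards, so sustained pressure makes fib6_run_gc() progressively
 * more aggressive.
 */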
1124 static int ip6_dst_gc(struct dst_ops *ops)
1125 {
1126 	unsigned long now = jiffies;
1127 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1128 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1129 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1130 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1131 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1132 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1133 	int entries;
1134 
1135 	entries = dst_entries_get_fast(ops);
1136 	if (time_after(rt_last_gc + rt_min_interval, now) &&
1137 	    entries <= rt_max_size)
1138 		goto out;
1139 
1140 	net->ipv6.ip6_rt_gc_expire++;
1141 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1142 	net->ipv6.ip6_rt_last_gc = now;
1143 	entries = dst_entries_get_slow(ops);
1144 	if (entries < ops->gc_thresh)
1145 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1146 out:
1147 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1148 	return entries > rt_max_size;
1149 }
1150 
1151 /* Clean the host part of a prefix. Not strictly necessary with a radix
1152    tree, but it results in cleaner routing tables.
1153 
1154    Remove this only once everything is known to work!
1155  */
1156 
1157 int ip6_dst_hoplimit(struct dst_entry *dst)
1158 {
1159 	int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1160 	if (hoplimit == 0) {
1161 		struct net_device *dev = dst->dev;
1162 		struct inet6_dev *idev;
1163 
1164 		rcu_read_lock();
1165 		idev = __in6_dev_get(dev);
1166 		if (idev)
1167 			hoplimit = idev->cnf.hop_limit;
1168 		else
1169 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1170 		rcu_read_unlock();
1171 	}
1172 	return hoplimit;
1173 }
1174 EXPORT_SYMBOL(ip6_dst_hoplimit);
1175 
1176 /*
1177  *
1178  */
1179 
1180 int ip6_route_add(struct fib6_config *cfg)
1181 {
1182 	int err;
1183 	struct net *net = cfg->fc_nlinfo.nl_net;
1184 	struct rt6_info *rt = NULL;
1185 	struct net_device *dev = NULL;
1186 	struct inet6_dev *idev = NULL;
1187 	struct fib6_table *table;
1188 	int addr_type;
1189 
1190 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1191 		return -EINVAL;
1192 #ifndef CONFIG_IPV6_SUBTREES
1193 	if (cfg->fc_src_len)
1194 		return -EINVAL;
1195 #endif
1196 	if (cfg->fc_ifindex) {
1197 		err = -ENODEV;
1198 		dev = dev_get_by_index(net, cfg->fc_ifindex);
1199 		if (!dev)
1200 			goto out;
1201 		idev = in6_dev_get(dev);
1202 		if (!idev)
1203 			goto out;
1204 	}
1205 
1206 	if (cfg->fc_metric == 0)
1207 		cfg->fc_metric = IP6_RT_PRIO_USER;
1208 
1209 	table = fib6_new_table(net, cfg->fc_table);
1210 	if (table == NULL) {
1211 		err = -ENOBUFS;
1212 		goto out;
1213 	}
1214 
1215 	rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1216 
1217 	if (rt == NULL) {
1218 		err = -ENOMEM;
1219 		goto out;
1220 	}
1221 
1222 	rt->dst.obsolete = -1;
1223 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1224 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1225 				0;
1226 
1227 	if (cfg->fc_protocol == RTPROT_UNSPEC)
1228 		cfg->fc_protocol = RTPROT_BOOT;
1229 	rt->rt6i_protocol = cfg->fc_protocol;
1230 
1231 	addr_type = ipv6_addr_type(&cfg->fc_dst);
1232 
1233 	if (addr_type & IPV6_ADDR_MULTICAST)
1234 		rt->dst.input = ip6_mc_input;
1235 	else if (cfg->fc_flags & RTF_LOCAL)
1236 		rt->dst.input = ip6_input;
1237 	else
1238 		rt->dst.input = ip6_forward;
1239 
1240 	rt->dst.output = ip6_output;
1241 
1242 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1243 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1244 	if (rt->rt6i_dst.plen == 128)
1245 	       rt->dst.flags = DST_HOST;
1246 
1247 #ifdef CONFIG_IPV6_SUBTREES
1248 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1249 	rt->rt6i_src.plen = cfg->fc_src_len;
1250 #endif
1251 
1252 	rt->rt6i_metric = cfg->fc_metric;
1253 
1254 	/* We cannot add true routes via loopback here,
1255 	   they would result in kernel looping; promote them to reject routes
1256 	 */
1257 	if ((cfg->fc_flags & RTF_REJECT) ||
1258 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1259 					      && !(cfg->fc_flags&RTF_LOCAL))) {
1260 		/* hold loopback dev/idev if we haven't done so. */
1261 		if (dev != net->loopback_dev) {
1262 			if (dev) {
1263 				dev_put(dev);
1264 				in6_dev_put(idev);
1265 			}
1266 			dev = net->loopback_dev;
1267 			dev_hold(dev);
1268 			idev = in6_dev_get(dev);
1269 			if (!idev) {
1270 				err = -ENODEV;
1271 				goto out;
1272 			}
1273 		}
1274 		rt->dst.output = ip6_pkt_discard_out;
1275 		rt->dst.input = ip6_pkt_discard;
1276 		rt->dst.error = -ENETUNREACH;
1277 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1278 		goto install_route;
1279 	}
1280 
1281 	if (cfg->fc_flags & RTF_GATEWAY) {
1282 		struct in6_addr *gw_addr;
1283 		int gwa_type;
1284 
1285 		gw_addr = &cfg->fc_gateway;
1286 		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1287 		gwa_type = ipv6_addr_type(gw_addr);
1288 
1289 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1290 			struct rt6_info *grt;
1291 
1292 			/* IPv6 strictly inhibits using non-link-local
1293 			   addresses as the nexthop address.
1294 			   Otherwise, the router will not be able to send redirects.
1295 			   That is very good, but in some (rare!) circumstances
1296 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
1297 			   some exceptions. --ANK
1298 			 */
1299 			err = -EINVAL;
1300 			if (!(gwa_type&IPV6_ADDR_UNICAST))
1301 				goto out;
1302 
1303 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1304 
1305 			err = -EHOSTUNREACH;
1306 			if (grt == NULL)
1307 				goto out;
1308 			if (dev) {
1309 				if (dev != grt->rt6i_dev) {
1310 					dst_release(&grt->dst);
1311 					goto out;
1312 				}
1313 			} else {
1314 				dev = grt->rt6i_dev;
1315 				idev = grt->rt6i_idev;
1316 				dev_hold(dev);
1317 				in6_dev_hold(grt->rt6i_idev);
1318 			}
1319 			if (!(grt->rt6i_flags&RTF_GATEWAY))
1320 				err = 0;
1321 			dst_release(&grt->dst);
1322 
1323 			if (err)
1324 				goto out;
1325 		}
1326 		err = -EINVAL;
1327 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1328 			goto out;
1329 	}
1330 
1331 	err = -ENODEV;
1332 	if (dev == NULL)
1333 		goto out;
1334 
1335 	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1336 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1337 		if (IS_ERR(rt->rt6i_nexthop)) {
1338 			err = PTR_ERR(rt->rt6i_nexthop);
1339 			rt->rt6i_nexthop = NULL;
1340 			goto out;
1341 		}
1342 	}
1343 
1344 	rt->rt6i_flags = cfg->fc_flags;
1345 
1346 install_route:
1347 	if (cfg->fc_mx) {
1348 		struct nlattr *nla;
1349 		int remaining;
1350 
1351 		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1352 			int type = nla_type(nla);
1353 
1354 			if (type) {
1355 				if (type > RTAX_MAX) {
1356 					err = -EINVAL;
1357 					goto out;
1358 				}
1359 
1360 				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1361 			}
1362 		}
1363 	}
1364 
1365 	rt->dst.dev = dev;
1366 	rt->rt6i_idev = idev;
1367 	rt->rt6i_table = table;
1368 
1369 	cfg->fc_nlinfo.nl_net = dev_net(dev);
1370 
1371 	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1372 
1373 out:
1374 	if (dev)
1375 		dev_put(dev);
1376 	if (idev)
1377 		in6_dev_put(idev);
1378 	if (rt)
1379 		dst_free(&rt->dst);
1380 	return err;
1381 }
1382 
1383 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1384 {
1385 	int err;
1386 	struct fib6_table *table;
1387 	struct net *net = dev_net(rt->rt6i_dev);
1388 
1389 	if (rt == net->ipv6.ip6_null_entry)
1390 		return -ENOENT;
1391 
1392 	table = rt->rt6i_table;
1393 	write_lock_bh(&table->tb6_lock);
1394 
1395 	err = fib6_del(rt, info);
1396 	dst_release(&rt->dst);
1397 
1398 	write_unlock_bh(&table->tb6_lock);
1399 
1400 	return err;
1401 }
1402 
1403 int ip6_del_rt(struct rt6_info *rt)
1404 {
1405 	struct nl_info info = {
1406 		.nl_net = dev_net(rt->rt6i_dev),
1407 	};
1408 	return __ip6_del_rt(rt, &info);
1409 }
1410 
1411 static int ip6_route_del(struct fib6_config *cfg)
1412 {
1413 	struct fib6_table *table;
1414 	struct fib6_node *fn;
1415 	struct rt6_info *rt;
1416 	int err = -ESRCH;
1417 
1418 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1419 	if (table == NULL)
1420 		return err;
1421 
1422 	read_lock_bh(&table->tb6_lock);
1423 
1424 	fn = fib6_locate(&table->tb6_root,
1425 			 &cfg->fc_dst, cfg->fc_dst_len,
1426 			 &cfg->fc_src, cfg->fc_src_len);
1427 
1428 	if (fn) {
1429 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1430 			if (cfg->fc_ifindex &&
1431 			    (rt->rt6i_dev == NULL ||
1432 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1433 				continue;
1434 			if (cfg->fc_flags & RTF_GATEWAY &&
1435 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1436 				continue;
1437 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1438 				continue;
1439 			dst_hold(&rt->dst);
1440 			read_unlock_bh(&table->tb6_lock);
1441 
1442 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1443 		}
1444 	}
1445 	read_unlock_bh(&table->tb6_lock);
1446 
1447 	return err;
1448 }
1449 
1450 /*
1451  *	Handle redirects
1452  */
1453 struct ip6rd_flowi {
1454 	struct flowi6 fl6;
1455 	struct in6_addr gateway;
1456 };
1457 
1458 static struct rt6_info *__ip6_route_redirect(struct net *net,
1459 					     struct fib6_table *table,
1460 					     struct flowi6 *fl6,
1461 					     int flags)
1462 {
1463 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1464 	struct rt6_info *rt;
1465 	struct fib6_node *fn;
1466 
1467 	/*
1468 	 * Get the "current" route for this destination and
1469 	 * check if the redirect has come from the appropriate router.
1470 	 *
1471 	 * RFC 2461 specifies that redirects should only be
1472 	 * accepted if they come from the nexthop to the target.
1473 	 * Due to the way the routes are chosen, this notion
1474 	 * is a bit fuzzy and one might need to check all possible
1475 	 * routes.
1476 	 */
1477 
1478 	read_lock_bh(&table->tb6_lock);
1479 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1480 restart:
1481 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1482 		/*
1483 		 * Current route is on-link; redirect is always invalid.
1484 		 *
1485 		 * It seems the previous statement is not quite true: the target
1486 		 * could be a node that regards us as on-link (e.g. proxy ndisc),
1487 		 * and the router serving it might then decide that we should
1488 		 * learn the truth 8)8) --ANK (980726).
1489 		 */
1490 		if (rt6_check_expired(rt))
1491 			continue;
1492 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1493 			continue;
1494 		if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1495 			continue;
1496 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1497 			continue;
1498 		break;
1499 	}
1500 
1501 	if (!rt)
1502 		rt = net->ipv6.ip6_null_entry;
1503 	BACKTRACK(net, &fl6->saddr);
1504 out:
1505 	dst_hold(&rt->dst);
1506 
1507 	read_unlock_bh(&table->tb6_lock);
1508 
1509 	return rt;
1510 };
1511 
1512 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1513 					   struct in6_addr *src,
1514 					   struct in6_addr *gateway,
1515 					   struct net_device *dev)
1516 {
1517 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1518 	struct net *net = dev_net(dev);
1519 	struct ip6rd_flowi rdfl = {
1520 		.fl6 = {
1521 			.flowi6_oif = dev->ifindex,
1522 			.daddr = *dest,
1523 			.saddr = *src,
1524 		},
1525 	};
1526 
1527 	ipv6_addr_copy(&rdfl.gateway, gateway);
1528 
1529 	if (rt6_need_strict(dest))
1530 		flags |= RT6_LOOKUP_F_IFACE;
1531 
1532 	return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1533 						   flags, __ip6_route_redirect);
1534 }
1535 
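/*
 * Process a received redirect: update the neighbour entry with the
 * advertised link-layer address, then install a /128 RTF_CACHE copy of
 * the current route via the new nexthop and announce the change through
 * NETEVENT_REDIRECT.
 */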
1536 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1537 		  struct in6_addr *saddr,
1538 		  struct neighbour *neigh, u8 *lladdr, int on_link)
1539 {
1540 	struct rt6_info *rt, *nrt = NULL;
1541 	struct netevent_redirect netevent;
1542 	struct net *net = dev_net(neigh->dev);
1543 
1544 	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1545 
1546 	if (rt == net->ipv6.ip6_null_entry) {
1547 		if (net_ratelimit())
1548 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1549 			       "for redirect target\n");
1550 		goto out;
1551 	}
1552 
1553 	/*
1554 	 *	We have finally decided to accept it.
1555 	 */
1556 
1557 	neigh_update(neigh, lladdr, NUD_STALE,
1558 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
1559 		     NEIGH_UPDATE_F_OVERRIDE|
1560 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1561 				     NEIGH_UPDATE_F_ISROUTER))
1562 		     );
1563 
1564 	/*
1565 	 * Redirect received -> path was valid.
1566 	 * Look, redirects are sent only in response to data packets,
1567 	 * so that this nexthop apparently is reachable. --ANK
1568 	 */
1569 	dst_confirm(&rt->dst);
1570 
1571 	/* Duplicate redirect: silently ignore. */
1572 	if (neigh == rt->dst.neighbour)
1573 		goto out;
1574 
1575 	nrt = ip6_rt_copy(rt);
1576 	if (nrt == NULL)
1577 		goto out;
1578 
1579 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1580 	if (on_link)
1581 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1582 
1583 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1584 	nrt->rt6i_dst.plen = 128;
1585 	nrt->dst.flags |= DST_HOST;
1586 
1587 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1588 	nrt->rt6i_nexthop = neigh_clone(neigh);
1589 
1590 	if (ip6_ins_rt(nrt))
1591 		goto out;
1592 
1593 	netevent.old = &rt->dst;
1594 	netevent.new = &nrt->dst;
1595 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1596 
1597 	if (rt->rt6i_flags&RTF_CACHE) {
1598 		ip6_del_rt(rt);
1599 		return;
1600 	}
1601 
1602 out:
1603 	dst_release(&rt->dst);
1604 }
1605 
1606 /*
1607  *	Handle ICMP "packet too big" messages
1608  *	i.e. Path MTU discovery
1609  */
1610 
1611 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1612 			     struct net *net, u32 pmtu, int ifindex)
1613 {
1614 	struct rt6_info *rt, *nrt;
1615 	int allfrag = 0;
1616 again:
1617 	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1618 	if (rt == NULL)
1619 		return;
1620 
1621 	if (rt6_check_expired(rt)) {
1622 		ip6_del_rt(rt);
1623 		goto again;
1624 	}
1625 
1626 	if (pmtu >= dst_mtu(&rt->dst))
1627 		goto out;
1628 
1629 	if (pmtu < IPV6_MIN_MTU) {
1630 		/*
1631 		 * According to RFC 2460, when a node receives a Packet Too Big
1632 		 * message reporting a PMTU below the IPv6 Minimum Link MTU
1633 		 * (1280), the PMTU is set to that minimum and a Fragment header
1634 		 * should be included in every subsequent packet on the path.
1635 		 */
1636 		pmtu = IPV6_MIN_MTU;
1637 		allfrag = 1;
1638 	}
1639 
1640 	/* New mtu received -> path was valid.
1641 	   Packet Too Big messages are sent only in response to data packets,
1642 	   so this nexthop apparently is reachable. --ANK
1643 	 */
1644 	dst_confirm(&rt->dst);
1645 
1646 	/* Host route. If it is static, it would be better
1647 	   not to override it but to add a new one, so that
1648 	   when the cache entry expires the old pmtu
1649 	   is restored automatically.
1650 	 */
1651 	if (rt->rt6i_flags & RTF_CACHE) {
1652 		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1653 		if (allfrag) {
1654 			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1655 			features |= RTAX_FEATURE_ALLFRAG;
1656 			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1657 		}
1658 		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1659 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1660 		goto out;
1661 	}
1662 
1663 	/* Network route.
1664 	   Two cases are possible:
1665 	   1. It is a connected route. Action: COW it.
1666 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1667 	 */
1668 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1669 		nrt = rt6_alloc_cow(rt, daddr, saddr);
1670 	else
1671 		nrt = rt6_alloc_clone(rt, daddr);
1672 
1673 	if (nrt) {
1674 		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1675 		if (allfrag) {
1676 			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1677 			features |= RTAX_FEATURE_ALLFRAG;
1678 			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1679 		}
1680 
1681 		/* According to RFC 1981, probing for a PMTU increase shouldn't
1682 		 * happen within 5 minutes; the recommended timer is 10 minutes.
1683 		 * Here the route expiration time is set to ip6_rt_mtu_expires,
1684 		 * which defaults to 10 minutes. Once it expires, the decreased
1685 		 * pmtu is dropped and PMTU increase detection happens automatically.
1686 		 */
1687 		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1688 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1689 
1690 		ip6_ins_rt(nrt);
1691 	}
1692 out:
1693 	dst_release(&rt->dst);
1694 }
1695 
1696 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1697 			struct net_device *dev, u32 pmtu)
1698 {
1699 	struct net *net = dev_net(dev);
1700 
1701 	/*
1702 	 * RFC 1981 states that a node "MUST reduce the size of the packets it
1703 	 * is sending along the path" that caused the Packet Too Big message.
1704 	 * Since it's not possible in the general case to determine which
1705 	 * interface was used to send the original packet, we update the MTU
1706 	 * on the interface that will be used to send future packets. We also
1707 	 * update the MTU on the interface that received the Packet Too Big in
1708 	 * case the original packet was forced out that interface with
1709 	 * SO_BINDTODEVICE or similar. This is the next best thing to the
1710 	 * correct behaviour, which would be to update the MTU on all
1711 	 * interfaces.
1712 	 */
1713 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1714 	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1715 }
1716 
1717 /*
1718  *	Misc support functions
1719  */
1720 
1721 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1722 {
1723 	struct net *net = dev_net(ort->rt6i_dev);
1724 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1725 
1726 	if (rt) {
1727 		rt->dst.input = ort->dst.input;
1728 		rt->dst.output = ort->dst.output;
1729 
1730 		dst_copy_metrics(&rt->dst, &ort->dst);
1731 		rt->dst.error = ort->dst.error;
1732 		rt->dst.dev = ort->dst.dev;
1733 		if (rt->dst.dev)
1734 			dev_hold(rt->dst.dev);
1735 		rt->rt6i_idev = ort->rt6i_idev;
1736 		if (rt->rt6i_idev)
1737 			in6_dev_hold(rt->rt6i_idev);
1738 		rt->dst.lastuse = jiffies;
1739 		rt->rt6i_expires = 0;
1740 
1741 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1742 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1743 		rt->rt6i_metric = 0;
1744 
1745 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1746 #ifdef CONFIG_IPV6_SUBTREES
1747 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1748 #endif
1749 		rt->rt6i_table = ort->rt6i_table;
1750 	}
1751 	return rt;
1752 }
1753 
1754 #ifdef CONFIG_IPV6_ROUTE_INFO
1755 static struct rt6_info *rt6_get_route_info(struct net *net,
1756 					   struct in6_addr *prefix, int prefixlen,
1757 					   struct in6_addr *gwaddr, int ifindex)
1758 {
1759 	struct fib6_node *fn;
1760 	struct rt6_info *rt = NULL;
1761 	struct fib6_table *table;
1762 
1763 	table = fib6_get_table(net, RT6_TABLE_INFO);
1764 	if (table == NULL)
1765 		return NULL;
1766 
1767 	write_lock_bh(&table->tb6_lock);
1768 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1769 	if (!fn)
1770 		goto out;
1771 
1772 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1773 		if (rt->rt6i_dev->ifindex != ifindex)
1774 			continue;
1775 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1776 			continue;
1777 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1778 			continue;
1779 		dst_hold(&rt->dst);
1780 		break;
1781 	}
1782 out:
1783 	write_unlock_bh(&table->tb6_lock);
1784 	return rt;
1785 }
1786 
1787 static struct rt6_info *rt6_add_route_info(struct net *net,
1788 					   struct in6_addr *prefix, int prefixlen,
1789 					   struct in6_addr *gwaddr, int ifindex,
1790 					   unsigned pref)
1791 {
1792 	struct fib6_config cfg = {
1793 		.fc_table	= RT6_TABLE_INFO,
1794 		.fc_metric	= IP6_RT_PRIO_USER,
1795 		.fc_ifindex	= ifindex,
1796 		.fc_dst_len	= prefixlen,
1797 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1798 				  RTF_UP | RTF_PREF(pref),
1799 		.fc_nlinfo.pid = 0,
1800 		.fc_nlinfo.nlh = NULL,
1801 		.fc_nlinfo.nl_net = net,
1802 	};
1803 
1804 	ipv6_addr_copy(&cfg.fc_dst, prefix);
1805 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1806 
1807 	/* We should treat it as a default route if prefix length is 0. */
1808 	if (!prefixlen)
1809 		cfg.fc_flags |= RTF_DEFAULT;
1810 
1811 	ip6_route_add(&cfg);
1812 
1813 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1814 }
1815 #endif
1816 
1817 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1818 {
1819 	struct rt6_info *rt;
1820 	struct fib6_table *table;
1821 
1822 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1823 	if (table == NULL)
1824 		return NULL;
1825 
1826 	write_lock_bh(&table->tb6_lock);
1827 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1828 		if (dev == rt->rt6i_dev &&
1829 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1830 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
1831 			break;
1832 	}
1833 	if (rt)
1834 		dst_hold(&rt->dst);
1835 	write_unlock_bh(&table->tb6_lock);
1836 	return rt;
1837 }
1838 
1839 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1840 				     struct net_device *dev,
1841 				     unsigned int pref)
1842 {
1843 	struct fib6_config cfg = {
1844 		.fc_table	= RT6_TABLE_DFLT,
1845 		.fc_metric	= IP6_RT_PRIO_USER,
1846 		.fc_ifindex	= dev->ifindex,
1847 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1848 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1849 		.fc_nlinfo.pid = 0,
1850 		.fc_nlinfo.nlh = NULL,
1851 		.fc_nlinfo.nl_net = dev_net(dev),
1852 	};
1853 
1854 	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1855 
1856 	ip6_route_add(&cfg);
1857 
1858 	return rt6_get_dflt_router(gwaddr, dev);
1859 }
1860 
1861 void rt6_purge_dflt_routers(struct net *net)
1862 {
1863 	struct rt6_info *rt;
1864 	struct fib6_table *table;
1865 
1866 	/* NOTE: Keep consistent with rt6_get_dflt_router */
1867 	table = fib6_get_table(net, RT6_TABLE_DFLT);
1868 	if (table == NULL)
1869 		return;
1870 
1871 restart:
1872 	read_lock_bh(&table->tb6_lock);
1873 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1874 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1875 			dst_hold(&rt->dst);
1876 			read_unlock_bh(&table->tb6_lock);
1877 			ip6_del_rt(rt);
1878 			goto restart;
1879 		}
1880 	}
1881 	read_unlock_bh(&table->tb6_lock);
1882 }
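
/*
 * One caller of this (an assumption noted here, not enforced by this
 * file) is the addrconf path that switches IPv6 forwarding on: a node
 * that becomes a router must not keep default routes learned from
 * Router Advertisements, so it purges them roughly as
 *
 *	if (forwarding_enabled)
 *		rt6_purge_dflt_routers(net);
 *
 * where forwarding_enabled stands in for the real sysctl logic.
 */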
1883 
1884 static void rtmsg_to_fib6_config(struct net *net,
1885 				 struct in6_rtmsg *rtmsg,
1886 				 struct fib6_config *cfg)
1887 {
1888 	memset(cfg, 0, sizeof(*cfg));
1889 
1890 	cfg->fc_table = RT6_TABLE_MAIN;
1891 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1892 	cfg->fc_metric = rtmsg->rtmsg_metric;
1893 	cfg->fc_expires = rtmsg->rtmsg_info;
1894 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1895 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
1896 	cfg->fc_flags = rtmsg->rtmsg_flags;
1897 
1898 	cfg->fc_nlinfo.nl_net = net;
1899 
1900 	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1901 	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1902 	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1903 }
1904 
1905 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1906 {
1907 	struct fib6_config cfg;
1908 	struct in6_rtmsg rtmsg;
1909 	int err;
1910 
1911 	switch(cmd) {
1912 	case SIOCADDRT:		/* Add a route */
1913 	case SIOCDELRT:		/* Delete a route */
1914 		if (!capable(CAP_NET_ADMIN))
1915 			return -EPERM;
1916 		err = copy_from_user(&rtmsg, arg,
1917 				     sizeof(struct in6_rtmsg));
1918 		if (err)
1919 			return -EFAULT;
1920 
1921 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1922 
1923 		rtnl_lock();
1924 		switch (cmd) {
1925 		case SIOCADDRT:
1926 			err = ip6_route_add(&cfg);
1927 			break;
1928 		case SIOCDELRT:
1929 			err = ip6_route_del(&cfg);
1930 			break;
1931 		default:
1932 			err = -EINVAL;
1933 		}
1934 		rtnl_unlock();
1935 
1936 		return err;
1937 	}
1938 
1939 	return -EINVAL;
1940 }
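
/*
 * Illustrative user-space sketch (headers and error handling trimmed;
 * the interface name is an example): SIOCADDRT / SIOCDELRT are issued on
 * an AF_INET6 socket with a struct in6_rtmsg, which rtmsg_to_fib6_config()
 * above turns into a fib6_config.  Adding 2001:db8::/32 on eth0:
 *
 *	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
 *	struct in6_rtmsg rtm;
 *
 *	memset(&rtm, 0, sizeof(rtm));
 *	inet_pton(AF_INET6, "2001:db8::", &rtm.rtmsg_dst);
 *	rtm.rtmsg_dst_len = 32;
 *	rtm.rtmsg_metric  = 1;
 *	rtm.rtmsg_flags   = RTF_UP;
 *	rtm.rtmsg_ifindex = if_nametoindex("eth0");
 *	if (ioctl(fd, SIOCADDRT, &rtm) < 0)
 *		perror("SIOCADDRT");
 *
 * CAP_NET_ADMIN is required, as enforced above.
 */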
1941 
1942 /*
1943  *	Drop the packet on the floor
1944  */
1945 
1946 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1947 {
1948 	int type;
1949 	struct dst_entry *dst = skb_dst(skb);
1950 	switch (ipstats_mib_noroutes) {
1951 	case IPSTATS_MIB_INNOROUTES:
1952 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1953 		if (type == IPV6_ADDR_ANY) {
1954 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1955 				      IPSTATS_MIB_INADDRERRORS);
1956 			break;
1957 		}
1958 		/* FALLTHROUGH */
1959 	case IPSTATS_MIB_OUTNOROUTES:
1960 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1961 			      ipstats_mib_noroutes);
1962 		break;
1963 	}
1964 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1965 	kfree_skb(skb);
1966 	return 0;
1967 }
1968 
1969 static int ip6_pkt_discard(struct sk_buff *skb)
1970 {
1971 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1972 }
1973 
1974 static int ip6_pkt_discard_out(struct sk_buff *skb)
1975 {
1976 	skb->dev = skb_dst(skb)->dev;
1977 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1978 }
1979 
1980 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1981 
1982 static int ip6_pkt_prohibit(struct sk_buff *skb)
1983 {
1984 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1985 }
1986 
1987 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1988 {
1989 	skb->dev = skb_dst(skb)->dev;
1990 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1991 }
1992 
1993 #endif
1994 
1995 /*
1996  *	Allocate a dst for local (unicast / anycast) address.
1997  */
1998 
1999 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2000 				    const struct in6_addr *addr,
2001 				    int anycast)
2002 {
2003 	struct net *net = dev_net(idev->dev);
2004 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
2005 	struct neighbour *neigh;
2006 
2007 	if (rt == NULL) {
2008 		if (net_ratelimit())
2009 			pr_warning("IPv6:  Maximum number of routes reached,"
2010 				   " consider increasing route/max_size.\n");
2011 		return ERR_PTR(-ENOMEM);
2012 	}
2013 
2014 	dev_hold(net->loopback_dev);
2015 	in6_dev_hold(idev);
2016 
2017 	rt->dst.flags = DST_HOST;
2018 	rt->dst.input = ip6_input;
2019 	rt->dst.output = ip6_output;
2020 	rt->rt6i_dev = net->loopback_dev;
2021 	rt->rt6i_idev = idev;
2022 	rt->dst.obsolete = -1;
2023 
2024 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2025 	if (anycast)
2026 		rt->rt6i_flags |= RTF_ANYCAST;
2027 	else
2028 		rt->rt6i_flags |= RTF_LOCAL;
2029 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2030 	if (IS_ERR(neigh)) {
2031 		dst_free(&rt->dst);
2032 
2033 		return ERR_CAST(neigh);
2034 	}
2035 	rt->rt6i_nexthop = neigh;
2036 
2037 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2038 	rt->rt6i_dst.plen = 128;
2039 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2040 
2041 	atomic_set(&rt->dst.__refcnt, 1);
2042 
2043 	return rt;
2044 }
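
/*
 * Sketch of a typical caller (idev and ifp are assumed addrconf locals):
 * when a unicast address is configured, addrconf allocates its host
 * route roughly as
 *
 *	struct rt6_info *rt = addrconf_dst_alloc(idev, &ifp->addr, 0);
 *
 *	if (IS_ERR(rt))
 *		return PTR_ERR(rt);
 *	ifp->rt = rt;
 *
 * and anycast setup passes anycast == 1 so the route gets RTF_ANYCAST
 * instead of RTF_LOCAL.
 */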
2045 
2046 struct arg_dev_net {
2047 	struct net_device *dev;
2048 	struct net *net;
2049 };
2050 
2051 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2052 {
2053 	const struct arg_dev_net *adn = arg;
2054 	const struct net_device *dev = adn->dev;
2055 
2056 	if ((rt->rt6i_dev == dev || dev == NULL) &&
2057 	    rt != adn->net->ipv6.ip6_null_entry) {
2058 		RT6_TRACE("deleted by ifdown %p\n", rt);
2059 		return -1;
2060 	}
2061 	return 0;
2062 }
2063 
2064 void rt6_ifdown(struct net *net, struct net_device *dev)
2065 {
2066 	struct arg_dev_net adn = {
2067 		.dev = dev,
2068 		.net = net,
2069 	};
2070 
2071 	fib6_clean_all(net, fib6_ifdown, 0, &adn);
2072 	icmp6_clean_all(fib6_ifdown, &adn);
2073 }
2074 
2075 struct rt6_mtu_change_arg
2076 {
2077 	struct net_device *dev;
2078 	unsigned mtu;
2079 };
2080 
2081 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2082 {
2083 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2084 	struct inet6_dev *idev;
2085 
2086 	/* In IPv6, PMTU discovery is not optional,
2087 	   so the RTAX_MTU lock cannot disable it.
2088 	   We still use this lock to block changes
2089 	   caused by addrconf/ndisc.
2090 	*/
2091 
2092 	idev = __in6_dev_get(arg->dev);
2093 	if (idev == NULL)
2094 		return 0;
2095 
2096 	/* An administrative MTU increase cannot be discovered by
2097 	   IPv6 PMTU discovery, so the PMTU increase must be applied here.
2098 	   Since RFC 1981 does not cover administrative MTU increases,
2099 	   updating the PMTU on such an increase is a MUST (e.g. jumbo frames).
2100 	 */
2101 	/*
2102 	   If the new MTU is less than the route PMTU, the new MTU will be the
2103 	   lowest MTU in the path; update the route PMTU to reflect the PMTU
2104 	   decrease.  If the new MTU is greater than the route PMTU, and the
2105 	   old MTU was the lowest MTU in the path, update the route PMTU
2106 	   to reflect the increase.  In that case, if another node in the path
2107 	   still has a lower MTU, a Packet Too Big message will trigger
2108 	   PMTU discovery again.
2109 	 */
2110 	if (rt->rt6i_dev == arg->dev &&
2111 	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2112 	    (dst_mtu(&rt->dst) >= arg->mtu ||
2113 	     (dst_mtu(&rt->dst) < arg->mtu &&
2114 	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2115 		dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2116 	}
2117 	return 0;
2118 }
2119 
2120 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2121 {
2122 	struct rt6_mtu_change_arg arg = {
2123 		.dev = dev,
2124 		.mtu = mtu,
2125 	};
2126 
2127 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2128 }
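
/*
 * Worked example for the rule in rt6_mtu_change_route() (device name and
 * numbers are assumed): addrconf calls rt6_mtu_change(dev, dev->mtu) on
 * NETDEV_CHANGEMTU before it updates idev->cnf.mtu6, so mtu6 still holds
 * the old link MTU when the walk above runs.  Raising eth0 from 1500 to
 * 9000 therefore bumps every unlocked route whose cached MTU is still
 * 1500 (== mtu6) up to 9000, while a route whose PMTU was lowered to
 * 1280 by a Packet Too Big message keeps 1280.  Lowering the MTU to 1400
 * clamps every unlocked route on eth0 with a cached MTU >= 1400 down to
 * 1400.
 */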
2129 
2130 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2131 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
2132 	[RTA_OIF]               = { .type = NLA_U32 },
2133 	[RTA_IIF]		= { .type = NLA_U32 },
2134 	[RTA_PRIORITY]          = { .type = NLA_U32 },
2135 	[RTA_METRICS]           = { .type = NLA_NESTED },
2136 };
2137 
2138 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2139 			      struct fib6_config *cfg)
2140 {
2141 	struct rtmsg *rtm;
2142 	struct nlattr *tb[RTA_MAX+1];
2143 	int err;
2144 
2145 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2146 	if (err < 0)
2147 		goto errout;
2148 
2149 	err = -EINVAL;
2150 	rtm = nlmsg_data(nlh);
2151 	memset(cfg, 0, sizeof(*cfg));
2152 
2153 	cfg->fc_table = rtm->rtm_table;
2154 	cfg->fc_dst_len = rtm->rtm_dst_len;
2155 	cfg->fc_src_len = rtm->rtm_src_len;
2156 	cfg->fc_flags = RTF_UP;
2157 	cfg->fc_protocol = rtm->rtm_protocol;
2158 
2159 	if (rtm->rtm_type == RTN_UNREACHABLE)
2160 		cfg->fc_flags |= RTF_REJECT;
2161 
2162 	if (rtm->rtm_type == RTN_LOCAL)
2163 		cfg->fc_flags |= RTF_LOCAL;
2164 
2165 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2166 	cfg->fc_nlinfo.nlh = nlh;
2167 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2168 
2169 	if (tb[RTA_GATEWAY]) {
2170 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2171 		cfg->fc_flags |= RTF_GATEWAY;
2172 	}
2173 
2174 	if (tb[RTA_DST]) {
2175 		int plen = (rtm->rtm_dst_len + 7) >> 3;
2176 
2177 		if (nla_len(tb[RTA_DST]) < plen)
2178 			goto errout;
2179 
2180 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2181 	}
2182 
2183 	if (tb[RTA_SRC]) {
2184 		int plen = (rtm->rtm_src_len + 7) >> 3;
2185 
2186 		if (nla_len(tb[RTA_SRC]) < plen)
2187 			goto errout;
2188 
2189 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2190 	}
2191 
2192 	if (tb[RTA_OIF])
2193 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2194 
2195 	if (tb[RTA_PRIORITY])
2196 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2197 
2198 	if (tb[RTA_METRICS]) {
2199 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2200 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2201 	}
2202 
2203 	if (tb[RTA_TABLE])
2204 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2205 
2206 	err = 0;
2207 errout:
2208 	return err;
2209 }
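
/*
 * Illustrative mapping (the command line is an example): a request such as
 *
 *	ip -6 route add 2001:db8::/32 via fe80::1 dev eth0 metric 1024
 *
 * arrives as an RTM_NEWROUTE message with rtm_dst_len = 32 and the
 * attributes RTA_DST (2001:db8::), RTA_GATEWAY (fe80::1), RTA_OIF (the
 * ifindex of eth0) and RTA_PRIORITY (1024); the code above copies them
 * into fc_dst/fc_dst_len, fc_gateway (setting RTF_GATEWAY), fc_ifindex
 * and fc_metric respectively.
 */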
2210 
2211 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2212 {
2213 	struct fib6_config cfg;
2214 	int err;
2215 
2216 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2217 	if (err < 0)
2218 		return err;
2219 
2220 	return ip6_route_del(&cfg);
2221 }
2222 
2223 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2224 {
2225 	struct fib6_config cfg;
2226 	int err;
2227 
2228 	err = rtm_to_fib6_config(skb, nlh, &cfg);
2229 	if (err < 0)
2230 		return err;
2231 
2232 	return ip6_route_add(&cfg);
2233 }
2234 
2235 static inline size_t rt6_nlmsg_size(void)
2236 {
2237 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2238 	       + nla_total_size(16) /* RTA_SRC */
2239 	       + nla_total_size(16) /* RTA_DST */
2240 	       + nla_total_size(16) /* RTA_GATEWAY */
2241 	       + nla_total_size(16) /* RTA_PREFSRC */
2242 	       + nla_total_size(4) /* RTA_TABLE */
2243 	       + nla_total_size(4) /* RTA_IIF */
2244 	       + nla_total_size(4) /* RTA_OIF */
2245 	       + nla_total_size(4) /* RTA_PRIORITY */
2246 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2247 	       + nla_total_size(sizeof(struct rta_cacheinfo));
2248 }
2249 
2250 static int rt6_fill_node(struct net *net,
2251 			 struct sk_buff *skb, struct rt6_info *rt,
2252 			 struct in6_addr *dst, struct in6_addr *src,
2253 			 int iif, int type, u32 pid, u32 seq,
2254 			 int prefix, int nowait, unsigned int flags)
2255 {
2256 	struct rtmsg *rtm;
2257 	struct nlmsghdr *nlh;
2258 	long expires;
2259 	u32 table;
2260 
2261 	if (prefix) {	/* user wants prefix routes only */
2262 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2263 			/* success since this is not a prefix route */
2264 			return 1;
2265 		}
2266 	}
2267 
2268 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2269 	if (nlh == NULL)
2270 		return -EMSGSIZE;
2271 
2272 	rtm = nlmsg_data(nlh);
2273 	rtm->rtm_family = AF_INET6;
2274 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
2275 	rtm->rtm_src_len = rt->rt6i_src.plen;
2276 	rtm->rtm_tos = 0;
2277 	if (rt->rt6i_table)
2278 		table = rt->rt6i_table->tb6_id;
2279 	else
2280 		table = RT6_TABLE_UNSPEC;
2281 	rtm->rtm_table = table;
2282 	NLA_PUT_U32(skb, RTA_TABLE, table);
2283 	if (rt->rt6i_flags&RTF_REJECT)
2284 		rtm->rtm_type = RTN_UNREACHABLE;
2285 	else if (rt->rt6i_flags&RTF_LOCAL)
2286 		rtm->rtm_type = RTN_LOCAL;
2287 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2288 		rtm->rtm_type = RTN_LOCAL;
2289 	else
2290 		rtm->rtm_type = RTN_UNICAST;
2291 	rtm->rtm_flags = 0;
2292 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2293 	rtm->rtm_protocol = rt->rt6i_protocol;
2294 	if (rt->rt6i_flags&RTF_DYNAMIC)
2295 		rtm->rtm_protocol = RTPROT_REDIRECT;
2296 	else if (rt->rt6i_flags & RTF_ADDRCONF)
2297 		rtm->rtm_protocol = RTPROT_KERNEL;
2298 	else if (rt->rt6i_flags&RTF_DEFAULT)
2299 		rtm->rtm_protocol = RTPROT_RA;
2300 
2301 	if (rt->rt6i_flags&RTF_CACHE)
2302 		rtm->rtm_flags |= RTM_F_CLONED;
2303 
2304 	if (dst) {
2305 		NLA_PUT(skb, RTA_DST, 16, dst);
2306 		rtm->rtm_dst_len = 128;
2307 	} else if (rtm->rtm_dst_len)
2308 		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2309 #ifdef CONFIG_IPV6_SUBTREES
2310 	if (src) {
2311 		NLA_PUT(skb, RTA_SRC, 16, src);
2312 		rtm->rtm_src_len = 128;
2313 	} else if (rtm->rtm_src_len)
2314 		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2315 #endif
2316 	if (iif) {
2317 #ifdef CONFIG_IPV6_MROUTE
2318 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2319 			int err = ip6mr_get_route(net, skb, rtm, nowait);
2320 			if (err <= 0) {
2321 				if (!nowait) {
2322 					if (err == 0)
2323 						return 0;
2324 					goto nla_put_failure;
2325 				} else {
2326 					if (err == -EMSGSIZE)
2327 						goto nla_put_failure;
2328 				}
2329 			}
2330 		} else
2331 #endif
2332 			NLA_PUT_U32(skb, RTA_IIF, iif);
2333 	} else if (dst) {
2334 		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2335 		struct in6_addr saddr_buf;
2336 		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2337 				       dst, 0, &saddr_buf) == 0)
2338 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2339 	}
2340 
2341 	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2342 		goto nla_put_failure;
2343 
2344 	if (rt->dst.neighbour)
2345 		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2346 
2347 	if (rt->dst.dev)
2348 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2349 
2350 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2351 
2352 	if (!(rt->rt6i_flags & RTF_EXPIRES))
2353 		expires = 0;
2354 	else if (rt->rt6i_expires - jiffies < INT_MAX)
2355 		expires = rt->rt6i_expires - jiffies;
2356 	else
2357 		expires = INT_MAX;
2358 
2359 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2360 			       expires, rt->dst.error) < 0)
2361 		goto nla_put_failure;
2362 
2363 	return nlmsg_end(skb, nlh);
2364 
2365 nla_put_failure:
2366 	nlmsg_cancel(skb, nlh);
2367 	return -EMSGSIZE;
2368 }
2369 
2370 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2371 {
2372 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2373 	int prefix;
2374 
2375 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2376 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2377 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2378 	} else
2379 		prefix = 0;
2380 
2381 	return rt6_fill_node(arg->net,
2382 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2383 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2384 		     prefix, 0, NLM_F_MULTI);
2385 }
2386 
2387 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2388 {
2389 	struct net *net = sock_net(in_skb->sk);
2390 	struct nlattr *tb[RTA_MAX+1];
2391 	struct rt6_info *rt;
2392 	struct sk_buff *skb;
2393 	struct rtmsg *rtm;
2394 	struct flowi6 fl6;
2395 	int err, iif = 0;
2396 
2397 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2398 	if (err < 0)
2399 		goto errout;
2400 
2401 	err = -EINVAL;
2402 	memset(&fl6, 0, sizeof(fl6));
2403 
2404 	if (tb[RTA_SRC]) {
2405 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2406 			goto errout;
2407 
2408 		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2409 	}
2410 
2411 	if (tb[RTA_DST]) {
2412 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2413 			goto errout;
2414 
2415 		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2416 	}
2417 
2418 	if (tb[RTA_IIF])
2419 		iif = nla_get_u32(tb[RTA_IIF]);
2420 
2421 	if (tb[RTA_OIF])
2422 		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2423 
2424 	if (iif) {
2425 		struct net_device *dev;
2426 		dev = __dev_get_by_index(net, iif);
2427 		if (!dev) {
2428 			err = -ENODEV;
2429 			goto errout;
2430 		}
2431 	}
2432 
2433 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2434 	if (skb == NULL) {
2435 		err = -ENOBUFS;
2436 		goto errout;
2437 	}
2438 
2439 	/* Reserve room for dummy headers; this skb can pass
2440 	   through a good chunk of the routing engine.
2441 	 */
2442 	skb_reset_mac_header(skb);
2443 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2444 
2445 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2446 	skb_dst_set(skb, &rt->dst);
2447 
2448 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2449 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2450 			    nlh->nlmsg_seq, 0, 0, 0);
2451 	if (err < 0) {
2452 		kfree_skb(skb);
2453 		goto errout;
2454 	}
2455 
2456 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2457 errout:
2458 	return err;
2459 }
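
/*
 * Sketch of the round trip (the command is an example): "ip -6 route get
 * 2001:db8::1" sends an RTM_GETROUTE message whose RTA_DST lands in
 * fl6.daddr above; the result of ip6_route_output() is encoded with
 * rt6_fill_node() and unicast back to the requesting socket.  A failed
 * lookup still yields an entry (e.g. ip6_null_entry) whose dst.error is
 * reported through the cacheinfo attribute.
 */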
2460 
2461 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2462 {
2463 	struct sk_buff *skb;
2464 	struct net *net = info->nl_net;
2465 	u32 seq;
2466 	int err;
2467 
2468 	err = -ENOBUFS;
2469 	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2470 
2471 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2472 	if (skb == NULL)
2473 		goto errout;
2474 
2475 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2476 				event, info->pid, seq, 0, 0, 0);
2477 	if (err < 0) {
2478 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2479 		WARN_ON(err == -EMSGSIZE);
2480 		kfree_skb(skb);
2481 		goto errout;
2482 	}
2483 	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2484 		    info->nlh, gfp_any());
2485 	return;
2486 errout:
2487 	if (err < 0)
2488 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2489 }
2490 
2491 static int ip6_route_dev_notify(struct notifier_block *this,
2492 				unsigned long event, void *data)
2493 {
2494 	struct net_device *dev = (struct net_device *)data;
2495 	struct net *net = dev_net(dev);
2496 
2497 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2498 		net->ipv6.ip6_null_entry->dst.dev = dev;
2499 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2500 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2501 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2502 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2503 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2504 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2505 #endif
2506 	}
2507 
2508 	return NOTIFY_OK;
2509 }
2510 
2511 /*
2512  *	/proc
2513  */
2514 
2515 #ifdef CONFIG_PROC_FS
2516 
2517 struct rt6_proc_arg
2518 {
2519 	char *buffer;
2520 	int offset;
2521 	int length;
2522 	int skip;
2523 	int len;
2524 };
2525 
2526 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2527 {
2528 	struct seq_file *m = p_arg;
2529 
2530 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2531 
2532 #ifdef CONFIG_IPV6_SUBTREES
2533 	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2534 #else
2535 	seq_puts(m, "00000000000000000000000000000000 00 ");
2536 #endif
2537 
2538 	if (rt->rt6i_nexthop) {
2539 		seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2540 	} else {
2541 		seq_puts(m, "00000000000000000000000000000000");
2542 	}
2543 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
2544 		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2545 		   rt->dst.__use, rt->rt6i_flags,
2546 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
2547 	return 0;
2548 }
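
/*
 * Example /proc/net/ipv6_route line for the format above (all values are
 * made up); fields are destination/plen, source/plen, next hop, metric,
 * refcnt, use, flags and device, addresses printed as bare hex:
 *
 *	20010db8000000000000000000000000 20 00000000000000000000000000000000 00 fe800000000000000000000000000001 00000400 00000001 00000000 00000003 eth0
 */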
2549 
2550 static int ipv6_route_show(struct seq_file *m, void *v)
2551 {
2552 	struct net *net = (struct net *)m->private;
2553 	fib6_clean_all(net, rt6_info_route, 0, m);
2554 	return 0;
2555 }
2556 
2557 static int ipv6_route_open(struct inode *inode, struct file *file)
2558 {
2559 	return single_open_net(inode, file, ipv6_route_show);
2560 }
2561 
2562 static const struct file_operations ipv6_route_proc_fops = {
2563 	.owner		= THIS_MODULE,
2564 	.open		= ipv6_route_open,
2565 	.read		= seq_read,
2566 	.llseek		= seq_lseek,
2567 	.release	= single_release_net,
2568 };
2569 
2570 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2571 {
2572 	struct net *net = (struct net *)seq->private;
2573 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2574 		   net->ipv6.rt6_stats->fib_nodes,
2575 		   net->ipv6.rt6_stats->fib_route_nodes,
2576 		   net->ipv6.rt6_stats->fib_rt_alloc,
2577 		   net->ipv6.rt6_stats->fib_rt_entries,
2578 		   net->ipv6.rt6_stats->fib_rt_cache,
2579 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2580 		   net->ipv6.rt6_stats->fib_discarded_routes);
2581 
2582 	return 0;
2583 }
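
/*
 * Example /proc/net/rt6_stats line for the format above (numbers are
 * illustrative): "006f 002a 0000 0051 0003 0049 0000" -- i.e. fib nodes,
 * route nodes, rt_alloc, rt_entries, rt_cache, dst entries in use and
 * discarded routes, all in hex.
 */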
2584 
2585 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2586 {
2587 	return single_open_net(inode, file, rt6_stats_seq_show);
2588 }
2589 
2590 static const struct file_operations rt6_stats_seq_fops = {
2591 	.owner	 = THIS_MODULE,
2592 	.open	 = rt6_stats_seq_open,
2593 	.read	 = seq_read,
2594 	.llseek	 = seq_lseek,
2595 	.release = single_release_net,
2596 };
2597 #endif	/* CONFIG_PROC_FS */
2598 
2599 #ifdef CONFIG_SYSCTL
2600 
2601 static
2602 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2603 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2604 {
2605 	struct net *net;
2606 	int delay;
2607 	if (!write)
2608 		return -EINVAL;
2609 
2610 	net = (struct net *)ctl->extra1;
2611 	delay = net->ipv6.sysctl.flush_delay;
2612 	proc_dointvec(ctl, write, buffer, lenp, ppos);
2613 	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2614 	return 0;
2615 }
2616 
2617 ctl_table ipv6_route_table_template[] = {
2618 	{
2619 		.procname	=	"flush",
2620 		.data		=	&init_net.ipv6.sysctl.flush_delay,
2621 		.maxlen		=	sizeof(int),
2622 		.mode		=	0200,
2623 		.proc_handler	=	ipv6_sysctl_rtcache_flush
2624 	},
2625 	{
2626 		.procname	=	"gc_thresh",
2627 		.data		=	&ip6_dst_ops_template.gc_thresh,
2628 		.maxlen		=	sizeof(int),
2629 		.mode		=	0644,
2630 		.proc_handler	=	proc_dointvec,
2631 	},
2632 	{
2633 		.procname	=	"max_size",
2634 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
2635 		.maxlen		=	sizeof(int),
2636 		.mode		=	0644,
2637 		.proc_handler	=	proc_dointvec,
2638 	},
2639 	{
2640 		.procname	=	"gc_min_interval",
2641 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2642 		.maxlen		=	sizeof(int),
2643 		.mode		=	0644,
2644 		.proc_handler	=	proc_dointvec_jiffies,
2645 	},
2646 	{
2647 		.procname	=	"gc_timeout",
2648 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2649 		.maxlen		=	sizeof(int),
2650 		.mode		=	0644,
2651 		.proc_handler	=	proc_dointvec_jiffies,
2652 	},
2653 	{
2654 		.procname	=	"gc_interval",
2655 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
2656 		.maxlen		=	sizeof(int),
2657 		.mode		=	0644,
2658 		.proc_handler	=	proc_dointvec_jiffies,
2659 	},
2660 	{
2661 		.procname	=	"gc_elasticity",
2662 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2663 		.maxlen		=	sizeof(int),
2664 		.mode		=	0644,
2665 		.proc_handler	=	proc_dointvec,
2666 	},
2667 	{
2668 		.procname	=	"mtu_expires",
2669 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2670 		.maxlen		=	sizeof(int),
2671 		.mode		=	0644,
2672 		.proc_handler	=	proc_dointvec_jiffies,
2673 	},
2674 	{
2675 		.procname	=	"min_adv_mss",
2676 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
2677 		.maxlen		=	sizeof(int),
2678 		.mode		=	0644,
2679 		.proc_handler	=	proc_dointvec,
2680 	},
2681 	{
2682 		.procname	=	"gc_min_interval_ms",
2683 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2684 		.maxlen		=	sizeof(int),
2685 		.mode		=	0644,
2686 		.proc_handler	=	proc_dointvec_ms_jiffies,
2687 	},
2688 	{ }
2689 };
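
/*
 * Once registered per namespace (see ipv6_route_sysctl_init() below) this
 * template appears under /proc/sys/net/ipv6/route/.  For example, writing
 * 8192 to .../max_size raises the route limit, and writing any value to
 * .../flush (mode 0200, write-only) triggers fib6_run_gc() through
 * ipv6_sysctl_rtcache_flush() above.  The exact mount point assumes the
 * usual sysctl registration path for this table.
 */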
2690 
2691 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2692 {
2693 	struct ctl_table *table;
2694 
2695 	table = kmemdup(ipv6_route_table_template,
2696 			sizeof(ipv6_route_table_template),
2697 			GFP_KERNEL);
2698 
2699 	if (table) {
2700 		table[0].data = &net->ipv6.sysctl.flush_delay;
2701 		table[0].extra1 = net;
2702 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2703 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2704 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2705 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2706 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2707 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2708 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2709 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2710 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2711 	}
2712 
2713 	return table;
2714 }
2715 #endif
2716 
2717 static int __net_init ip6_route_net_init(struct net *net)
2718 {
2719 	int ret = -ENOMEM;
2720 
2721 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2722 	       sizeof(net->ipv6.ip6_dst_ops));
2723 
2724 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2725 		goto out_ip6_dst_ops;
2726 
2727 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2728 					   sizeof(*net->ipv6.ip6_null_entry),
2729 					   GFP_KERNEL);
2730 	if (!net->ipv6.ip6_null_entry)
2731 		goto out_ip6_dst_entries;
2732 	net->ipv6.ip6_null_entry->dst.path =
2733 		(struct dst_entry *)net->ipv6.ip6_null_entry;
2734 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2735 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2736 			 ip6_template_metrics, true);
2737 
2738 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2739 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2740 					       sizeof(*net->ipv6.ip6_prohibit_entry),
2741 					       GFP_KERNEL);
2742 	if (!net->ipv6.ip6_prohibit_entry)
2743 		goto out_ip6_null_entry;
2744 	net->ipv6.ip6_prohibit_entry->dst.path =
2745 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2746 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2747 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2748 			 ip6_template_metrics, true);
2749 
2750 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2751 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
2752 					       GFP_KERNEL);
2753 	if (!net->ipv6.ip6_blk_hole_entry)
2754 		goto out_ip6_prohibit_entry;
2755 	net->ipv6.ip6_blk_hole_entry->dst.path =
2756 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2757 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2758 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2759 			 ip6_template_metrics, true);
2760 #endif
2761 
2762 	net->ipv6.sysctl.flush_delay = 0;
2763 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
2764 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2765 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2766 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2767 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2768 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2769 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2770 
2771 #ifdef CONFIG_PROC_FS
2772 	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2773 	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2774 #endif
2775 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
2776 
2777 	ret = 0;
2778 out:
2779 	return ret;
2780 
2781 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2782 out_ip6_prohibit_entry:
2783 	kfree(net->ipv6.ip6_prohibit_entry);
2784 out_ip6_null_entry:
2785 	kfree(net->ipv6.ip6_null_entry);
2786 #endif
2787 out_ip6_dst_entries:
2788 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2789 out_ip6_dst_ops:
2790 	goto out;
2791 }
2792 
2793 static void __net_exit ip6_route_net_exit(struct net *net)
2794 {
2795 #ifdef CONFIG_PROC_FS
2796 	proc_net_remove(net, "ipv6_route");
2797 	proc_net_remove(net, "rt6_stats");
2798 #endif
2799 	kfree(net->ipv6.ip6_null_entry);
2800 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2801 	kfree(net->ipv6.ip6_prohibit_entry);
2802 	kfree(net->ipv6.ip6_blk_hole_entry);
2803 #endif
2804 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2805 }
2806 
2807 static struct pernet_operations ip6_route_net_ops = {
2808 	.init = ip6_route_net_init,
2809 	.exit = ip6_route_net_exit,
2810 };
2811 
2812 static struct notifier_block ip6_route_dev_notifier = {
2813 	.notifier_call = ip6_route_dev_notify,
2814 	.priority = 0,
2815 };
2816 
2817 int __init ip6_route_init(void)
2818 {
2819 	int ret;
2820 
2821 	ret = -ENOMEM;
2822 	ip6_dst_ops_template.kmem_cachep =
2823 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2824 				  SLAB_HWCACHE_ALIGN, NULL);
2825 	if (!ip6_dst_ops_template.kmem_cachep)
2826 		goto out;
2827 
2828 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
2829 	if (ret)
2830 		goto out_kmem_cache;
2831 
2832 	ret = register_pernet_subsys(&ip6_route_net_ops);
2833 	if (ret)
2834 		goto out_dst_entries;
2835 
2836 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2837 
2838 	/* The loopback device is registered before this portion of code
2839 	 * runs, so the loopback reference in rt6_info is not taken there;
2840 	 * take it manually for init_net */
2841 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2842 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2843 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2844 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2845 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2846 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2847 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2848 #endif
2849 	ret = fib6_init();
2850 	if (ret)
2851 		goto out_register_subsys;
2852 
2853 	ret = xfrm6_init();
2854 	if (ret)
2855 		goto out_fib6_init;
2856 
2857 	ret = fib6_rules_init();
2858 	if (ret)
2859 		goto xfrm6_init;
2860 
2861 	ret = -ENOBUFS;
2862 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2863 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2864 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2865 		goto fib6_rules_init;
2866 
2867 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2868 	if (ret)
2869 		goto fib6_rules_init;
2870 
2871 out:
2872 	return ret;
2873 
2874 fib6_rules_init:
2875 	fib6_rules_cleanup();
2876 xfrm6_init:
2877 	xfrm6_fini();
2878 out_fib6_init:
2879 	fib6_gc_cleanup();
2880 out_register_subsys:
2881 	unregister_pernet_subsys(&ip6_route_net_ops);
2882 out_dst_entries:
2883 	dst_entries_destroy(&ip6_dst_blackhole_ops);
2884 out_kmem_cache:
2885 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2886 	goto out;
2887 }
2888 
2889 void ip6_route_cleanup(void)
2890 {
2891 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
2892 	fib6_rules_cleanup();
2893 	xfrm6_fini();
2894 	fib6_gc_cleanup();
2895 	unregister_pernet_subsys(&ip6_route_net_ops);
2896 	dst_entries_destroy(&ip6_dst_blackhole_ops);
2897 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2898 }
2899