1 /*
2  *	Linux INET6 implementation
3  *	FIB front-end.
4  *
5  *	Authors:
6  *	Pedro Roque		<pedro_m@yahoo.com>
7  *
8  *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *	This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15 
16 /*	Changes:
17  *
18  *	YOSHIFUJI Hideaki @USAGI
19  *		reworked default router selection.
20  *		- respect outgoing interface
21  *		- select from (probably) reachable routers (i.e.
22  *		routers in REACHABLE, STALE, DELAY or PROBE states).
23  *		- always select the same router if it is (probably)
24  *		reachable.  otherwise, round-robin the list.
25  */
26 
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/route.h>
34 #include <linux/netdevice.h>
35 #include <linux/in6.h>
36 #include <linux/init.h>
37 #include <linux/netlink.h>
38 #include <linux/if_arp.h>
39 
40 #ifdef 	CONFIG_PROC_FS
41 #include <linux/proc_fs.h>
42 #endif
43 
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52 
53 #include <asm/uaccess.h>
54 
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 
59 #undef CONFIG_RT6_POLICY
60 
61 /* Set to 3 to get tracing. */
62 #define RT6_DEBUG 2
63 
64 #if RT6_DEBUG >= 3
65 #define RDBG(x) printk x
66 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
67 #else
68 #define RDBG(x)
69 #define RT6_TRACE(x...) do { ; } while (0)
70 #endif
71 
72 
73 int ip6_rt_max_size = 4096;
74 int ip6_rt_gc_min_interval = HZ / 2;
75 int ip6_rt_gc_timeout = 60*HZ;
76 int ip6_rt_gc_interval = 30*HZ;
77 int ip6_rt_gc_elasticity = 9;
78 int ip6_rt_mtu_expires = 10*60*HZ;
79 int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
80 
81 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
82 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
83 static struct dst_entry	*ip6_dst_reroute(struct dst_entry *dst,
84 					 struct sk_buff *skb);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static int		 ip6_dst_gc(void);
87 
88 static int		ip6_pkt_discard(struct sk_buff *skb);
89 static void		ip6_link_failure(struct sk_buff *skb);
90 
91 struct dst_ops ip6_dst_ops = {
92 	AF_INET6,
93 	__constant_htons(ETH_P_IPV6),
94 	1024,
95 
96         ip6_dst_gc,
97 	ip6_dst_check,
98 	ip6_dst_reroute,
99 	NULL,
100 	ip6_negative_advice,
101 	ip6_link_failure,
102 	sizeof(struct rt6_info),
103 };
104 
105 struct rt6_info ip6_null_entry = {
106 	{{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
107 	  -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
108 	  -ENETUNREACH, NULL, NULL,
109 	  ip6_pkt_discard, ip6_pkt_discard,
110 #ifdef CONFIG_NET_CLS_ROUTE
111 	  0,
112 #endif
113 	  &ip6_dst_ops}},
114 	NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
115 	255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
116 };
117 
118 struct fib6_node ip6_routing_table = {
119 	NULL, NULL, NULL, NULL,
120 	&ip6_null_entry,
121 	0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
122 };
123 
124 #ifdef CONFIG_RT6_POLICY
125 int	ip6_rt_policy = 0;
126 
127 struct pol_chain *rt6_pol_list = NULL;
128 
129 
130 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
131 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
132 
133 static struct rt6_info	*rt6_flow_lookup(struct rt6_info *rt,
134 					 struct in6_addr *daddr,
135 					 struct in6_addr *saddr,
136 					 struct fl_acc_args *args);
137 
138 #else
139 #define ip6_rt_policy (0)
140 #endif
141 
142 /* Protects all the ip6 fib */
143 
144 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
145 
146 
147 /*
148  *	Route lookup. Any rt6_lock is implied.
149  */
150 
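/*
 *	Match a route against the requested output interface.  If oif is
 *	given, prefer a route bound to that interface, fall back to a
 *	local (loopback) route, and in strict mode return the null entry
 *	when nothing matches.
 */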
151 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
152 						    int oif,
153 						    int strict)
154 {
155 	struct rt6_info *local = NULL;
156 	struct rt6_info *sprt;
157 
158 	if (oif) {
159 		for (sprt = rt; sprt; sprt = sprt->u.next) {
160 			struct net_device *dev = sprt->rt6i_dev;
161 			if (dev->ifindex == oif)
162 				return sprt;
163 			if (dev->flags&IFF_LOOPBACK)
164 				local = sprt;
165 		}
166 
167 		if (local)
168 			return local;
169 
170 		if (strict)
171 			return &ip6_null_entry;
172 	}
173 	return rt;
174 }
175 
176 /*
177  *	pointer to the last default router chosen. BH is disabled locally.
178  */
179 static struct rt6_info *rt6_dflt_pointer = NULL;
180 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
181 
182 /* Default Router Selection (RFC 2461 6.3.6) */
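/*
 *	Each candidate is scored: +8 when it matches the requested outgoing
 *	interface (or none was requested), +4 when it is the currently
 *	selected default router, and +3/+2/+1 according to the neighbour
 *	reachability state.  A score of 12 or more wins immediately;
 *	otherwise the best score is kept, and if no router is (probably)
 *	reachable the list is round-robined.
 */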
183 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
184 {
185 	struct rt6_info *match = NULL;
186 	struct rt6_info *sprt;
187 	int mpri = 0;
188 
189 	for (sprt = rt; sprt; sprt = sprt->u.next) {
190 		struct neighbour *neigh;
191 		int m = 0;
192 
193 		if (!oif ||
194 		    (sprt->rt6i_dev &&
195 		     sprt->rt6i_dev->ifindex == oif))
196 			m += 8;
197 
198 		if (sprt == rt6_dflt_pointer)
199 			m += 4;
200 
201 		if ((neigh = sprt->rt6i_nexthop) != NULL) {
202 			read_lock_bh(&neigh->lock);
203 			switch (neigh->nud_state) {
204 			case NUD_REACHABLE:
205 				m += 3;
206 				break;
207 
208 			case NUD_STALE:
209 			case NUD_DELAY:
210 			case NUD_PROBE:
211 				m += 2;
212 				break;
213 
214 			case NUD_NOARP:
215 			case NUD_PERMANENT:
216 				m += 1;
217 				break;
218 
219 			case NUD_INCOMPLETE:
220 			default:
221 				read_unlock_bh(&neigh->lock);
222 				continue;
223 			}
224 			read_unlock_bh(&neigh->lock);
225 		} else {
226 			continue;
227 		}
228 
229 		if (m > mpri || m >= 12) {
230 			match = sprt;
231 			mpri = m;
232 			if (m >= 12) {
233 				/* we choose the latest default router if it
234 				 * is in a (probably) reachable state.
235 				 * If the route changed, we should do pmtu
236 				 * discovery. --yoshfuji
237 				 */
238 				break;
239 			}
240 		}
241 	}
242 
243 	spin_lock(&rt6_dflt_lock);
244 	if (!match) {
245 		/*
246 		 *	No default routers are known to be reachable.
247 		 *	SHOULD round robin
248 		 */
249 		if (rt6_dflt_pointer) {
250 			for (sprt = rt6_dflt_pointer->u.next;
251 			     sprt; sprt = sprt->u.next) {
252 				if (sprt->u.dst.obsolete <= 0 &&
253 				    sprt->u.dst.error == 0) {
254 					match = sprt;
255 					break;
256 				}
257 			}
258 			for (sprt = rt;
259 			     !match && sprt;
260 			     sprt = sprt->u.next) {
261 				if (sprt->u.dst.obsolete <= 0 &&
262 				    sprt->u.dst.error == 0) {
263 					match = sprt;
264 					break;
265 				}
266 				if (sprt == rt6_dflt_pointer)
267 					break;
268 			}
269 		}
270 	}
271 
272 	if (match)
273 		rt6_dflt_pointer = match;
274 
275 	spin_unlock(&rt6_dflt_lock);
276 
277 	if (!match) {
278 		/*
279 		 * Last Resort: if no default routers found,
280 		 * use addrconf default route.
281 		 * We don't record this route.
282 		 */
283 		for (sprt = ip6_routing_table.leaf;
284 		     sprt; sprt = sprt->u.next) {
285 			if ((sprt->rt6i_flags & RTF_DEFAULT) &&
286 			    (!oif ||
287 			     (sprt->rt6i_dev &&
288 			      sprt->rt6i_dev->ifindex == oif))) {
289 				match = sprt;
290 				break;
291 			}
292 		}
293 		if (!match) {
294 			/* no default route.  give up. */
295 			match = &ip6_null_entry;
296 		}
297 	}
298 
299 	return match;
300 }
301 
302 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
303 			    int oif, int strict)
304 {
305 	struct fib6_node *fn;
306 	struct rt6_info *rt;
307 
308 	read_lock_bh(&rt6_lock);
309 	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
310 	rt = rt6_device_match(fn->leaf, oif, strict);
311 	dst_hold(&rt->u.dst);
312 	rt->u.dst.__use++;
313 	read_unlock_bh(&rt6_lock);
314 
315 	rt->u.dst.lastuse = jiffies;
316 	if (rt->u.dst.error == 0)
317 		return rt;
318 	dst_release(&rt->u.dst);
319 	return NULL;
320 }
321 
322 /* rt6_ins is called with rt6_lock NOT held (free).
323    It takes a new route entry; if the addition fails for any reason,
324    the route is freed. In any case, if the caller does not hold a
325    reference, the route may be destroyed.
326  */
327 
328 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
329 {
330 	int err;
331 
332 	write_lock_bh(&rt6_lock);
333 	err = fib6_add(&ip6_routing_table, rt, nlh, req);
334 	write_unlock_bh(&rt6_lock);
335 
336 	return err;
337 }
338 
339 /* Called without rt6_lock held. If the COW fails, the function returns
340    a dead route entry with dst->error set to an errno value.
341  */
342 
343 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
344 				struct in6_addr *saddr, struct netlink_skb_parms *req)
345 {
346 	int err;
347 	struct rt6_info *rt;
348 
349 	/*
350 	 *	Clone the route.
351 	 */
352 
353 	rt = ip6_rt_copy(ort);
354 
355 	if (rt) {
356 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
357 
358 		if (!(rt->rt6i_flags&RTF_GATEWAY))
359 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
360 
361 		rt->rt6i_dst.plen = 128;
362 		rt->rt6i_flags |= RTF_CACHE;
363 		rt->u.dst.flags |= DST_HOST;
364 
365 #ifdef CONFIG_IPV6_SUBTREES
366 		if (rt->rt6i_src.plen && saddr) {
367 			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
368 			rt->rt6i_src.plen = 128;
369 		}
370 #endif
371 
372 		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
373 
374 		dst_hold(&rt->u.dst);
375 
376 		err = rt6_ins(rt, NULL, req);
377 		if (err == 0)
378 			return rt;
379 
380 		rt->u.dst.error = err;
381 
382 		return rt;
383 	}
384 	dst_hold(&ip6_null_entry.u.dst);
385 	return &ip6_null_entry;
386 }
387 
388 #ifdef CONFIG_RT6_POLICY
389 static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
390 						      struct sk_buff *skb)
391 {
392 	struct in6_addr *daddr, *saddr;
393 	struct fl_acc_args arg;
394 
395 	arg.type = FL_ARG_FORWARD;
396 	arg.fl_u.skb = skb;
397 
398 	saddr = &skb->nh.ipv6h->saddr;
399 	daddr = &skb->nh.ipv6h->daddr;
400 
401 	return rt6_flow_lookup(rt, daddr, saddr, &arg);
402 }
403 
404 static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
405 						       struct sock *sk,
406 						       struct flowi *fl)
407 {
408 	struct fl_acc_args arg;
409 
410 	arg.type = FL_ARG_ORIGIN;
411 	arg.fl_u.fl_o.sk = sk;
412 	arg.fl_u.fl_o.flow = fl;
413 
414 	return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
415 			       &arg);
416 }
417 
418 #endif
419 
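/*
 *	BACKTRACK: if a strict lookup ended up at the null entry, walk back
 *	up the fib tree to the nearest ancestor carrying route information
 *	and restart the match from there; give up at the root.
 */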
420 #define BACKTRACK() \
421 if (rt == &ip6_null_entry && strict) { \
422        while ((fn = fn->parent) != NULL) { \
423 		if (fn->fn_flags & RTN_ROOT) { \
424 			dst_hold(&rt->u.dst); \
425 			goto out; \
426 		} \
427 		if (fn->fn_flags & RTN_RTINFO) \
428 			goto restart; \
429 	} \
430 }
431 
432 
433 void ip6_route_input(struct sk_buff *skb)
434 {
435 	struct fib6_node *fn;
436 	struct rt6_info *rt;
437 	int strict;
438 	int attempts = 3;
439 
440 	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
441 
442 relookup:
443 	read_lock_bh(&rt6_lock);
444 
445 	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
446 			 &skb->nh.ipv6h->saddr);
447 
448 restart:
449 	rt = fn->leaf;
450 
451 	if ((rt->rt6i_flags & RTF_CACHE)) {
452 		if (ip6_rt_policy == 0) {
453 			rt = rt6_device_match(rt, skb->dev->ifindex, strict);
454 			BACKTRACK();
455 			dst_hold(&rt->u.dst);
456 			goto out;
457 		}
458 
459 #ifdef CONFIG_RT6_POLICY
460 		if ((rt->rt6i_flags & RTF_FLOW)) {
461 			struct rt6_info *sprt;
462 
463 			for (sprt = rt; sprt; sprt = sprt->u.next) {
464 				if (rt6_flow_match_in(sprt, skb)) {
465 					rt = sprt;
466 					dst_hold(&rt->u.dst);
467 					goto out;
468 				}
469 			}
470 		}
471 #endif
472 	}
473 
474 	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
475 	BACKTRACK();
476 
477 	if (ip6_rt_policy == 0) {
478 		if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
479 			read_unlock_bh(&rt6_lock);
480 
481 			rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
482 				     &skb->nh.ipv6h->saddr,
483 				     &NETLINK_CB(skb));
484 
485 			if (rt->u.dst.error != -EEXIST || --attempts <= 0)
486 				goto out2;
487 			/* Race condition! In the gap while rt6_lock was
488 			   released, someone could have inserted this route.  Relookup.
489 			 */
490 			goto relookup;
491 		}
492 		dst_hold(&rt->u.dst);
493 	} else {
494 #ifdef CONFIG_RT6_POLICY
495 		rt = rt6_flow_lookup_in(rt, skb);
496 #else
497 		/* NEVER REACHED */
498 #endif
499 	}
500 
501 out:
502 	read_unlock_bh(&rt6_lock);
503 out2:
504 	rt->u.dst.lastuse = jiffies;
505 	rt->u.dst.__use++;
506 	skb->dst = (struct dst_entry *) rt;
507 }
508 
509 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
510 {
511 	struct fib6_node *fn;
512 	struct rt6_info *rt;
513 	int strict;
514 	int attempts = 3;
515 
516 	strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
517 
518 relookup:
519 	read_lock_bh(&rt6_lock);
520 
521 	fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
522 			 fl->nl_u.ip6_u.saddr);
523 
524 restart:
525 	rt = fn->leaf;
526 
527 	if ((rt->rt6i_flags & RTF_CACHE)) {
528 		if (ip6_rt_policy == 0) {
529 			rt = rt6_device_match(rt, fl->oif, strict);
530 			BACKTRACK();
531 			dst_hold(&rt->u.dst);
532 			goto out;
533 		}
534 
535 #ifdef CONFIG_RT6_POLICY
536 		if ((rt->rt6i_flags & RTF_FLOW)) {
537 			struct rt6_info *sprt;
538 
539 			for (sprt = rt; sprt; sprt = sprt->u.next) {
540 				if (rt6_flow_match_out(sprt, sk)) {
541 					rt = sprt;
542 					dst_hold(&rt->u.dst);
543 					goto out;
544 				}
545 			}
546 		}
547 #endif
548 	}
549 	if (rt->rt6i_flags & RTF_DEFAULT) {
550 		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
551 			rt = rt6_best_dflt(rt, fl->oif);
552 	} else {
553 		rt = rt6_device_match(rt, fl->oif, strict);
554 		BACKTRACK();
555 	}
556 
557 	if (ip6_rt_policy == 0) {
558 		if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
559 			read_unlock_bh(&rt6_lock);
560 
561 			rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
562 				     fl->nl_u.ip6_u.saddr, NULL);
563 
564 			if (rt->u.dst.error != -EEXIST || --attempts <= 0)
565 				goto out2;
566 
567 			/* Race condition! In the gap while rt6_lock was
568 			   released, someone could have inserted this route.  Relookup.
569 			 */
570 			goto relookup;
571 		}
572 		dst_hold(&rt->u.dst);
573 	} else {
574 #ifdef CONFIG_RT6_POLICY
575 		rt = rt6_flow_lookup_out(rt, sk, fl);
576 #else
577 		/* NEVER REACHED */
578 #endif
579 	}
580 
581 out:
582 	read_unlock_bh(&rt6_lock);
583 out2:
584 	rt->u.dst.lastuse = jiffies;
585 	rt->u.dst.__use++;
586 	return &rt->u.dst;
587 }
588 
589 
590 /*
591  *	Destination cache support functions
592  */
593 
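/*
 *	A cached dst remains valid only while the fib node it was cloned
 *	from still carries the serial number (cookie) recorded at lookup
 *	time; otherwise the reference is dropped and the caller must
 *	perform a new lookup.
 */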
594 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
595 {
596 	struct rt6_info *rt;
597 
598 	rt = (struct rt6_info *) dst;
599 
600 	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
601 		return dst;
602 
603 	dst_release(dst);
604 	return NULL;
605 }
606 
607 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
608 {
609 	/*
610 	 *	FIXME
611 	 */
612 	RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
613 	      __builtin_return_address(0)));
614 	return NULL;
615 }
616 
617 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
618 {
619 	struct rt6_info *rt = (struct rt6_info *) dst;
620 
621 	if (rt) {
622 		if (rt->rt6i_flags & RTF_CACHE)
623 			ip6_del_rt(rt, NULL, NULL);
624 		else
625 			dst_release(dst);
626 	}
627 	return NULL;
628 }
629 
630 static void ip6_link_failure(struct sk_buff *skb)
631 {
632 	struct rt6_info *rt;
633 
634 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
635 
636 	rt = (struct rt6_info *) skb->dst;
637 	if (rt) {
638 		if (rt->rt6i_flags&RTF_CACHE) {
639 			dst_set_expires(&rt->u.dst, 0);
640 			rt->rt6i_flags |= RTF_EXPIRES;
641 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
642 			rt->rt6i_node->fn_sernum = -1;
643 	}
644 }
645 
646 static int ip6_dst_gc(void)
647 {
648 	static unsigned expire = 30*HZ;
649 	static unsigned long last_gc;
650 	unsigned long now = jiffies;
651 
652 	if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
653 	    atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
654 		goto out;
655 
656 	expire++;
657 	fib6_run_gc(expire);
658 	last_gc = now;
659 	if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
660 		expire = ip6_rt_gc_timeout>>1;
661 
662 out:
663 	expire -= expire>>ip6_rt_gc_elasticity;
664 	return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
665 }
666 
667 /* Clear the host part of a prefix. Not strictly necessary with a radix
668    tree, but it results in cleaner routing tables.
669 
670    Remove this only once everything is known to work!
671  */
672 
673 static void ipv6_addr_prefix(struct in6_addr *pfx,
674 			     const struct in6_addr *addr, int plen)
675 {
676 	int b = plen&0x7;
677 	int o = plen>>3;
678 
679 	memcpy(pfx->s6_addr, addr, o);
680 	if (o < 16)
681 		memset(pfx->s6_addr + o, 0, 16 - o);
682 	if (b != 0)
683 		pfx->s6_addr[o] = addr->s6_addr[o]&(0xff00 >> b);
684 }
685 
686 static int ipv6_get_mtu(struct net_device *dev)
687 {
688 	int mtu = IPV6_MIN_MTU;
689 	struct inet6_dev *idev;
690 
691 	idev = in6_dev_get(dev);
692 	if (idev) {
693 		mtu = idev->cnf.mtu6;
694 		in6_dev_put(idev);
695 	}
696 	return mtu;
697 }
698 
699 static int ipv6_get_hoplimit(struct net_device *dev)
700 {
701 	int hoplimit = ipv6_devconf.hop_limit;
702 	struct inet6_dev *idev;
703 
704 	idev = in6_dev_get(dev);
705 	if (idev) {
706 		hoplimit = idev->cnf.hop_limit;
707 		in6_dev_put(idev);
708 	}
709 	return hoplimit;
710 }
711 
712 /*
713  *
714  */
715 
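/*
 *	Add a route described by an in6_rtmsg: validate the prefix lengths,
 *	resolve the output device and (for gatewayed routes) the nexthop,
 *	fill in pmtu/advmss/hoplimit and insert the entry into the routing
 *	table via rt6_ins().
 */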
716 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
717 {
718 	int err;
719 	struct rtmsg *r;
720 	struct rt6_info *rt;
721 	struct net_device *dev = NULL;
722 	int addr_type;
723 
724 	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
725 		return -EINVAL;
726 #ifndef CONFIG_IPV6_SUBTREES
727 	if (rtmsg->rtmsg_src_len)
728 		return -EINVAL;
729 #endif
730 	if (rtmsg->rtmsg_metric == 0)
731 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
732 
733 	rt = dst_alloc(&ip6_dst_ops);
734 
735 	if (rt == NULL)
736 		return -ENOMEM;
737 
738 	rt->u.dst.obsolete = -1;
739 	rt->rt6i_expires = rtmsg->rtmsg_info;
740 	if (nlh && (r = NLMSG_DATA(nlh))) {
741 		rt->rt6i_protocol = r->rtm_protocol;
742 	} else {
743 		rt->rt6i_protocol = RTPROT_BOOT;
744 	}
745 
746 	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
747 
748 	if (addr_type & IPV6_ADDR_MULTICAST)
749 		rt->u.dst.input = ip6_mc_input;
750 	else
751 		rt->u.dst.input = ip6_forward;
752 
753 	rt->u.dst.output = ip6_output;
754 
755 	if (rtmsg->rtmsg_ifindex) {
756 		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
757 		err = -ENODEV;
758 		if (dev == NULL)
759 			goto out;
760 	}
761 
762 	ipv6_addr_prefix(&rt->rt6i_dst.addr,
763 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
764 	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
765 	if (rt->rt6i_dst.plen == 128)
766 	       rt->u.dst.flags = DST_HOST;
767 
768 #ifdef CONFIG_IPV6_SUBTREES
769 	ipv6_addr_prefix(&rt->rt6i_src.addr,
770 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
771 	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
772 #endif
773 
774 	rt->rt6i_metric = rtmsg->rtmsg_metric;
775 
776 	/* We cannot add true routes via loopback here,
777 	   they would result in kernel looping; promote them to reject routes
778 	 */
779 	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
780 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
781 		if (dev)
782 			dev_put(dev);
783 		dev = &loopback_dev;
784 		dev_hold(dev);
785 		rt->u.dst.output = ip6_pkt_discard;
786 		rt->u.dst.input = ip6_pkt_discard;
787 		rt->u.dst.error = -ENETUNREACH;
788 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
789 		goto install_route;
790 	}
791 
792 	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
793 		struct in6_addr *gw_addr;
794 		int gwa_type;
795 
796 		gw_addr = &rtmsg->rtmsg_gateway;
797 		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
798 		gwa_type = ipv6_addr_type(gw_addr);
799 
800 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
801 			struct rt6_info *grt;
802 
803 			/* IPv6 strictly inhibits using non-link-local
804 			   addresses as a nexthop address.
805 			   Otherwise, the router will not be able to send redirects.
806 			   That is very good, but in some (rare!) circumstances
807 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
808 			   some exceptions. --ANK
809 			 */
810 			err = -EINVAL;
811 			if (!(gwa_type&IPV6_ADDR_UNICAST))
812 				goto out;
813 
814 			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
815 
816 			err = -EHOSTUNREACH;
817 			if (grt == NULL)
818 				goto out;
819 			if (dev) {
820 				if (dev != grt->rt6i_dev) {
821 					dst_release(&grt->u.dst);
822 					goto out;
823 				}
824 			} else {
825 				dev = grt->rt6i_dev;
826 				dev_hold(dev);
827 			}
828 			if (!(grt->rt6i_flags&RTF_GATEWAY))
829 				err = 0;
830 			dst_release(&grt->u.dst);
831 
832 			if (err)
833 				goto out;
834 		}
835 		err = -EINVAL;
836 		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
837 			goto out;
838 	}
839 
840 	err = -ENODEV;
841 	if (dev == NULL)
842 		goto out;
843 
844 	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
845 		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
846 		if (IS_ERR(rt->rt6i_nexthop)) {
847 			err = PTR_ERR(rt->rt6i_nexthop);
848 			rt->rt6i_nexthop = NULL;
849 			goto out;
850 		}
851 	}
852 
853 	if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
854 		rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
855 	else
856 		rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
857 	rt->rt6i_flags = rtmsg->rtmsg_flags;
858 
859 install_route:
860 	rt->u.dst.pmtu = ipv6_get_mtu(dev);
861 	rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
862 	/* Maximal non-jumbo IPv6 payload is 65535 and corresponding
863 	   MSS is 65535 - tcp_header_size. 65535 is also valid and
864 	   means: "any MSS, rely only on pmtu discovery"
865 	 */
866 	if (rt->u.dst.advmss > 65535-20)
867 		rt->u.dst.advmss = 65535;
868 	rt->u.dst.dev = dev;
869 	return rt6_ins(rt, nlh, req);
870 
871 out:
872 	if (dev)
873 		dev_put(dev);
874 	dst_free((struct dst_entry *) rt);
875 	return err;
876 }
877 
878 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
879 {
880 	int err;
881 
882 	write_lock_bh(&rt6_lock);
883 
884 	spin_lock_bh(&rt6_dflt_lock);
885 	rt6_dflt_pointer = NULL;
886 	spin_unlock_bh(&rt6_dflt_lock);
887 
888 	dst_release(&rt->u.dst);
889 
890 	err = fib6_del(rt, nlh, req);
891 	write_unlock_bh(&rt6_lock);
892 
893 	return err;
894 }
895 
896 int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
897 {
898 	struct fib6_node *fn;
899 	struct rt6_info *rt;
900 	int err = -ESRCH;
901 
902 	read_lock_bh(&rt6_lock);
903 
904 	fn = fib6_locate(&ip6_routing_table,
905 			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
906 			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
907 
908 	if (fn) {
909 		for (rt = fn->leaf; rt; rt = rt->u.next) {
910 			if (rtmsg->rtmsg_ifindex &&
911 			    (rt->rt6i_dev == NULL ||
912 			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
913 				continue;
914 			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
915 			    ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
916 				continue;
917 			if (rtmsg->rtmsg_metric &&
918 			    rtmsg->rtmsg_metric != rt->rt6i_metric)
919 				continue;
920 			dst_hold(&rt->u.dst);
921 			read_unlock_bh(&rt6_lock);
922 
923 			return ip6_del_rt(rt, nlh, req);
924 		}
925 	}
926 	read_unlock_bh(&rt6_lock);
927 
928 	return err;
929 }
930 
931 /*
932  *	Handle redirects
933  */
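/*
 *	A redirect is accepted only if it arrives via the device of the
 *	existing route, the current route is a gateway route, and the
 *	source is (one of) our current nexthop router(s).  Accepting it
 *	installs a cloned host route pointing at the new nexthop.
 */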
934 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
935 		  struct neighbour *neigh, int on_link)
936 {
937 	struct rt6_info *rt, *nrt;
938 
939 	/* Locate old route to this destination. */
940 	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
941 
942 	if (rt == NULL)
943 		return;
944 
945 	if (neigh->dev != rt->rt6i_dev)
946 		goto out;
947 
948 	/* Redirect received -> path was valid.
949 	   Look, redirects are sent only in response to data packets,
950 	   so that this nexthop apparently is reachable. --ANK
951 	 */
952 	dst_confirm(&rt->u.dst);
953 
954 	/* Duplicate redirect: silently ignore. */
955 	if (neigh == rt->u.dst.neighbour)
956 		goto out;
957 
958 	/* Current route is on-link; redirect is always invalid.
959 
960 	   It seems the previous statement is not quite true. It could
961 	   be a node which considers us on-link (e.g. proxy ndisc).
962 	   But then the router serving it might decide that we should
963 	   know the truth 8)8) --ANK (980726).
964 	 */
965 	if (!(rt->rt6i_flags&RTF_GATEWAY))
966 		goto out;
967 
968 	/*
969 	 *	RFC 1970 specifies that redirects should only be
970 	 *	accepted if they come from the nexthop to the target.
971 	 *	Due to the way default routers are chosen, this notion
972 	 *	is a bit fuzzy and one might need to check all default
973 	 *	routers.
974 	 */
975 
976 	if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
977 		if (rt->rt6i_flags & RTF_DEFAULT) {
978 			struct rt6_info *rt1;
979 
980 			read_lock(&rt6_lock);
981 			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
982 				if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
983 					dst_hold(&rt1->u.dst);
984 					dst_release(&rt->u.dst);
985 					read_unlock(&rt6_lock);
986 					rt = rt1;
987 					goto source_ok;
988 				}
989 			}
990 			read_unlock(&rt6_lock);
991 		}
992 		if (net_ratelimit())
993 			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
994 			       "for redirect target\n");
995 		goto out;
996 	}
997 
998 source_ok:
999 
1000 	/*
1001 	 *	We have finally decided to accept it.
1002 	 */
1003 
1004 	nrt = ip6_rt_copy(rt);
1005 	if (nrt == NULL)
1006 		goto out;
1007 
1008 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1009 	if (on_link)
1010 		nrt->rt6i_flags &= ~RTF_GATEWAY;
1011 
1012 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1013 	nrt->rt6i_dst.plen = 128;
1014 	nrt->u.dst.flags |= DST_HOST;
1015 
1016 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1017 	nrt->rt6i_nexthop = neigh_clone(neigh);
1018 	/* Reset pmtu, it may be better */
1019 	nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
1020 	nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1021 	if (nrt->u.dst.advmss > 65535-20)
1022 		nrt->u.dst.advmss = 65535;
1023 	nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
1024 
1025 	if (rt6_ins(nrt, NULL, NULL))
1026 		goto out;
1027 
1028 	if (rt->rt6i_flags&RTF_CACHE) {
1029 		ip6_del_rt(rt, NULL, NULL);
1030 		return;
1031 	}
1032 
1033 out:
1034         dst_release(&rt->u.dst);
1035 	return;
1036 }
1037 
1038 /*
1039  *	Handle ICMP "packet too big" messages
1040  *	i.e. Path MTU discovery
1041  */
1042 
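/*
 *	A reported MTU below IPV6_MIN_MTU is clamped to the minimum.  For a
 *	cached host route the pmtu is updated in place; for a network route
 *	a host clone (COW for connected routes, a copy otherwise) is
 *	installed with the new pmtu and an expiry of ip6_rt_mtu_expires.
 */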
1043 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1044 			struct net_device *dev, u32 pmtu)
1045 {
1046 	struct rt6_info *rt, *nrt;
1047 
1048 	if (pmtu < IPV6_MIN_MTU) {
1049 		if (net_ratelimit())
1050 			printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
1051 			       pmtu);
1052 		/* According to RFC1981, the PMTU is set to the IPv6 minimum
1053 		   link MTU if the node receives a Packet Too Big message
1054 		   reporting next-hop MTU that is less than the IPv6 minimum MTU.
1055 		 */
1056 		pmtu = IPV6_MIN_MTU;
1057 	}
1058 
1059 	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1060 
1061 	if (rt == NULL)
1062 		return;
1063 
1064 	if (pmtu >= rt->u.dst.pmtu)
1065 		goto out;
1066 
1067 	/* New mtu received -> path was valid.
1068 	   They are sent only in response to data packets,
1069 	   so that this nexthop apparently is reachable. --ANK
1070 	 */
1071 	dst_confirm(&rt->u.dst);
1072 
1073 	/* Host route. If it is static, it would be better
1074 	   not to override it, but to add a new one, so that
1075 	   when the cache entry expires the old pmtu
1076 	   comes back automatically.
1077 	 */
1078 	if (rt->rt6i_flags & RTF_CACHE) {
1079 		rt->u.dst.pmtu = pmtu;
1080 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1081 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1082 		goto out;
1083 	}
1084 
1085 	/* Network route.
1086 	   Two cases are possible:
1087 	   1. It is a connected route. Action: COW it.
1088 	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1089 	 */
1090 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1091 		nrt = rt6_cow(rt, daddr, saddr, NULL);
1092 		if (!nrt->u.dst.error) {
1093 			nrt->u.dst.pmtu = pmtu;
1094 			/* According to RFC 1981, a PMTU increase should not be probed
1095 			   for within 5 minutes; the recommended timer is 10 minutes.
1096 			   Here the route expiration time is set to ip6_rt_mtu_expires,
1097 			   which defaults to 10 minutes. After that the decreased pmtu
1098 			   expires and PMTU increase detection happens automatically.
1099 			 */
1100 			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1101 			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1102 			dst_release(&nrt->u.dst);
1103 		}
1104 	} else {
1105 		nrt = ip6_rt_copy(rt);
1106 		if (nrt == NULL)
1107 			goto out;
1108 		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1109 		nrt->rt6i_dst.plen = 128;
1110 		nrt->u.dst.flags |= DST_HOST;
1111 		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1112 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1113 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1114 		nrt->u.dst.pmtu = pmtu;
1115 		rt6_ins(nrt, NULL, NULL);
1116 	}
1117 
1118 out:
1119 	dst_release(&rt->u.dst);
1120 }
1121 
1122 /*
1123  *	Misc support functions
1124  */
1125 
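/*
 *	ip6_rt_copy() duplicates a route entry for cloning: it copies the
 *	input/output hooks, metrics, device (taking a reference), gateway
 *	and destination/source keys, but clears the expiry, the RTF_EXPIRES
 *	flag and the metric.
 */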
1126 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1127 {
1128 	struct rt6_info *rt;
1129 
1130 	rt = dst_alloc(&ip6_dst_ops);
1131 
1132 	if (rt) {
1133 		rt->u.dst.input = ort->u.dst.input;
1134 		rt->u.dst.output = ort->u.dst.output;
1135 
1136 		memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
1137 		rt->u.dst.dev = ort->u.dst.dev;
1138 		if (rt->u.dst.dev)
1139 			dev_hold(rt->u.dst.dev);
1140 		rt->u.dst.lastuse = jiffies;
1141 		rt->rt6i_hoplimit = ort->rt6i_hoplimit;
1142 		rt->rt6i_expires = 0;
1143 
1144 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1145 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1146 		rt->rt6i_metric = 0;
1147 
1148 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1149 #ifdef CONFIG_IPV6_SUBTREES
1150 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1151 #endif
1152 	}
1153 	return rt;
1154 }
1155 
1156 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1157 {
1158 	struct rt6_info *rt;
1159 	struct fib6_node *fn;
1160 
1161 	fn = &ip6_routing_table;
1162 
1163 	write_lock_bh(&rt6_lock);
1164 	for (rt = fn->leaf; rt; rt=rt->u.next) {
1165 		if (dev == rt->rt6i_dev &&
1166 		    ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1167 			break;
1168 	}
1169 	if (rt)
1170 		dst_hold(&rt->u.dst);
1171 	write_unlock_bh(&rt6_lock);
1172 	return rt;
1173 }
1174 
1175 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1176 				     struct net_device *dev)
1177 {
1178 	struct in6_rtmsg rtmsg;
1179 
1180 	memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1181 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1182 	ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1183 	rtmsg.rtmsg_metric = 1024;
1184 	rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1185 
1186 	rtmsg.rtmsg_ifindex = dev->ifindex;
1187 
1188 	ip6_route_add(&rtmsg, NULL, NULL);
1189 	return rt6_get_dflt_router(gwaddr, dev);
1190 }
1191 
1192 void rt6_purge_dflt_routers(int last_resort)
1193 {
1194 	struct rt6_info *rt;
1195 	u32 flags;
1196 
1197 	if (last_resort)
1198 		flags = RTF_ALLONLINK;
1199 	else
1200 		flags = RTF_DEFAULT | RTF_ADDRCONF;
1201 
1202 restart:
1203 	read_lock_bh(&rt6_lock);
1204 	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1205 		if (rt->rt6i_flags & flags) {
1206 			dst_hold(&rt->u.dst);
1207 
1208 			spin_lock_bh(&rt6_dflt_lock);
1209 			rt6_dflt_pointer = NULL;
1210 			spin_unlock_bh(&rt6_dflt_lock);
1211 
1212 			read_unlock_bh(&rt6_lock);
1213 
1214 			ip6_del_rt(rt, NULL, NULL);
1215 
1216 			goto restart;
1217 		}
1218 	}
1219 	read_unlock_bh(&rt6_lock);
1220 }
1221 
1222 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1223 {
1224 	struct in6_rtmsg rtmsg;
1225 	int err;
1226 
1227 	switch(cmd) {
1228 	case SIOCADDRT:		/* Add a route */
1229 	case SIOCDELRT:		/* Delete a route */
1230 		if (!capable(CAP_NET_ADMIN))
1231 			return -EPERM;
1232 		err = copy_from_user(&rtmsg, arg,
1233 				     sizeof(struct in6_rtmsg));
1234 		if (err)
1235 			return -EFAULT;
1236 
1237 		rtnl_lock();
1238 		switch (cmd) {
1239 		case SIOCADDRT:
1240 			err = ip6_route_add(&rtmsg, NULL, NULL);
1241 			break;
1242 		case SIOCDELRT:
1243 			err = ip6_route_del(&rtmsg, NULL, NULL);
1244 			break;
1245 		default:
1246 			err = -EINVAL;
1247 		}
1248 		rtnl_unlock();
1249 
1250 		return err;
1251 	};
1252 
1253 	return -EINVAL;
1254 }
1255 
1256 /*
1257  *	Drop the packet on the floor
1258  */
1259 
1260 int ip6_pkt_discard(struct sk_buff *skb)
1261 {
1262 	IP6_INC_STATS(Ip6OutNoRoutes);
1263 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1264 	kfree_skb(skb);
1265 	return 0;
1266 }
1267 
1268 /*
1269  *	Add address
1270  */
1271 
1272 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
1273 {
1274 	struct rt6_info *rt;
1275 
1276 	rt = dst_alloc(&ip6_dst_ops);
1277 	if (rt == NULL)
1278 		return -ENOMEM;
1279 
1280 	rt->u.dst.flags = DST_HOST;
1281 	rt->u.dst.input = ip6_input;
1282 	rt->u.dst.output = ip6_output;
1283 	rt->rt6i_dev = dev_get_by_name("lo");
1284 	rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
1285 	rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1286 	if (rt->u.dst.advmss > 65535-20)
1287 		rt->u.dst.advmss = 65535;
1288 	rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
1289 	rt->u.dst.obsolete = -1;
1290 
1291 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1292 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1293 	if (rt->rt6i_nexthop == NULL) {
1294 		dst_free((struct dst_entry *) rt);
1295 		return -ENOMEM;
1296 	}
1297 
1298 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1299 	rt->rt6i_dst.plen = 128;
1300 	rt6_ins(rt, NULL, NULL);
1301 
1302 	return 0;
1303 }
1304 
1305 /* Delete an address. Warning: you should check that this address has
1306    disappeared before calling this function.
1307  */
1308 
1309 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1310 {
1311 	struct rt6_info *rt;
1312 	int err = -ENOENT;
1313 
1314 	rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1315 	if (rt) {
1316 		if (rt->rt6i_dst.plen == 128)
1317 			err = ip6_del_rt(rt, NULL, NULL);
1318 		else
1319 			dst_release(&rt->u.dst);
1320 	}
1321 
1322 	return err;
1323 }
1324 
1325 #ifdef CONFIG_RT6_POLICY
1326 
1327 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
1328 {
1329 	struct flow_filter *frule;
1330 	struct pkt_filter *filter;
1331 	int res = 1;
1332 
1333 	if ((frule = rt->rt6i_filter) == NULL)
1334 		goto out;
1335 
1336 	if (frule->type != FLR_INPUT) {
1337 		res = 0;
1338 		goto out;
1339 	}
1340 
1341 	for (filter = frule->u.filter; filter; filter = filter->next) {
1342 		__u32 *word;
1343 
1344 		word = (__u32 *) skb->h.raw;
1345 		word += filter->offset;
1346 
1347 		if ((*word ^ filter->value) & filter->mask) {
1348 			res = 0;
1349 			break;
1350 		}
1351 	}
1352 
1353 out:
1354 	return res;
1355 }
1356 
1357 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
1358 {
1359 	struct flow_filter *frule;
1360 	int res = 1;
1361 
1362 	if ((frule = rt->rt6i_filter) == NULL)
1363 		goto out;
1364 
1365 	if (frule->type != FLR_INPUT) {
1366 		res = 0;
1367 		goto out;
1368 	}
1369 
1370 	if (frule->u.sk != sk)
1371 		res = 0;
1372 out:
1373 	return res;
1374 }
1375 
1376 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
1377 					struct in6_addr *daddr,
1378 					struct in6_addr *saddr,
1379 					struct fl_acc_args *args)
1380 {
1381 	struct flow_rule *frule;
1382 	struct rt6_info *nrt = NULL;
1383 	struct pol_chain *pol;
1384 
1385 	for (pol = rt6_pol_list; pol; pol = pol->next) {
1386 		struct fib6_node *fn;
1387 		struct rt6_info *sprt;
1388 
1389 		fn = fib6_lookup(pol->rules, daddr, saddr);
1390 
1391 		do {
1392 			for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
1393 				int res;
1394 
1395 				frule = sprt->rt6i_flowr;
1396 #if RT6_DEBUG >= 2
1397 				if (frule == NULL) {
1398 					printk(KERN_DEBUG "NULL flowr\n");
1399 					goto error;
1400 				}
1401 #endif
1402 				res = frule->ops->accept(rt, sprt, args, &nrt);
1403 
1404 				switch (res) {
1405 				case FLOWR_SELECT:
1406 					goto found;
1407 				case FLOWR_CLEAR:
1408 					goto next_policy;
1409 				case FLOWR_NODECISION:
1410 					break;
1411 				default:
1412 					goto error;
1413 				};
1414 			}
1415 
1416 			fn = fn->parent;
1417 
1418 		} while ((fn->fn_flags & RTN_TL_ROOT) == 0);
1419 
1420 	next_policy: ;
1421 	}
1422 
1423 error:
1424 	dst_hold(&ip6_null_entry.u.dst);
1425 	return &ip6_null_entry;
1426 
1427 found:
1428 	if (nrt == NULL)
1429 		goto error;
1430 
1431 	nrt->rt6i_flags |= RTF_CACHE;
1432 	dst_hold(&nrt->u.dst);
1433 	/* record any insertion failure in the
1434 	   entry's error field */
1435 	nrt->u.dst.error = rt6_ins(nrt, NULL, NULL);
1436 	return nrt;
1437 }
1438 #endif
1439 
1440 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1441 {
1442 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1443 	    rt != &ip6_null_entry) {
1444 		RT6_TRACE("deleted by ifdown %p\n", rt);
1445 		return -1;
1446 	}
1447 	return 0;
1448 }
1449 
1450 void rt6_ifdown(struct net_device *dev)
1451 {
1452 	write_lock_bh(&rt6_lock);
1453 	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1454 	write_unlock_bh(&rt6_lock);
1455 }
1456 
1457 struct rt6_mtu_change_arg
1458 {
1459 	struct net_device *dev;
1460 	unsigned mtu;
1461 };
1462 
1463 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1464 {
1465 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1466 	struct inet6_dev *idev;
1467 	/* In IPv6 pmtu discovery is not optional,
1468 	   so that RTAX_MTU lock cannot disable it.
1469 	   We still use this lock to block changes
1470 	   caused by addrconf/ndisc.
1471 	*/
1472 	idev = __in6_dev_get(arg->dev);
1473 	if (idev == NULL)
1474 		return 0;
1475 
1476 	/* For an administrative MTU increase there is no way to discover
1477 	   an IPv6 PMTU increase, so the PMTU must be updated here.
1478 	   Since RFC 1981 doesn't cover administrative MTU increases,
1479 	   updating the PMTU on increase is a MUST (e.g. jumbo frames).
1480 	 */
1481 	/*
1482 	   If the new MTU is less than the route PMTU, the new MTU will be
1483 	   the lowest MTU in the path; update the route PMTU to reflect the
1484 	   decrease. If the new MTU is greater than the route PMTU, and the
1485 	   old MTU was the lowest MTU in the path, update the route PMTU to
1486 	   reflect the increase. In that case, if another node's MTU is now
1487 	   the lowest in the path, a Packet Too Big message will trigger
1488 	   PMTU discovery again.
1489 	 */
1490 	if (rt->rt6i_dev == arg->dev &&
1491 	    !(rt->u.dst.mxlock&(1<<RTAX_MTU)) &&
1492 	      (rt->u.dst.pmtu > arg->mtu ||
1493 	       (rt->u.dst.pmtu < arg->mtu &&
1494 		rt->u.dst.pmtu == idev->cnf.mtu6)))
1495 		rt->u.dst.pmtu = arg->mtu;
1496 	rt->u.dst.advmss = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss);
1497 	if (rt->u.dst.advmss > 65535-20)
1498 		rt->u.dst.advmss = 65535;
1499 	return 0;
1500 }
1501 
1502 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1503 {
1504 	struct rt6_mtu_change_arg arg;
1505 
1506 	arg.dev = dev;
1507 	arg.mtu = mtu;
1508 	read_lock_bh(&rt6_lock);
1509 	fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1510 	read_unlock_bh(&rt6_lock);
1511 }
1512 
1513 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1514 			      struct in6_rtmsg *rtmsg)
1515 {
1516 	memset(rtmsg, 0, sizeof(*rtmsg));
1517 
1518 	rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1519 	rtmsg->rtmsg_src_len = r->rtm_src_len;
1520 	rtmsg->rtmsg_flags = RTF_UP;
1521 	if (r->rtm_type == RTN_UNREACHABLE)
1522 		rtmsg->rtmsg_flags |= RTF_REJECT;
1523 
1524 	if (rta[RTA_GATEWAY-1]) {
1525 		if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1526 			return -EINVAL;
1527 		memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1528 		rtmsg->rtmsg_flags |= RTF_GATEWAY;
1529 	}
1530 	if (rta[RTA_DST-1]) {
1531 		if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1532 			return -EINVAL;
1533 		memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1534 	}
1535 	if (rta[RTA_SRC-1]) {
1536 		if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1537 			return -EINVAL;
1538 		memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1539 	}
1540 	if (rta[RTA_OIF-1]) {
1541 		if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1542 			return -EINVAL;
1543 		memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1544 	}
1545 	if (rta[RTA_PRIORITY-1]) {
1546 		if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1547 			return -EINVAL;
1548 		memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1549 	}
1550 	return 0;
1551 }
1552 
1553 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1554 {
1555 	struct rtmsg *r = NLMSG_DATA(nlh);
1556 	struct in6_rtmsg rtmsg;
1557 
1558 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1559 		return -EINVAL;
1560 	return ip6_route_del(&rtmsg, nlh, &NETLINK_CB(skb));
1561 }
1562 
1563 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1564 {
1565 	struct rtmsg *r = NLMSG_DATA(nlh);
1566 	struct in6_rtmsg rtmsg;
1567 
1568 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1569 		return -EINVAL;
1570 	return ip6_route_add(&rtmsg, nlh, &NETLINK_CB(skb));
1571 }
1572 
1573 struct rt6_rtnl_dump_arg
1574 {
1575 	struct sk_buff *skb;
1576 	struct netlink_callback *cb;
1577 };
1578 
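/*
 *	Fill in one netlink route message for the given route.  When
 *	"prefix" is set, only RTF_PREFIX_RT routes are reported.  Returns
 *	the skb length on success, 1 when the route is skipped, and -1
 *	when the message does not fit in the skb.
 */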
1579 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1580 			 struct in6_addr *dst, struct in6_addr *src,
1581 			 int iif, int type, u32 pid, u32 seq, int prefix)
1582 {
1583 	struct rtmsg *rtm;
1584 	struct nlmsghdr  *nlh;
1585 	unsigned char	 *b = skb->tail;
1586 	struct rta_cacheinfo ci;
1587 
1588 	if (prefix) {	/* user wants prefix routes only */
1589 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1590 			/* success since this is not a prefix route */
1591 			return 1;
1592 		}
1593 	}
1594 
1595 	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
1596 	rtm = NLMSG_DATA(nlh);
1597 	rtm->rtm_family = AF_INET6;
1598 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
1599 	rtm->rtm_src_len = rt->rt6i_src.plen;
1600 	rtm->rtm_tos = 0;
1601 	rtm->rtm_table = RT_TABLE_MAIN;
1602 	if (rt->rt6i_flags&RTF_REJECT)
1603 		rtm->rtm_type = RTN_UNREACHABLE;
1604 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1605 		rtm->rtm_type = RTN_LOCAL;
1606 	else
1607 		rtm->rtm_type = RTN_UNICAST;
1608 	rtm->rtm_flags = 0;
1609 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1610 	rtm->rtm_protocol = rt->rt6i_protocol;
1611 	if (rt->rt6i_flags&RTF_DYNAMIC)
1612 		rtm->rtm_protocol = RTPROT_REDIRECT;
1613 	else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
1614 		rtm->rtm_protocol = RTPROT_KERNEL;
1615 	else if (rt->rt6i_flags&RTF_DEFAULT)
1616 		rtm->rtm_protocol = RTPROT_RA;
1617 
1618 	if (rt->rt6i_flags&RTF_CACHE)
1619 		rtm->rtm_flags |= RTM_F_CLONED;
1620 
1621 	if (dst) {
1622 		RTA_PUT(skb, RTA_DST, 16, dst);
1623 	        rtm->rtm_dst_len = 128;
1624 	} else if (rtm->rtm_dst_len)
1625 		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1626 #ifdef CONFIG_IPV6_SUBTREES
1627 	if (src) {
1628 		RTA_PUT(skb, RTA_SRC, 16, src);
1629 	        rtm->rtm_src_len = 128;
1630 	} else if (rtm->rtm_src_len)
1631 		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1632 #endif
1633 	if (iif)
1634 		RTA_PUT(skb, RTA_IIF, 4, &iif);
1635 	else if (dst) {
1636 		struct in6_addr saddr_buf;
1637 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1638 			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1639 	}
1640 	if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
1641 		goto rtattr_failure;
1642 	if (rt->u.dst.neighbour)
1643 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1644 	if (rt->u.dst.dev)
1645 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1646 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1647 	ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
1648 	if (rt->rt6i_expires)
1649 		ci.rta_expires = rt->rt6i_expires - jiffies;
1650 	else
1651 		ci.rta_expires = 0;
1652 	ci.rta_used = rt->u.dst.__use;
1653 	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1654 	ci.rta_error = rt->u.dst.error;
1655 	ci.rta_id = 0;
1656 	ci.rta_ts = 0;
1657 	ci.rta_tsage = 0;
1658 	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1659 	nlh->nlmsg_len = skb->tail - b;
1660 	return skb->len;
1661 
1662 nlmsg_failure:
1663 rtattr_failure:
1664 	skb_trim(skb, b - skb->data);
1665 	return -1;
1666 }
1667 
1668 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1669 {
1670 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1671 	int prefix;
1672 
1673 	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1674 		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1675 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1676 	} else
1677 		prefix = 0;
1678 
1679 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1680 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1681 		     prefix);
1682 }
1683 
1684 static int fib6_dump_node(struct fib6_walker_t *w)
1685 {
1686 	int res;
1687 	struct rt6_info *rt;
1688 
1689 	for (rt = w->leaf; rt; rt = rt->u.next) {
1690 		res = rt6_dump_route(rt, w->args);
1691 		if (res < 0) {
1692 			/* Frame is full, suspend walking */
1693 			w->leaf = rt;
1694 			return 1;
1695 		}
1696 		BUG_TRAP(res!=0);
1697 	}
1698 	w->leaf = NULL;
1699 	return 0;
1700 }
1701 
1702 static void fib6_dump_end(struct netlink_callback *cb)
1703 {
1704 	struct fib6_walker_t *w = (void*)cb->args[0];
1705 
1706 	if (w) {
1707 		cb->args[0] = 0;
1708 		fib6_walker_unlink(w);
1709 		kfree(w);
1710 	}
1711 	if (cb->args[1]) {
1712 		cb->done = (void*)cb->args[1];
1713 		cb->args[1] = 0;
1714 	}
1715 }
1716 
1717 static int fib6_dump_done(struct netlink_callback *cb)
1718 {
1719 	fib6_dump_end(cb);
1720 	return cb->done(cb);
1721 }
1722 
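/*
 *	Dump the routing table to userspace.  The fib walker is stashed in
 *	cb->args[0] between invocations so the dump can resume once the skb
 *	fills up; cb->args[1] saves the original done callback, which is
 *	restored by fib6_dump_end().
 */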
1723 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1724 {
1725 	struct rt6_rtnl_dump_arg arg;
1726 	struct fib6_walker_t *w;
1727 	int res;
1728 
1729 	arg.skb = skb;
1730 	arg.cb = cb;
1731 
1732 	w = (void*)cb->args[0];
1733 	if (w == NULL) {
1734 		/* New dump:
1735 		 *
1736 		 * 1. hook callback destructor.
1737 		 */
1738 		cb->args[1] = (long)cb->done;
1739 		cb->done = fib6_dump_done;
1740 
1741 		/*
1742 		 * 2. allocate and initialize walker.
1743 		 */
1744 		w = kmalloc(sizeof(*w), GFP_ATOMIC);
1745 		if (w == NULL)
1746 			return -ENOMEM;
1747 		RT6_TRACE("dump<%p", w);
1748 		memset(w, 0, sizeof(*w));
1749 		w->root = &ip6_routing_table;
1750 		w->func = fib6_dump_node;
1751 		w->args = &arg;
1752 		cb->args[0] = (long)w;
1753 		read_lock_bh(&rt6_lock);
1754 		res = fib6_walk(w);
1755 		read_unlock_bh(&rt6_lock);
1756 	} else {
1757 		w->args = &arg;
1758 		read_lock_bh(&rt6_lock);
1759 		res = fib6_walk_continue(w);
1760 		read_unlock_bh(&rt6_lock);
1761 	}
1762 #if RT6_DEBUG >= 3
1763 	if (res <= 0 && skb->len == 0)
1764 		RT6_TRACE("%p>dump end\n", w);
1765 #endif
1766 	res = res < 0 ? res : skb->len;
1767 	/* res < 0 is an error. (really, impossible)
1768 	   res == 0 means that dump is complete, but skb still can contain data.
1769 	   res > 0 dump is not complete, but frame is full.
1770 	 */
1771 	/* Destroy walker, if dump of this table is complete. */
1772 	if (res <= 0)
1773 		fib6_dump_end(cb);
1774 	return res;
1775 }
1776 
1777 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1778 {
1779 	struct rtattr **rta = arg;
1780 	int iif = 0;
1781 	int err = -ENOBUFS;
1782 	struct sk_buff *skb;
1783 	struct flowi fl;
1784 	struct rt6_info *rt;
1785 
1786 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1787 	if (skb == NULL)
1788 		goto out;
1789 
1790 	/* Reserve room for dummy headers; this skb can pass
1791 	   through a good chunk of the routing engine.
1792 	 */
1793 	skb->mac.raw = skb->data;
1794 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1795 
1796 	fl.proto = 0;
1797 	fl.nl_u.ip6_u.daddr = NULL;
1798 	fl.nl_u.ip6_u.saddr = NULL;
1799 	fl.uli_u.icmpt.type = 0;
1800 	fl.uli_u.icmpt.code = 0;
1801 	if (rta[RTA_SRC-1])
1802 		fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
1803 	if (rta[RTA_DST-1])
1804 		fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);
1805 
1806 	if (rta[RTA_IIF-1])
1807 		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1808 
1809 	if (iif) {
1810 		struct net_device *dev;
1811 		dev = __dev_get_by_index(iif);
1812 		if (!dev) {
1813 			err = -ENODEV;
1814 			goto out_free;
1815 		}
1816 	}
1817 
1818 	fl.oif = 0;
1819 	if (rta[RTA_OIF-1])
1820 		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1821 
1822 	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1823 
1824 	skb->dst = &rt->u.dst;
1825 
1826 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1827 	err = rt6_fill_node(skb, rt,
1828 			    fl.nl_u.ip6_u.daddr,
1829 			    fl.nl_u.ip6_u.saddr,
1830 			    iif,
1831 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1832 			    nlh->nlmsg_seq, 0);
1833 	if (err < 0) {
1834 		err = -EMSGSIZE;
1835 		goto out_free;
1836 	}
1837 
1838 	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1839 	if (err > 0)
1840 		err = 0;
1841 out:
1842 	return err;
1843 out_free:
1844 	kfree_skb(skb);
1845 	goto out;
1846 }
1847 
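/*
 *	Notify listeners on the RTMGRP_IPV6_ROUTE netlink group about a
 *	routing table change (typically RTM_NEWROUTE or RTM_DELROUTE).
 */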
1848 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1849 		      struct netlink_skb_parms *req)
1850 {
1851 	struct sk_buff *skb;
1852 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1853 	u32 pid = current->pid;
1854 	u32 seq = 0;
1855 
1856 	if (req)
1857 		pid = req->pid;
1858 	if (nlh)
1859 		seq = nlh->nlmsg_seq;
1860 
1861 	skb = alloc_skb(size, gfp_any());
1862 	if (!skb) {
1863 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1864 		return;
1865 	}
1866 	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0) < 0) {
1867 		kfree_skb(skb);
1868 		netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1869 		return;
1870 	}
1871 	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1872 	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1873 }
1874 
1875 /*
1876  *	/proc
1877  */
1878 
1879 #ifdef CONFIG_PROC_FS
1880 
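/* Length of one formatted line of /proc/net/ipv6_route output. */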
1881 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1882 
1883 struct rt6_proc_arg
1884 {
1885 	char *buffer;
1886 	int offset;
1887 	int length;
1888 	int skip;
1889 	int len;
1890 };
1891 
1892 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1893 {
1894 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1895 	int i;
1896 
1897 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
1898 		arg->skip++;
1899 		return 0;
1900 	}
1901 
1902 	if (arg->len >= arg->length)
1903 		return 0;
1904 
1905 	for (i=0; i<16; i++) {
1906 		sprintf(arg->buffer + arg->len, "%02x",
1907 			rt->rt6i_dst.addr.s6_addr[i]);
1908 		arg->len += 2;
1909 	}
1910 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1911 			    rt->rt6i_dst.plen);
1912 
1913 #ifdef CONFIG_IPV6_SUBTREES
1914 	for (i=0; i<16; i++) {
1915 		sprintf(arg->buffer + arg->len, "%02x",
1916 			rt->rt6i_src.addr.s6_addr[i]);
1917 		arg->len += 2;
1918 	}
1919 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1920 			    rt->rt6i_src.plen);
1921 #else
1922 	sprintf(arg->buffer + arg->len,
1923 		"00000000000000000000000000000000 00 ");
1924 	arg->len += 36;
1925 #endif
1926 
1927 	if (rt->rt6i_nexthop) {
1928 		for (i=0; i<16; i++) {
1929 			sprintf(arg->buffer + arg->len, "%02x",
1930 				rt->rt6i_nexthop->primary_key[i]);
1931 			arg->len += 2;
1932 		}
1933 	} else {
1934 		sprintf(arg->buffer + arg->len,
1935 			"00000000000000000000000000000000");
1936 		arg->len += 32;
1937 	}
1938 	arg->len += sprintf(arg->buffer + arg->len,
1939 			    " %08x %08x %08x %08x %8s\n",
1940 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1941 			    rt->u.dst.__use, rt->rt6i_flags,
1942 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
1943 	return 0;
1944 }
1945 
1946 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1947 {
1948 	struct rt6_proc_arg arg;
1949 	arg.buffer = buffer;
1950 	arg.offset = offset;
1951 	arg.length = length;
1952 	arg.skip = 0;
1953 	arg.len = 0;
1954 
1955 	read_lock_bh(&rt6_lock);
1956 	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1957 	read_unlock_bh(&rt6_lock);
1958 
1959 	*start = buffer;
1960 	if (offset)
1961 		*start += offset % RT6_INFO_LEN;
1962 
1963 	arg.len -= offset % RT6_INFO_LEN;
1964 
1965 	if (arg.len > length)
1966 		arg.len = length;
1967 	if (arg.len < 0)
1968 		arg.len = 0;
1969 
1970 	return arg.len;
1971 }
1972 
1973 extern struct rt6_statistics rt6_stats;
1974 
1975 static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length)
1976 {
1977 	int len;
1978 
1979 	len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n",
1980 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1981 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1982 		      rt6_stats.fib_rt_cache,
1983 		      atomic_read(&ip6_dst_ops.entries));
1984 
1985 	len -= offset;
1986 
1987 	if (len > length)
1988 		len = length;
1989 	if(len < 0)
1990 		len = 0;
1991 
1992 	*start = buffer + offset;
1993 
1994 	return len;
1995 }
1996 #endif	/* CONFIG_PROC_FS */
1997 
1998 #ifdef CONFIG_SYSCTL
1999 
2000 static int flush_delay;
2001 
2002 static
2003 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2004 			      void *buffer, size_t *lenp)
2005 {
2006 	if (write) {
2007 		proc_dointvec(ctl, write, filp, buffer, lenp);
2008 		if (flush_delay < 0)
2009 			flush_delay = 0;
2010 		fib6_run_gc((unsigned long)flush_delay);
2011 		return 0;
2012 	} else
2013 		return -EINVAL;
2014 }
2015 
2016 ctl_table ipv6_route_table[] = {
2017         {NET_IPV6_ROUTE_FLUSH, "flush",
2018          &flush_delay, sizeof(int), 0644, NULL,
2019          &ipv6_sysctl_rtcache_flush},
2020 	{NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
2021          &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
2022          &proc_dointvec},
2023 	{NET_IPV6_ROUTE_MAX_SIZE, "max_size",
2024          &ip6_rt_max_size, sizeof(int), 0644, NULL,
2025          &proc_dointvec},
2026 	{NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
2027          &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
2028          &proc_dointvec_jiffies, &sysctl_jiffies},
2029 	{NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
2030          &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
2031          &proc_dointvec_jiffies, &sysctl_jiffies},
2032 	{NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
2033          &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
2034          &proc_dointvec_jiffies, &sysctl_jiffies},
2035 	{NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity",
2036          &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL,
2037          &proc_dointvec_jiffies, &sysctl_jiffies},
2038 	{NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires",
2039          &ip6_rt_mtu_expires, sizeof(int), 0644, NULL,
2040          &proc_dointvec_jiffies, &sysctl_jiffies},
2041 	{NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss",
2042          &ip6_rt_min_advmss, sizeof(int), 0644, NULL,
2043          &proc_dointvec_jiffies, &sysctl_jiffies},
2044 	 {0}
2045 };
2046 
2047 #endif
2048 
2049 
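/*
 *	Initialise IPv6 routing: create the dst slab cache, set up the fib
 *	and register the /proc/net entries.
 */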
2050 void __init ip6_route_init(void)
2051 {
2052 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2053 						     sizeof(struct rt6_info),
2054 						     0, SLAB_HWCACHE_ALIGN,
2055 						     NULL, NULL);
2056 	fib6_init();
2057 #ifdef 	CONFIG_PROC_FS
2058 	proc_net_create("ipv6_route", 0, rt6_proc_info);
2059 	proc_net_create("rt6_stats", 0, rt6_proc_stats);
2060 #endif
2061 }
2062 
2063 #ifdef MODULE
2064 void ip6_route_cleanup(void)
2065 {
2066 #ifdef CONFIG_PROC_FS
2067 	proc_net_remove("ipv6_route");
2068 	proc_net_remove("rt6_stats");
2069 #endif
2070 
2071 	rt6_ifdown(NULL);
2072 	fib6_gc_cleanup();
2073 }
2074 #endif	/* MODULE */
2075