1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <pedro_m@yahoo.com>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16 /* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/route.h>
34 #include <linux/netdevice.h>
35 #include <linux/in6.h>
36 #include <linux/init.h>
37 #include <linux/netlink.h>
38 #include <linux/if_arp.h>
39
40 #ifdef CONFIG_PROC_FS
41 #include <linux/proc_fs.h>
42 #endif
43
44 #include <net/snmp.h>
45 #include <net/ipv6.h>
46 #include <net/ip6_fib.h>
47 #include <net/ip6_route.h>
48 #include <net/ndisc.h>
49 #include <net/addrconf.h>
50 #include <net/tcp.h>
51 #include <linux/rtnetlink.h>
52
53 #include <asm/uaccess.h>
54
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58
59 #undef CONFIG_RT6_POLICY
60
61 /* Set to 3 to get tracing. */
62 #define RT6_DEBUG 2
63
64 #if RT6_DEBUG >= 3
65 #define RDBG(x) printk x
66 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
67 #else
68 #define RDBG(x)
69 #define RT6_TRACE(x...) do { ; } while (0)
70 #endif
71
72
73 int ip6_rt_max_size = 4096;
74 int ip6_rt_gc_min_interval = HZ / 2;
75 int ip6_rt_gc_timeout = 60*HZ;
76 int ip6_rt_gc_interval = 30*HZ;
77 int ip6_rt_gc_elasticity = 9;
78 int ip6_rt_mtu_expires = 10*60*HZ;
79 int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
80
81 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
82 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
83 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst,
84 struct sk_buff *skb);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static int ip6_dst_gc(void);
87
88 static int ip6_pkt_discard(struct sk_buff *skb);
89 static void ip6_link_failure(struct sk_buff *skb);
90
/*
 * dst_entry operations table for IPv6 routes.
 *
 * Positional initializer -- field order must match struct dst_ops.
 * NOTE(review): from the callbacks used this appears to be
 * (family, protocol, gc_thresh, gc, check, reroute, <unused>,
 * negative_advice, link_failure, entry_size); confirm against
 * struct dst_ops in include/net/dst.h.
 */
struct dst_ops ip6_dst_ops = {
	AF_INET6,
	__constant_htons(ETH_P_IPV6),
	1024,				/* gc threshold (entries) */

	ip6_dst_gc,
	ip6_dst_check,
	ip6_dst_reroute,
	NULL,
	ip6_negative_advice,
	ip6_link_failure,
	sizeof(struct rt6_info),
};
104
/*
 * The "null" route: a permanent reject entry bound to the loopback
 * device with dst.error = -ENETUNREACH and ip6_pkt_discard as both
 * input and output handler.  Lookups that fail resolve to this entry,
 * so a lookup never returns NULL.  Positional initializer -- layout
 * must track struct rt6_info / struct dst_entry exactly.
 */
struct rt6_info ip6_null_entry = {
	{{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
	  -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	  -ENETUNREACH, NULL, NULL,
	  ip6_pkt_discard, ip6_pkt_discard,
#ifdef CONFIG_NET_CLS_ROUTE
	  0,
#endif
	  &ip6_dst_ops}},
	NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
	255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
};
117
/*
 * Root of the IPv6 FIB radix tree.  Its leaf is the null entry, so an
 * unsuccessful fib6_lookup() still yields a usable (reject) route.
 */
struct fib6_node ip6_routing_table = {
	NULL, NULL, NULL, NULL,
	&ip6_null_entry,
	0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
};
123
124 #ifdef CONFIG_RT6_POLICY
125 int ip6_rt_policy = 0;
126
127 struct pol_chain *rt6_pol_list = NULL;
128
129
130 static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
131 static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
132
133 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
134 struct in6_addr *daddr,
135 struct in6_addr *saddr,
136 struct fl_acc_args *args);
137
138 #else
139 #define ip6_rt_policy (0)
140 #endif
141
142 /* Protects all the ip6 fib */
143
144 rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
145
146
147 /*
148 * Route lookup. Any rt6_lock is implied.
149 */
150
rt6_device_match(struct rt6_info * rt,int oif,int strict)151 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
152 int oif,
153 int strict)
154 {
155 struct rt6_info *local = NULL;
156 struct rt6_info *sprt;
157
158 if (oif) {
159 for (sprt = rt; sprt; sprt = sprt->u.next) {
160 struct net_device *dev = sprt->rt6i_dev;
161 if (dev->ifindex == oif)
162 return sprt;
163 if (dev->flags&IFF_LOOPBACK)
164 local = sprt;
165 }
166
167 if (local)
168 return local;
169
170 if (strict)
171 return &ip6_null_entry;
172 }
173 return rt;
174 }
175
176 /*
177 * pointer to the last default router chosen. BH is disabled locally.
178 */
179 static struct rt6_info *rt6_dflt_pointer = NULL;
180 static spinlock_t rt6_dflt_lock = SPIN_LOCK_UNLOCKED;
181
182 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Default Router Selection (RFC 2461 6.3.6).
 *
 * Scores each default router on the chain headed by @rt: +8 for
 * matching @oif (or when no oif is given), +4 for being the previously
 * chosen router (rt6_dflt_pointer), and +3/+2/+1 by neighbour
 * reachability state.  Routers with no neighbour entry or one in
 * NUD_INCOMPLETE are skipped.  A score >= 12 (oif match + current
 * choice + reachable) wins outright.  If nothing scores, round-robin
 * from rt6_dflt_pointer; failing that, fall back to any RTF_DEFAULT
 * addrconf route, and finally to the null entry.
 * Caller holds rt6_lock (read side); BH is disabled locally.
 */
static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
{
	struct rt6_info *match = NULL;
	struct rt6_info *sprt;
	int mpri = 0;

	for (sprt = rt; sprt; sprt = sprt->u.next) {
		struct neighbour *neigh;
		int m = 0;

		/* Respect the outgoing interface when one is given. */
		if (!oif ||
		    (sprt->rt6i_dev &&
		     sprt->rt6i_dev->ifindex == oif))
			m += 8;

		/* Prefer sticking with the currently selected router. */
		if (sprt == rt6_dflt_pointer)
			m += 4;

		if ((neigh = sprt->rt6i_nexthop) != NULL) {
			read_lock_bh(&neigh->lock);
			switch (neigh->nud_state) {
			case NUD_REACHABLE:
				m += 3;
				break;

			case NUD_STALE:
			case NUD_DELAY:
			case NUD_PROBE:
				m += 2;
				break;

			case NUD_NOARP:
			case NUD_PERMANENT:
				m += 1;
				break;

			case NUD_INCOMPLETE:
			default:
				/* Not (yet) reachable: not a candidate. */
				read_unlock_bh(&neigh->lock);
				continue;
			}
			read_unlock_bh(&neigh->lock);
		} else {
			continue;
		}

		if (m > mpri || m >= 12) {
			match = sprt;
			mpri = m;
			if (m >= 12) {
				/* we choose the latest default router if it
				 * is in (probably) reachable state.
				 * If route changed, we should do pmtu
				 * discovery. --yoshfuji
				 */
				break;
			}
		}
	}

	spin_lock(&rt6_dflt_lock);
	if (!match) {
		/*
		 *	No default routers are known to be reachable.
		 *	SHOULD round robin
		 */
		if (rt6_dflt_pointer) {
			/* First the routers after the previous choice... */
			for (sprt = rt6_dflt_pointer->u.next;
			     sprt; sprt = sprt->u.next) {
				if (sprt->u.dst.obsolete <= 0 &&
				    sprt->u.dst.error == 0) {
					match = sprt;
					break;
				}
			}
			/* ...then wrap around, up to and including it. */
			for (sprt = rt;
			     !match && sprt;
			     sprt = sprt->u.next) {
				if (sprt->u.dst.obsolete <= 0 &&
				    sprt->u.dst.error == 0) {
					match = sprt;
					break;
				}
				if (sprt == rt6_dflt_pointer)
					break;
			}
		}
	}

	if (match)
		rt6_dflt_pointer = match;

	spin_unlock(&rt6_dflt_lock);

	if (!match) {
		/*
		 * Last Resort: if no default routers found,
		 * use addrconf default route.
		 * We don't record this route.
		 */
		for (sprt = ip6_routing_table.leaf;
		     sprt; sprt = sprt->u.next) {
			if ((sprt->rt6i_flags & RTF_DEFAULT) &&
			    (!oif ||
			     (sprt->rt6i_dev &&
			      sprt->rt6i_dev->ifindex == oif))) {
				match = sprt;
				break;
			}
		}
		if (!match) {
			/* no default route.  give up. */
			match = &ip6_null_entry;
		}
	}

	return match;
}
301
/*
 * Public route lookup: find the best route to @daddr (with @saddr
 * considered only on subtree builds), filtered by @oif / @strict as in
 * rt6_device_match().  Returns a held rt6_info on success, or NULL if
 * the matched entry carries an error (e.g. the null/discard entry);
 * on success the caller owns a reference and must dst_release() it.
 */
struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
			    int oif, int strict)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&rt6_lock);
	fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
	/* fn is dereferenced unchecked: presumably fib6_lookup() always
	   falls back to the tree root, whose leaf is ip6_null_entry --
	   confirm against net/ipv6/ip6_fib.c. */
	rt = rt6_device_match(fn->leaf, oif, strict);
	/* Take the reference while still under rt6_lock. */
	dst_hold(&rt->u.dst);
	rt->u.dst.__use++;
	read_unlock_bh(&rt6_lock);

	rt->u.dst.lastuse = jiffies;
	if (rt->u.dst.error == 0)
		return rt;
	dst_release(&rt->u.dst);
	return NULL;
}
321
322 /* rt6_ins is called with FREE rt6_lock.
323 It takes new route entry, the addition fails by any reason the
324 route is freed. In any case, if caller does not hold it, it may
325 be destroyed.
326 */
327
rt6_ins(struct rt6_info * rt,struct nlmsghdr * nlh,struct netlink_skb_parms * req)328 static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
329 {
330 int err;
331
332 write_lock_bh(&rt6_lock);
333 err = fib6_add(&ip6_routing_table, rt, nlh, req);
334 write_unlock_bh(&rt6_lock);
335
336 return err;
337 }
338
339 /* No rt6_lock! If COW failed, the function returns dead route entry
340 with dst->error set to errno value.
341 */
342
/*
 * Copy-on-write a network route into a /128 RTF_CACHE host clone for
 * @daddr and insert it into the FIB.  Called with rt6_lock NOT held.
 * Never returns NULL: on allocation failure a held ip6_null_entry is
 * returned, and if insertion fails the clone comes back dead with
 * dst->error set to the errno (callers check for -EEXIST to detect
 * insertion races).  The returned entry is always held.
 */
static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
				struct in6_addr *saddr, struct netlink_skb_parms *req)
{
	int err;
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort);

	if (rt) {
		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);

		/* On-link route: the destination itself is the next hop. */
		if (!(rt->rt6i_flags&RTF_GATEWAY))
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);

		rt->rt6i_dst.plen = 128;
		rt->rt6i_flags |= RTF_CACHE;
		rt->u.dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);

		/* Reference for the caller; rt6_ins() frees on failure. */
		dst_hold(&rt->u.dst);

		err = rt6_ins(rt, NULL, req);
		if (err == 0)
			return rt;

		rt->u.dst.error = err;

		return rt;
	}
	dst_hold(&ip6_null_entry.u.dst);
	return &ip6_null_entry;
}
387
388 #ifdef CONFIG_RT6_POLICY
rt6_flow_lookup_in(struct rt6_info * rt,struct sk_buff * skb)389 static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
390 struct sk_buff *skb)
391 {
392 struct in6_addr *daddr, *saddr;
393 struct fl_acc_args arg;
394
395 arg.type = FL_ARG_FORWARD;
396 arg.fl_u.skb = skb;
397
398 saddr = &skb->nh.ipv6h->saddr;
399 daddr = &skb->nh.ipv6h->daddr;
400
401 return rt6_flow_lookup(rt, daddr, saddr, &arg);
402 }
403
rt6_flow_lookup_out(struct rt6_info * rt,struct sock * sk,struct flowi * fl)404 static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
405 struct sock *sk,
406 struct flowi *fl)
407 {
408 struct fl_acc_args arg;
409
410 arg.type = FL_ARG_ORIGIN;
411 arg.fl_u.fl_o.sk = sk;
412 arg.fl_u.fl_o.flow = fl;
413
414 return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
415 &arg);
416 }
417
418 #endif
419
/*
 * Backtracking helper for ip6_route_input()/ip6_route_output(): when a
 * strict device match failed (rt resolved to the null entry), climb
 * the fib tree towards the root and retry ("goto restart") at the
 * first ancestor carrying route info; hitting the root gives up with
 * the held null entry ("goto out").  Relies on locals fn, rt, strict
 * and on the restart/out labels existing in the enclosing function.
 */
#define BACKTRACK() \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	}
431
432
/*
 * Input-path route resolution: look up the packet's daddr/saddr in the
 * FIB and attach the chosen (held) dst to skb->dst.  skb->dst is
 * always set on return -- an unroutable packet gets the null entry,
 * whose dst.error carries -ENETUNREACH.  Connected routes without a
 * neighbour are cloned into RTF_CACHE host entries via rt6_cow(),
 * retrying the lookup up to three times if an insertion race is lost.
 */
void ip6_route_input(struct sk_buff *skb)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int strict;
	int attempts = 3;

	/* Multicast/link-local destinations must honour the inbound device. */
	strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
			 &skb->nh.ipv6h->saddr);

restart:
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		if (ip6_rt_policy == 0) {
			rt = rt6_device_match(rt, skb->dev->ifindex, strict);
			BACKTRACK();	/* may goto restart or out */
			dst_hold(&rt->u.dst);
			goto out;
		}

#ifdef CONFIG_RT6_POLICY
		if ((rt->rt6i_flags & RTF_FLOW)) {
			struct rt6_info *sprt;

			for (sprt = rt; sprt; sprt = sprt->u.next) {
				if (rt6_flow_match_in(sprt, skb)) {
					rt = sprt;
					dst_hold(&rt->u.dst);
					goto out;
				}
			}
		}
#endif
	}

	/* Non-strict match here; BACKTRACK still fires for strict types. */
	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
	BACKTRACK();

	if (ip6_rt_policy == 0) {
		if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
			/* Connected route without a neighbour: COW a host
			   entry.  rt6_cow() requires rt6_lock to be free. */
			read_unlock_bh(&rt6_lock);

			rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
				     &skb->nh.ipv6h->saddr,
				     &NETLINK_CB(skb));

			if (rt->u.dst.error != -EEXIST || --attempts <= 0)
				goto out2;
			/* Race condition! In the gap, when rt6_lock was
			   released someone could insert this route.  Relookup.
			*/
			goto relookup;
		}
		dst_hold(&rt->u.dst);
	} else {
#ifdef CONFIG_RT6_POLICY
		rt = rt6_flow_lookup_in(rt, skb);
#else
		/* NEVER REACHED */
#endif
	}

out:
	read_unlock_bh(&rt6_lock);
out2:
	/* rt is held by one of the paths above (rt6_cow returns held). */
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	skb->dst = (struct dst_entry *) rt;
}
508
/*
 * Output-path route resolution for locally generated traffic.  Same
 * structure as ip6_route_input(), keyed by the flow @fl instead of a
 * packet, with one addition: default routes at addrconf priority or
 * worse go through rt6_best_dflt() for RFC 2461 router selection.
 * Always returns a held dst_entry (possibly the null entry with
 * dst.error set).
 */
struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int strict;
	int attempts = 3;

	/* Multicast/link-local destinations must honour fl->oif. */
	strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);

relookup:
	read_lock_bh(&rt6_lock);

	fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
			 fl->nl_u.ip6_u.saddr);

restart:
	rt = fn->leaf;

	if ((rt->rt6i_flags & RTF_CACHE)) {
		if (ip6_rt_policy == 0) {
			rt = rt6_device_match(rt, fl->oif, strict);
			BACKTRACK();	/* may goto restart or out */
			dst_hold(&rt->u.dst);
			goto out;
		}

#ifdef CONFIG_RT6_POLICY
		if ((rt->rt6i_flags & RTF_FLOW)) {
			struct rt6_info *sprt;

			for (sprt = rt; sprt; sprt = sprt->u.next) {
				if (rt6_flow_match_out(sprt, sk)) {
					rt = sprt;
					dst_hold(&rt->u.dst);
					goto out;
				}
			}
		}
#endif
	}
	if (rt->rt6i_flags & RTF_DEFAULT) {
		/* Only router-advertised defaults go through RFC 2461
		   selection; better-metric (user) defaults stand as-is. */
		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
			rt = rt6_best_dflt(rt, fl->oif);
	} else {
		rt = rt6_device_match(rt, fl->oif, strict);
		BACKTRACK();
	}

	if (ip6_rt_policy == 0) {
		if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
			/* COW a host entry; rt6_cow() needs rt6_lock free. */
			read_unlock_bh(&rt6_lock);

			rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
				     fl->nl_u.ip6_u.saddr, NULL);

			if (rt->u.dst.error != -EEXIST || --attempts <= 0)
				goto out2;

			/* Race condition! In the gap, when rt6_lock was
			   released someone could insert this route.  Relookup.
			*/
			goto relookup;
		}
		dst_hold(&rt->u.dst);
	} else {
#ifdef CONFIG_RT6_POLICY
		rt = rt6_flow_lookup_out(rt, sk, fl);
#else
		/* NEVER REACHED */
#endif
	}

out:
	read_unlock_bh(&rt6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;
	return &rt->u.dst;
}
588
589
590 /*
591 * Destination cache support functions
592 */
593
/*
 * dst_ops->check callback: a cached IPv6 dst stays valid while it is
 * still linked into a fib node whose serial number matches @cookie.
 * On mismatch the reference is dropped and NULL returned so the
 * caller re-resolves the route.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt == NULL || rt->rt6i_node == NULL ||
	    rt->rt6i_node->fn_sernum != cookie) {
		dst_release(dst);
		return NULL;
	}

	return dst;
}
606
ip6_dst_reroute(struct dst_entry * dst,struct sk_buff * skb)607 static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
608 {
609 /*
610 * FIXME
611 */
612 RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
613 __builtin_return_address(0)));
614 return NULL;
615 }
616
ip6_negative_advice(struct dst_entry * dst)617 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
618 {
619 struct rt6_info *rt = (struct rt6_info *) dst;
620
621 if (rt) {
622 if (rt->rt6i_flags & RTF_CACHE)
623 ip6_del_rt(rt, NULL, NULL);
624 else
625 dst_release(dst);
626 }
627 return NULL;
628 }
629
ip6_link_failure(struct sk_buff * skb)630 static void ip6_link_failure(struct sk_buff *skb)
631 {
632 struct rt6_info *rt;
633
634 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
635
636 rt = (struct rt6_info *) skb->dst;
637 if (rt) {
638 if (rt->rt6i_flags&RTF_CACHE) {
639 dst_set_expires(&rt->u.dst, 0);
640 rt->rt6i_flags |= RTF_EXPIRES;
641 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
642 rt->rt6i_node->fn_sernum = -1;
643 }
644 }
645
ip6_dst_gc()646 static int ip6_dst_gc()
647 {
648 static unsigned expire = 30*HZ;
649 static unsigned long last_gc;
650 unsigned long now = jiffies;
651
652 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
653 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
654 goto out;
655
656 expire++;
657 fib6_run_gc(expire);
658 last_gc = now;
659 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
660 expire = ip6_rt_gc_timeout>>1;
661
662 out:
663 expire -= expire>>ip6_rt_gc_elasticity;
664 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
665 }
666
667 /* Clean host part of a prefix. Not necessary in radix tree,
668 but results in cleaner routing tables.
669
670 Remove it only when all the things will work!
671 */
672
ipv6_addr_prefix(struct in6_addr * pfx,const struct in6_addr * addr,int plen)673 static void ipv6_addr_prefix(struct in6_addr *pfx,
674 const struct in6_addr *addr, int plen)
675 {
676 int b = plen&0x7;
677 int o = plen>>3;
678
679 memcpy(pfx->s6_addr, addr, o);
680 if (o < 16)
681 memset(pfx->s6_addr + o, 0, 16 - o);
682 if (b != 0)
683 pfx->s6_addr[o] = addr->s6_addr[o]&(0xff00 >> b);
684 }
685
ipv6_get_mtu(struct net_device * dev)686 static int ipv6_get_mtu(struct net_device *dev)
687 {
688 int mtu = IPV6_MIN_MTU;
689 struct inet6_dev *idev;
690
691 idev = in6_dev_get(dev);
692 if (idev) {
693 mtu = idev->cnf.mtu6;
694 in6_dev_put(idev);
695 }
696 return mtu;
697 }
698
ipv6_get_hoplimit(struct net_device * dev)699 static int ipv6_get_hoplimit(struct net_device *dev)
700 {
701 int hoplimit = ipv6_devconf.hop_limit;
702 struct inet6_dev *idev;
703
704 idev = in6_dev_get(dev);
705 if (idev) {
706 hoplimit = idev->cnf.hop_limit;
707 in6_dev_put(idev);
708 }
709 return hoplimit;
710 }
711
712 /*
713 *
714 */
715
/*
 * Install the route described by @rtmsg (from ioctl or netlink).
 * Validates prefix lengths, resolves the output device (from the
 * explicit ifindex or, for gatewayed routes, from a lookup of the
 * gateway itself), promotes loopback/reject routes to discard entries,
 * attaches a neighbour for gatewayed routes and fills in mtu/advmss/
 * hoplimit before inserting via rt6_ins().  Returns 0 or -errno; on
 * error the partially built entry and device reference are released.
 */
int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
{
	int err;
	struct rtmsg *r;
	struct rt6_info *rt;
	struct net_device *dev = NULL;
	int addr_type;

	/* IPv6 prefixes cannot exceed 128 bits. */
	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-based routing only exists with subtree support. */
	if (rtmsg->rtmsg_src_len)
		return -EINVAL;
#endif
	if (rtmsg->rtmsg_metric == 0)
		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;

	rt = dst_alloc(&ip6_dst_ops);

	if (rt == NULL)
		return -ENOMEM;

	/* obsolete = -1: entry never validated by ip6_dst_check cookie. */
	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = rtmsg->rtmsg_info;
	if (nlh && (r = NLMSG_DATA(nlh))) {
		rt->rt6i_protocol = r->rtm_protocol;
	} else {
		rt->rt6i_protocol = RTPROT_BOOT;
	}

	addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);

	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	if (rtmsg->rtmsg_ifindex) {
		dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
		err = -ENODEV;
		if (dev == NULL)
			goto out;
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
	rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
	rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
#endif

	rt->rt6i_metric = rtmsg->rtmsg_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		if (dev)
			dev_put(dev);
		dev = &loopback_dev;
		dev_hold(dev);
		rt->u.dst.output = ip6_pkt_discard;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &rtmsg->rtmsg_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not be able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* The gateway must itself be reachable by a
			   non-gatewayed route on the chosen device. */
			grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				/* No ifindex given: adopt the gateway's. */
				dev = grt->rt6i_dev;
				dev_hold(dev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
		rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
	else
		rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
	rt->rt6i_flags = rtmsg->rtmsg_flags;

install_route:
	rt->u.dst.pmtu = ipv6_get_mtu(dev);
	rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
	/* Maximal non-jumbo IPv6 payload is 65535 and corresponding
	   MSS is 65535 - tcp_header_size. 65535 is also valid and
	   means: "any MSS, rely only on pmtu discovery"
	 */
	if (rt->u.dst.advmss > 65535-20)
		rt->u.dst.advmss = 65535;
	rt->u.dst.dev = dev;
	/* rt6_ins() consumes rt (frees it on failure); dev ref moves in. */
	return rt6_ins(rt, nlh, req);

out:
	if (dev)
		dev_put(dev);
	dst_free((struct dst_entry *) rt);
	return err;
}
877
/*
 * Unlink @rt from the FIB.  Consumes the caller's reference (via
 * dst_release under rt6_lock) and clears the cached default-router
 * hint, since it may point at the route being removed.  Returns the
 * fib6_del() result.
 */
int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
{
	int err;

	write_lock_bh(&rt6_lock);

	/* rt6_dflt_pointer may reference this route; invalidate it. */
	spin_lock_bh(&rt6_dflt_lock);
	rt6_dflt_pointer = NULL;
	spin_unlock_bh(&rt6_dflt_lock);

	dst_release(&rt->u.dst);

	err = fib6_del(rt, nlh, req);
	write_unlock_bh(&rt6_lock);

	return err;
}
895
/*
 * Delete the first route matching @rtmsg: exact prefix (via
 * fib6_locate), then optional ifindex, gateway (when RTF_GATEWAY is
 * requested) and metric filters.  Returns -ESRCH when nothing matches.
 * The matched entry is held before rt6_lock is dropped, then handed to
 * ip6_del_rt() which consumes that reference.
 */
int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct netlink_skb_parms *req)
{
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	read_lock_bh(&rt6_lock);

	fn = fib6_locate(&ip6_routing_table,
			 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
			 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->u.next) {
			if (rtmsg->rtmsg_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
				continue;
			if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
			    ipv6_addr_cmp(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
				continue;
			/* metric 0 means "any metric". */
			if (rtmsg->rtmsg_metric &&
			    rtmsg->rtmsg_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->u.dst);
			read_unlock_bh(&rt6_lock);

			return ip6_del_rt(rt, nlh, req);
		}
	}
	read_unlock_bh(&rt6_lock);

	return err;
}
930
931 /*
932 * Handle redirects
933 */
rt6_redirect(struct in6_addr * dest,struct in6_addr * saddr,struct neighbour * neigh,int on_link)934 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
935 struct neighbour *neigh, int on_link)
936 {
937 struct rt6_info *rt, *nrt;
938
939 /* Locate old route to this destination. */
940 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
941
942 if (rt == NULL)
943 return;
944
945 if (neigh->dev != rt->rt6i_dev)
946 goto out;
947
948 /* Redirect received -> path was valid.
949 Look, redirects are sent only in response to data packets,
950 so that this nexthop apparently is reachable. --ANK
951 */
952 dst_confirm(&rt->u.dst);
953
954 /* Duplicate redirect: silently ignore. */
955 if (neigh == rt->u.dst.neighbour)
956 goto out;
957
958 /* Current route is on-link; redirect is always invalid.
959
960 Seems, previous statement is not true. It could
961 be node, which looks for us as on-link (f.e. proxy ndisc)
962 But then router serving it might decide, that we should
963 know truth 8)8) --ANK (980726).
964 */
965 if (!(rt->rt6i_flags&RTF_GATEWAY))
966 goto out;
967
968 /*
969 * RFC 1970 specifies that redirects should only be
970 * accepted if they come from the nexthop to the target.
971 * Due to the way default routers are chosen, this notion
972 * is a bit fuzzy and one might need to check all default
973 * routers.
974 */
975
976 if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
977 if (rt->rt6i_flags & RTF_DEFAULT) {
978 struct rt6_info *rt1;
979
980 read_lock(&rt6_lock);
981 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
982 if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
983 dst_hold(&rt1->u.dst);
984 dst_release(&rt->u.dst);
985 read_unlock(&rt6_lock);
986 rt = rt1;
987 goto source_ok;
988 }
989 }
990 read_unlock(&rt6_lock);
991 }
992 if (net_ratelimit())
993 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
994 "for redirect target\n");
995 goto out;
996 }
997
998 source_ok:
999
1000 /*
1001 * We have finally decided to accept it.
1002 */
1003
1004 nrt = ip6_rt_copy(rt);
1005 if (nrt == NULL)
1006 goto out;
1007
1008 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1009 if (on_link)
1010 nrt->rt6i_flags &= ~RTF_GATEWAY;
1011
1012 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1013 nrt->rt6i_dst.plen = 128;
1014 nrt->u.dst.flags |= DST_HOST;
1015
1016 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1017 nrt->rt6i_nexthop = neigh_clone(neigh);
1018 /* Reset pmtu, it may be better */
1019 nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
1020 nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1021 if (rt->u.dst.advmss > 65535-20)
1022 rt->u.dst.advmss = 65535;
1023 nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
1024
1025 if (rt6_ins(nrt, NULL, NULL))
1026 goto out;
1027
1028 if (rt->rt6i_flags&RTF_CACHE) {
1029 ip6_del_rt(rt, NULL, NULL);
1030 return;
1031 }
1032
1033 out:
1034 dst_release(&rt->u.dst);
1035 return;
1036 }
1037
1038 /*
1039 * Handle ICMP "packet too big" messages
1040 * i.e. Path MTU discovery
1041 */
1042
/*
 * Handle ICMPv6 "Packet Too Big" for the path @saddr -> @daddr via
 * @dev (RFC 1981 Path MTU discovery).  The reported @pmtu is clamped
 * up to IPV6_MIN_MTU, and only ever lowers the stored value.  Cached
 * host routes are updated in place; network routes are cloned (COW
 * for connected routes, explicit copy for gatewayed/NONEXTHOP ones)
 * so the reduced pmtu expires after ip6_rt_mtu_expires and the
 * original value returns automatically.
 */
void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct rt6_info *rt, *nrt;

	if (pmtu < IPV6_MIN_MTU) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
			       pmtu);
		/* According to RFC1981, the PMTU is set to the IPv6 minimum
		   link MTU if the node receives a Packet Too Big message
		   reporting next-hop MTU that is less than the IPv6 minimum MTU.
		 */
		pmtu = IPV6_MIN_MTU;
	}

	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);

	if (rt == NULL)
		return;

	/* pmtu may only shrink. */
	if (pmtu >= rt->u.dst.pmtu)
		goto out;

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt->u.dst.pmtu = pmtu;
		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		/* rt6_cow() inserts the clone itself and returns it held. */
		nrt = rt6_cow(rt, daddr, saddr, NULL);
		if (!nrt->u.dst.error) {
			nrt->u.dst.pmtu = pmtu;
			/* According to RFC 1981, detecting PMTU increase shouldn't be
			   happened within 5 mins, the recommended timer is 10 mins.
			   Here this route expiration time is set to ip6_rt_mtu_expires
			   which is 10 mins. After 10 mins the decreased pmtu is expired
			   and detecting PMTU increase will be automatically happened.
			 */
			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
			dst_release(&nrt->u.dst);
		}
	} else {
		nrt = ip6_rt_copy(rt);
		if (nrt == NULL)
			goto out;
		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
		nrt->rt6i_dst.plen = 128;
		nrt->u.dst.flags |= DST_HOST;
		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
		nrt->u.dst.pmtu = pmtu;
		/* rt6_ins() consumes nrt (frees it on failure). */
		rt6_ins(nrt, NULL, NULL);
	}

out:
	dst_release(&rt->u.dst);
}
1121
1122 /*
1123 * Misc support functions
1124 */
1125
/*
 * Allocate a fresh rt6_info mirroring @ort: dst handlers, metrics,
 * device (with an extra reference), hoplimit, gateway and routing
 * keys.  The copy starts with metric 0, no expiry, RTF_EXPIRES
 * cleared, and -- deliberately -- no rt6i_nexthop: callers attach
 * their own neighbour entry.  Returns NULL on allocation failure.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct rt6_info *rt;

	rt = dst_alloc(&ip6_dst_ops);

	if (rt) {
		rt->u.dst.input = ort->u.dst.input;
		rt->u.dst.output = ort->u.dst.output;

		/* Copies the whole metrics array starting at mxlock. */
		memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
		rt->u.dst.dev = ort->u.dst.dev;
		if (rt->u.dst.dev)
			dev_hold(rt->u.dst.dev);
		rt->u.dst.lastuse = jiffies;
		rt->rt6i_hoplimit = ort->rt6i_hoplimit;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
	}
	return rt;
}
1155
rt6_get_dflt_router(struct in6_addr * addr,struct net_device * dev)1156 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1157 {
1158 struct rt6_info *rt;
1159 struct fib6_node *fn;
1160
1161 fn = &ip6_routing_table;
1162
1163 write_lock_bh(&rt6_lock);
1164 for (rt = fn->leaf; rt; rt=rt->u.next) {
1165 if (dev == rt->rt6i_dev &&
1166 ipv6_addr_cmp(&rt->rt6i_gateway, addr) == 0)
1167 break;
1168 }
1169 if (rt)
1170 dst_hold(&rt->u.dst);
1171 write_unlock_bh(&rt6_lock);
1172 return rt;
1173 }
1174
rt6_add_dflt_router(struct in6_addr * gwaddr,struct net_device * dev)1175 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1176 struct net_device *dev)
1177 {
1178 struct in6_rtmsg rtmsg;
1179
1180 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1181 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1182 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1183 rtmsg.rtmsg_metric = 1024;
1184 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP;
1185
1186 rtmsg.rtmsg_ifindex = dev->ifindex;
1187
1188 ip6_route_add(&rtmsg, NULL, NULL);
1189 return rt6_get_dflt_router(gwaddr, dev);
1190 }
1191
/*
 * Flush default routers from the table.  @last_resort selects
 * RTF_ALLONLINK entries instead of addrconf default routes.  The scan
 * restarts from the head after every deletion because ip6_del_rt()
 * must be called without rt6_lock, which lets the chain change
 * underneath us.
 */
void rt6_purge_dflt_routers(int last_resort)
{
	struct rt6_info *rt;
	u32 flags;

	if (last_resort)
		flags = RTF_ALLONLINK;
	else
		flags = RTF_DEFAULT | RTF_ADDRCONF;

restart:
	read_lock_bh(&rt6_lock);
	for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
		if (rt->rt6i_flags & flags) {
			/* Hold for ip6_del_rt(), which consumes the ref. */
			dst_hold(&rt->u.dst);

			/* The selection hint may point here; clear it. */
			spin_lock_bh(&rt6_dflt_lock);
			rt6_dflt_pointer = NULL;
			spin_unlock_bh(&rt6_dflt_lock);

			read_unlock_bh(&rt6_lock);

			ip6_del_rt(rt, NULL, NULL);

			goto restart;
		}
	}
	read_unlock_bh(&rt6_lock);
}
1221
ipv6_route_ioctl(unsigned int cmd,void * arg)1222 int ipv6_route_ioctl(unsigned int cmd, void *arg)
1223 {
1224 struct in6_rtmsg rtmsg;
1225 int err;
1226
1227 switch(cmd) {
1228 case SIOCADDRT: /* Add a route */
1229 case SIOCDELRT: /* Delete a route */
1230 if (!capable(CAP_NET_ADMIN))
1231 return -EPERM;
1232 err = copy_from_user(&rtmsg, arg,
1233 sizeof(struct in6_rtmsg));
1234 if (err)
1235 return -EFAULT;
1236
1237 rtnl_lock();
1238 switch (cmd) {
1239 case SIOCADDRT:
1240 err = ip6_route_add(&rtmsg, NULL, NULL);
1241 break;
1242 case SIOCDELRT:
1243 err = ip6_route_del(&rtmsg, NULL, NULL);
1244 break;
1245 default:
1246 err = -EINVAL;
1247 }
1248 rtnl_unlock();
1249
1250 return err;
1251 };
1252
1253 return -EINVAL;
1254 }
1255
1256 /*
1257 * Drop the packet on the floor
1258 */
1259
/*
 *	Drop an undeliverable packet: bump the no-route counter, report
 *	"destination unreachable / no route" back to the sender, and free
 *	the skb.  Always returns 0.
 */
int ip6_pkt_discard(struct sk_buff *skb)
{
	IP6_INC_STATS(Ip6OutNoRoutes);
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
	kfree_skb(skb);
	return 0;
}
1267
1268 /*
1269 * Add address
1270 */
1271
ip6_rt_addr_add(struct in6_addr * addr,struct net_device * dev)1272 int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
1273 {
1274 struct rt6_info *rt;
1275
1276 rt = dst_alloc(&ip6_dst_ops);
1277 if (rt == NULL)
1278 return -ENOMEM;
1279
1280 rt->u.dst.flags = DST_HOST;
1281 rt->u.dst.input = ip6_input;
1282 rt->u.dst.output = ip6_output;
1283 rt->rt6i_dev = dev_get_by_name("lo");
1284 rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
1285 rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
1286 if (rt->u.dst.advmss > 65535-20)
1287 rt->u.dst.advmss = 65535;
1288 rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
1289 rt->u.dst.obsolete = -1;
1290
1291 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1292 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1293 if (rt->rt6i_nexthop == NULL) {
1294 dst_free((struct dst_entry *) rt);
1295 return -ENOMEM;
1296 }
1297
1298 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1299 rt->rt6i_dst.plen = 128;
1300 rt6_ins(rt, NULL, NULL);
1301
1302 return 0;
1303 }
1304
1305 /* Delete address. Warning: you should check that this address
1306 disappeared before calling this function.
1307 */
1308
ip6_rt_addr_del(struct in6_addr * addr,struct net_device * dev)1309 int ip6_rt_addr_del(struct in6_addr *addr, struct net_device *dev)
1310 {
1311 struct rt6_info *rt;
1312 int err = -ENOENT;
1313
1314 rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
1315 if (rt) {
1316 if (rt->rt6i_dst.plen == 128)
1317 err = ip6_del_rt(rt, NULL, NULL);
1318 else
1319 dst_release(&rt->u.dst);
1320 }
1321
1322 return err;
1323 }
1324
1325 #ifdef CONFIG_RT6_POLICY
1326
/*
 *	Match an inbound packet against the flow filter attached to @rt.
 *	Returns 1 on match (or when the route carries no filter at all),
 *	0 on mismatch.  (Compiled out: CONFIG_RT6_POLICY is #undef'd above.)
 */
static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
{
	struct flow_filter *frule;
	struct pkt_filter *filter;
	int res = 1;

	if ((frule = rt->rt6i_filter) == NULL)
		goto out;

	if (frule->type != FLR_INPUT) {
		res = 0;
		goto out;
	}

	/* Every 32-bit word selected by a filter must match under its mask. */
	for (filter = frule->u.filter; filter; filter = filter->next) {
		__u32 *word;

		/* filter->offset counts 32-bit words from the transport header */
		word = (__u32 *) skb->h.raw;
		word += filter->offset;

		if ((*word ^ filter->value) & filter->mask) {
			res = 0;
			break;
		}
	}

out:
	return res;
}
1356
/*
 *	Match an outbound flow against the filter attached to @rt: the
 *	filter must reference the same socket.  Returns 1 on match (or no
 *	filter), 0 otherwise.  (Compiled out: CONFIG_RT6_POLICY is
 *	#undef'd above.)
 */
static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
{
	struct flow_filter *frule;
	int res = 1;

	if ((frule = rt->rt6i_filter) == NULL)
		goto out;

	/* NOTE(review): this tests FLR_INPUT exactly like
	 * rt6_flow_match_in(); for the output direction FLR_OUTPUT looks
	 * intended — confirm before ever enabling this code.
	 */
	if (frule->type != FLR_INPUT) {
		res = 0;
		goto out;
	}

	if (frule->u.sk != sk)
		res = 0;
out:
	return res;
}
1375
rt6_flow_lookup(struct rt6_info * rt,struct in6_addr * daddr,struct in6_addr * saddr,struct fl_acc_args * args)1376 static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
1377 struct in6_addr *daddr,
1378 struct in6_addr *saddr,
1379 struct fl_acc_args *args)
1380 {
1381 struct flow_rule *frule;
1382 struct rt6_info *nrt = NULL;
1383 struct pol_chain *pol;
1384
1385 for (pol = rt6_pol_list; pol; pol = pol->next) {
1386 struct fib6_node *fn;
1387 struct rt6_info *sprt;
1388
1389 fn = fib6_lookup(pol->rules, daddr, saddr);
1390
1391 do {
1392 for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
1393 int res;
1394
1395 frule = sprt->rt6i_flowr;
1396 #if RT6_DEBUG >= 2
1397 if (frule == NULL) {
1398 printk(KERN_DEBUG "NULL flowr\n");
1399 goto error;
1400 }
1401 #endif
1402 res = frule->ops->accept(rt, sprt, args, &nrt);
1403
1404 switch (res) {
1405 case FLOWR_SELECT:
1406 goto found;
1407 case FLOWR_CLEAR:
1408 goto next_policy;
1409 case FLOWR_NODECISION:
1410 break;
1411 default:
1412 goto error;
1413 };
1414 }
1415
1416 fn = fn->parent;
1417
1418 } while ((fn->fn_flags & RTN_TL_ROOT) == 0);
1419
1420 next_policy:
1421 }
1422
1423 error:
1424 dst_hold(&ip6_null_entry.u.dst);
1425 return &ip6_null_entry;
1426
1427 found:
1428 if (nrt == NULL)
1429 goto error;
1430
1431 nrt->rt6i_flags |= RTF_CACHE;
1432 dst_hold(&nrt->u.dst);
1433 err = rt6_ins(nrt, NULL, NULL);
1434 if (err)
1435 nrt->u.dst.error = err;
1436 return nrt;
1437 }
1438 #endif
1439
fib6_ifdown(struct rt6_info * rt,void * arg)1440 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1441 {
1442 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1443 rt != &ip6_null_entry) {
1444 RT6_TRACE("deleted by ifdown %p\n", rt);
1445 return -1;
1446 }
1447 return 0;
1448 }
1449
/*
 *	Flush every route through @dev from the table (all routes when
 *	@dev is NULL), e.g. on interface shutdown.
 */
void rt6_ifdown(struct net_device *dev)
{
	write_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
	write_unlock_bh(&rt6_lock);
}
1456
/* Argument bundle for rt6_mtu_change_route(), passed through
 * fib6_clean_tree() by rt6_mtu_change().
 */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
1462
/*
 *	Per-route callback for rt6_mtu_change(): propagate a device MTU
 *	change into the route's PMTU and advertised MSS.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;
	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/
	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be sent to
	   trigger PMTU discovery.
	 */
	if (rt->rt6i_dev == arg->dev &&
	    !(rt->u.dst.mxlock&(1<<RTAX_MTU)) &&
	    (rt->u.dst.pmtu > arg->mtu ||
	     (rt->u.dst.pmtu < arg->mtu &&
	      rt->u.dst.pmtu == idev->cnf.mtu6)))
		rt->u.dst.pmtu = arg->mtu;
	/* NOTE(review): the advmss update below is OUTSIDE the device
	 * check above, so it runs for every route in the table, not just
	 * those on arg->dev — confirm whether that is intended.
	 */
	rt->u.dst.advmss = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss);
	if (rt->u.dst.advmss > 65535-20)
		rt->u.dst.advmss = 65535;
	return 0;
}
1501
rt6_mtu_change(struct net_device * dev,unsigned mtu)1502 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1503 {
1504 struct rt6_mtu_change_arg arg;
1505
1506 arg.dev = dev;
1507 arg.mtu = mtu;
1508 read_lock_bh(&rt6_lock);
1509 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1510 read_unlock_bh(&rt6_lock);
1511 }
1512
inet6_rtm_to_rtmsg(struct rtmsg * r,struct rtattr ** rta,struct in6_rtmsg * rtmsg)1513 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1514 struct in6_rtmsg *rtmsg)
1515 {
1516 memset(rtmsg, 0, sizeof(*rtmsg));
1517
1518 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1519 rtmsg->rtmsg_src_len = r->rtm_src_len;
1520 rtmsg->rtmsg_flags = RTF_UP;
1521 if (r->rtm_type == RTN_UNREACHABLE)
1522 rtmsg->rtmsg_flags |= RTF_REJECT;
1523
1524 if (rta[RTA_GATEWAY-1]) {
1525 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1526 return -EINVAL;
1527 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1528 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1529 }
1530 if (rta[RTA_DST-1]) {
1531 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1532 return -EINVAL;
1533 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1534 }
1535 if (rta[RTA_SRC-1]) {
1536 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1537 return -EINVAL;
1538 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1539 }
1540 if (rta[RTA_OIF-1]) {
1541 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1542 return -EINVAL;
1543 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1544 }
1545 if (rta[RTA_PRIORITY-1]) {
1546 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1547 return -EINVAL;
1548 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1549 }
1550 return 0;
1551 }
1552
inet6_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh,void * arg)1553 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1554 {
1555 struct rtmsg *r = NLMSG_DATA(nlh);
1556 struct in6_rtmsg rtmsg;
1557
1558 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1559 return -EINVAL;
1560 return ip6_route_del(&rtmsg, nlh, &NETLINK_CB(skb));
1561 }
1562
inet6_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh,void * arg)1563 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1564 {
1565 struct rtmsg *r = NLMSG_DATA(nlh);
1566 struct in6_rtmsg rtmsg;
1567
1568 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1569 return -EINVAL;
1570 return ip6_route_add(&rtmsg, nlh, &NETLINK_CB(skb));
1571 }
1572
/* State threaded from inet6_dump_fib() through the fib6 walker down to
 * rt6_dump_route().
 */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* skb being filled with route messages */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1578
/*
 *	Serialize one route into @skb as an rtnetlink message of @type.
 *	@dst/@src, when non-NULL, are the concrete addresses of a
 *	RTM_GETROUTE query and override the route's own prefixes; @iif is
 *	the query's input interface.  With @prefix set, non-prefix routes
 *	are skipped (returning 1, "success but nothing written").
 *
 *	Returns skb->len on success, -1 when the skb ran out of room (the
 *	partial message is trimmed away).
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq, int prefix)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;	/* rollback point for failure */
	struct rta_cacheinfo ci;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = NLMSG_PUT(skb, pid, seq, type, sizeof(*rtm));
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	/* Map route flags/device onto the rtnetlink route type. */
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags&(RTF_ADDRCONF|RTF_ALLONLINK))
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		/* answering a query: report the exact destination, /128 */
		RTA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		RTA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif)
		RTA_PUT(skb, RTA_IIF, 4, &iif);
	else if (dst) {
		/* no input interface: report the preferred source address */
		struct in6_addr saddr_buf;
		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}
	if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
		goto rtattr_failure;
	if (rt->u.dst.neighbour)
		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
	if (rt->u.dst.dev)
		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	ci.rta_lastuse = jiffies - rt->u.dst.lastuse;
	if (rt->rt6i_expires)
		ci.rta_expires = rt->rt6i_expires - jiffies;
	else
		ci.rta_expires = 0;
	ci.rta_used = rt->u.dst.__use;
	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
	ci.rta_error = rt->u.dst.error;
	ci.rta_id = 0;
	ci.rta_ts = 0;
	ci.rta_tsage = 0;
	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* skb was too small: undo everything written for this message */
	skb_trim(skb, b - skb->data);
	return -1;
}
1667
rt6_dump_route(struct rt6_info * rt,void * p_arg)1668 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1669 {
1670 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1671 int prefix;
1672
1673 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1674 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1675 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1676 } else
1677 prefix = 0;
1678
1679 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1680 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1681 prefix);
1682 }
1683
fib6_dump_node(struct fib6_walker_t * w)1684 static int fib6_dump_node(struct fib6_walker_t *w)
1685 {
1686 int res;
1687 struct rt6_info *rt;
1688
1689 for (rt = w->leaf; rt; rt = rt->u.next) {
1690 res = rt6_dump_route(rt, w->args);
1691 if (res < 0) {
1692 /* Frame is full, suspend walking */
1693 w->leaf = rt;
1694 return 1;
1695 }
1696 BUG_TRAP(res!=0);
1697 }
1698 w->leaf = NULL;
1699 return 0;
1700 }
1701
/*
 *	Tear down dump state: unlink and free the walker (if any) and
 *	restore the callback's original ->done handler that
 *	inet6_dump_fib() stashed in cb->args[1].
 */
static void fib6_dump_end(struct netlink_callback *cb)
{
	struct fib6_walker_t *w = (void*)cb->args[0];

	if (w) {
		cb->args[0] = 0;
		fib6_walker_unlink(w);
		kfree(w);
	}
	if (cb->args[1]) {
		cb->done = (void*)cb->args[1];
		cb->args[1] = 0;
	}
}
1716
/*
 *	Destructor hooked into cb->done by inet6_dump_fib(): clean up the
 *	walker, then chain to the original ->done (restored by
 *	fib6_dump_end() into cb->done before this call).
 */
static int fib6_dump_done(struct netlink_callback *cb)
{
	fib6_dump_end(cb);
	return cb->done(cb);
}
1722
/*
 *	RTM_GETROUTE dump entry point.  On the first call a fib6 walker is
 *	allocated and started; subsequent calls resume it.  The walker
 *	pointer lives in cb->args[0] and the caller's original ->done hook
 *	in cb->args[1] so fib6_dump_done() can clean up even if the dump
 *	is aborted.  Returns the number of bytes placed in @skb, or a
 *	negative error.
 */
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rt6_rtnl_dump_arg arg;
	struct fib6_walker_t *w;
	int res;

	arg.skb = skb;
	arg.cb = cb;

	w = (void*)cb->args[0];
	if (w == NULL) {
		/* New dump:
		 *
		 * 1. hook callback destructor.
		 */
		cb->args[1] = (long)cb->done;
		cb->done = fib6_dump_done;

		/*
		 * 2. allocate and initialize walker.
		 */
		w = kmalloc(sizeof(*w), GFP_ATOMIC);
		if (w == NULL)
			return -ENOMEM;
		RT6_TRACE("dump<%p", w);
		memset(w, 0, sizeof(*w));
		w->root = &ip6_routing_table;
		w->func = fib6_dump_node;
		w->args = &arg;
		cb->args[0] = (long)w;
		read_lock_bh(&rt6_lock);
		res = fib6_walk(w);
		read_unlock_bh(&rt6_lock);
	} else {
		/* Resumed dump: re-point the walker at this call's skb. */
		w->args = &arg;
		read_lock_bh(&rt6_lock);
		res = fib6_walk_continue(w);
		read_unlock_bh(&rt6_lock);
	}
#if RT6_DEBUG >= 3
	if (res <= 0 && skb->len == 0)
		RT6_TRACE("%p>dump end\n", w);
#endif
	res = res < 0 ? res : skb->len;
	/* res < 0 is an error. (really, impossible)
	   res == 0 means that dump is complete, but skb still can contain data.
	   res > 0 dump is not complete, but frame is full.
	 */
	/* Destroy walker, if dump of this table is complete. */
	if (res <= 0)
		fib6_dump_end(cb);
	return res;
}
1776
/*
 *	RTM_GETROUTE (single query) handler: resolve the requested
 *	destination through the routing engine and unicast one
 *	RTM_NEWROUTE reply back to the requester.
 */
int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct rtattr **rta = arg;
	int iif = 0;
	int err = -ENOBUFS;
	struct sk_buff *skb;
	struct flowi fl;
	struct rt6_info *rt;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL)
		goto out;

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb->mac.raw = skb->data;
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* NOTE(review): only these flowi fields are initialized; the rest
	 * of the structure is left as stack garbage — confirm the lookup
	 * path reads nothing else.
	 */
	fl.proto = 0;
	fl.nl_u.ip6_u.daddr = NULL;
	fl.nl_u.ip6_u.saddr = NULL;
	fl.uli_u.icmpt.type = 0;
	fl.uli_u.icmpt.code = 0;
	if (rta[RTA_SRC-1])
		fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
	if (rta[RTA_DST-1])
		fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);

	if (rta[RTA_IIF-1])
		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));

	if (iif) {
		/* Only validates that the interface exists; the device
		 * itself is not used further.
		 */
		struct net_device *dev;
		dev = __dev_get_by_index(iif);
		if (!dev) {
			err = -ENODEV;
			goto out_free;
		}
	}

	fl.oif = 0;
	if (rta[RTA_OIF-1])
		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));

	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);

	/* Attach the result so kfree_skb() releases the dst reference. */
	skb->dst = &rt->u.dst;

	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
	err = rt6_fill_node(skb, rt,
			    fl.nl_u.ip6_u.daddr,
			    fl.nl_u.ip6_u.saddr,
			    iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0);
	if (err < 0) {
		err = -EMSGSIZE;
		goto out_free;
	}

	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
	if (err > 0)
		err = 0;
out:
	return err;
out_free:
	kfree_skb(skb);
	goto out;
}
1847
inet6_rt_notify(int event,struct rt6_info * rt,struct nlmsghdr * nlh,struct netlink_skb_parms * req)1848 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1849 struct netlink_skb_parms *req)
1850 {
1851 struct sk_buff *skb;
1852 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1853 u32 pid = current->pid;
1854 u32 seq = 0;
1855
1856 if (req)
1857 pid = req->pid;
1858 if (nlh)
1859 seq = nlh->nlmsg_seq;
1860
1861 skb = alloc_skb(size, gfp_any());
1862 if (!skb) {
1863 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1864 return;
1865 }
1866 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0) < 0) {
1867 kfree_skb(skb);
1868 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1869 return;
1870 }
1871 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1872 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1873 }
1874
1875 /*
1876 * /proc
1877 */
1878
1879 #ifdef CONFIG_PROC_FS
1880
1881 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1882
/* State for the /proc/net/ipv6_route read handler (see rt6_info_route
 * and rt6_proc_info).
 */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer supplied by procfs */
	int offset;	/* byte offset of this read within the virtual file */
	int length;	/* size of the read */
	int skip;	/* whole entries skipped so far to reach the offset */
	int len;	/* bytes produced so far */
};
1891
/*
 *	Format one route as a fixed-width (RT6_INFO_LEN byte) line of
 *	/proc/net/ipv6_route: dst, src, next hop (all hex), then metric,
 *	refcnt, use count, flags and device name.  Entries before the read
 *	offset are skipped; output stops once the buffer length is reached.
 */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
	int i;

	/* Skip whole entries that lie before the requested offset. */
	if (arg->skip < arg->offset / RT6_INFO_LEN) {
		arg->skip++;
		return 0;
	}

	/* Buffer already full: keep walking but emit nothing more. */
	if (arg->len >= arg->length)
		return 0;

	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_dst.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	for (i=0; i<16; i++) {
		sprintf(arg->buffer + arg->len, "%02x",
			rt->rt6i_src.addr.s6_addr[i]);
		arg->len += 2;
	}
	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
			    rt->rt6i_src.plen);
#else
	/* Without subtrees the source column is a fixed placeholder. */
	sprintf(arg->buffer + arg->len,
		"00000000000000000000000000000000 00 ");
	arg->len += 36;
#endif

	if (rt->rt6i_nexthop) {
		for (i=0; i<16; i++) {
			sprintf(arg->buffer + arg->len, "%02x",
				rt->rt6i_nexthop->primary_key[i]);
			arg->len += 2;
		}
	} else {
		sprintf(arg->buffer + arg->len,
			"00000000000000000000000000000000");
		arg->len += 32;
	}
	arg->len += sprintf(arg->buffer + arg->len,
			    " %08x %08x %08x %08x %8s\n",
			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
			    rt->u.dst.__use, rt->rt6i_flags,
			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}
1945
/*
 *	/proc/net/ipv6_route read handler: walk the table formatting each
 *	route via rt6_info_route(), then adjust for the sub-entry part of
 *	the read offset (whole entries were skipped during the walk).
 */
static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
{
	struct rt6_proc_arg arg;
	arg.buffer = buffer;
	arg.offset = offset;
	arg.length = length;
	arg.skip = 0;
	arg.len = 0;

	read_lock_bh(&rt6_lock);
	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
	read_unlock_bh(&rt6_lock);

	/* The walk skipped offset/RT6_INFO_LEN whole entries; account for
	 * the remainder within the first emitted entry here.
	 */
	*start = buffer;
	if (offset)
		*start += offset % RT6_INFO_LEN;

	arg.len -= offset % RT6_INFO_LEN;

	if (arg.len > length)
		arg.len = length;
	if (arg.len < 0)
		arg.len = 0;

	return arg.len;
}
1972
1973 extern struct rt6_statistics rt6_stats;
1974
/*
 *	/proc/net/rt6_stats read handler: one line of hex counters
 *	describing the fib6 tree and the dst-entry population.
 */
static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length)
{
	int len = sprintf(buffer, "%04x %04x %04x %04x %04x %04x\n",
			  rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
			  rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
			  rt6_stats.fib_rt_cache,
			  atomic_read(&ip6_dst_ops.entries));

	*start = buffer + offset;

	/* Clamp to what remains past the offset, within the read size. */
	len -= offset;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;

	return len;
}
1996 #endif /* CONFIG_PROC_FS */
1997
1998 #ifdef CONFIG_SYSCTL
1999
2000 static int flush_delay;
2001
2002 static
ipv6_sysctl_rtcache_flush(ctl_table * ctl,int write,struct file * filp,void * buffer,size_t * lenp)2003 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2004 void *buffer, size_t *lenp)
2005 {
2006 if (write) {
2007 proc_dointvec(ctl, write, filp, buffer, lenp);
2008 if (flush_delay < 0)
2009 flush_delay = 0;
2010 fib6_run_gc((unsigned long)flush_delay);
2011 return 0;
2012 } else
2013 return -EINVAL;
2014 }
2015
2016 ctl_table ipv6_route_table[] = {
2017 {NET_IPV6_ROUTE_FLUSH, "flush",
2018 &flush_delay, sizeof(int), 0644, NULL,
2019 &ipv6_sysctl_rtcache_flush},
2020 {NET_IPV6_ROUTE_GC_THRESH, "gc_thresh",
2021 &ip6_dst_ops.gc_thresh, sizeof(int), 0644, NULL,
2022 &proc_dointvec},
2023 {NET_IPV6_ROUTE_MAX_SIZE, "max_size",
2024 &ip6_rt_max_size, sizeof(int), 0644, NULL,
2025 &proc_dointvec},
2026 {NET_IPV6_ROUTE_GC_MIN_INTERVAL, "gc_min_interval",
2027 &ip6_rt_gc_min_interval, sizeof(int), 0644, NULL,
2028 &proc_dointvec_jiffies, &sysctl_jiffies},
2029 {NET_IPV6_ROUTE_GC_TIMEOUT, "gc_timeout",
2030 &ip6_rt_gc_timeout, sizeof(int), 0644, NULL,
2031 &proc_dointvec_jiffies, &sysctl_jiffies},
2032 {NET_IPV6_ROUTE_GC_INTERVAL, "gc_interval",
2033 &ip6_rt_gc_interval, sizeof(int), 0644, NULL,
2034 &proc_dointvec_jiffies, &sysctl_jiffies},
2035 {NET_IPV6_ROUTE_GC_ELASTICITY, "gc_elasticity",
2036 &ip6_rt_gc_elasticity, sizeof(int), 0644, NULL,
2037 &proc_dointvec_jiffies, &sysctl_jiffies},
2038 {NET_IPV6_ROUTE_MTU_EXPIRES, "mtu_expires",
2039 &ip6_rt_mtu_expires, sizeof(int), 0644, NULL,
2040 &proc_dointvec_jiffies, &sysctl_jiffies},
2041 {NET_IPV6_ROUTE_MIN_ADVMSS, "min_adv_mss",
2042 &ip6_rt_min_advmss, sizeof(int), 0644, NULL,
2043 &proc_dointvec_jiffies, &sysctl_jiffies},
2044 {0}
2045 };
2046
2047 #endif
2048
2049
ip6_route_init(void)2050 void __init ip6_route_init(void)
2051 {
2052 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2053 sizeof(struct rt6_info),
2054 0, SLAB_HWCACHE_ALIGN,
2055 NULL, NULL);
2056 fib6_init();
2057 #ifdef CONFIG_PROC_FS
2058 proc_net_create("ipv6_route", 0, rt6_proc_info);
2059 proc_net_create("rt6_stats", 0, rt6_proc_stats);
2060 #endif
2061 }
2062
2063 #ifdef MODULE
/*
 *	Module unload: remove the /proc entries, flush every route
 *	(rt6_ifdown(NULL) deletes routes on all devices) and let the fib6
 *	layer tear down its state.
 */
void ip6_route_cleanup(void)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove("ipv6_route");
	proc_net_remove("rt6_stats");
#endif

	rt6_ifdown(NULL);
	fib6_gc_cleanup();
}
2075