1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.18.2.2 2002/01/12 07:54:15 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45
46 #define FSprintk(a...)
47
48 static struct fib_info *fib_info_list;
49 static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
50 int fib_info_cnt;
51
52 #define for_fib_info() { struct fib_info *fi; \
53 for (fi = fib_info_list; fi; fi = fi->fib_next)
54
55 #define endfor_fib_info() }
56
/*
 * Nexthop iteration helpers.
 *
 * Both macros open a block declaring "nhsel" (index of the current
 * nexthop) and "nh" (pointer to it) and start a loop; the statement that
 * follows is the loop body and endfor_nexthops() closes the block.
 * for_nexthops() yields a const pointer, change_nexthops() a writable one.
 * Code placed between the loop body and endfor_nexthops() can still see
 * "nhsel" and "nh".
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* Serializes updates of fib_power/nh_power and nexthop revival. */
static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;

#define for_nexthops(fi)						\
	{								\
		int nhsel;						\
		const struct fib_nh * nh;				\
		for (nhsel = 0, nh = (fi)->fib_nh;			\
		     nhsel < (fi)->fib_nhs;				\
		     nh++, nhsel++)

#define change_nexthops(fi)						\
	{								\
		int nhsel;						\
		struct fib_nh * nh;					\
		for (nhsel = 0, nh = (struct fib_nh*)((fi)->fib_nh);	\
		     nhsel < (fi)->fib_nhs;				\
		     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath support only the first nexthop is visited; the
 * single-pass loop is kept so that callers can use the same syntax
 * (and "break"/"continue"), in the hope that gcc optimizes it away.
 */

#define for_nexthops(fi)						\
	{								\
		int nhsel = 0;						\
		const struct fib_nh * nh = (fi)->fib_nh;		\
		for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi)						\
	{								\
		int nhsel = 0;						\
		struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh);	\
		for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi)						\
	}
80
81
82 static struct
83 {
84 int error;
85 u8 scope;
86 } fib_props[RTN_MAX+1] = {
87 { 0, RT_SCOPE_NOWHERE}, /* RTN_UNSPEC */
88 { 0, RT_SCOPE_UNIVERSE}, /* RTN_UNICAST */
89 { 0, RT_SCOPE_HOST}, /* RTN_LOCAL */
90 { 0, RT_SCOPE_LINK}, /* RTN_BROADCAST */
91 { 0, RT_SCOPE_LINK}, /* RTN_ANYCAST */
92 { 0, RT_SCOPE_UNIVERSE}, /* RTN_MULTICAST */
93 { -EINVAL, RT_SCOPE_UNIVERSE}, /* RTN_BLACKHOLE */
94 { -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */
95 { -EACCES, RT_SCOPE_UNIVERSE}, /* RTN_PROHIBIT */
96 { -EAGAIN, RT_SCOPE_UNIVERSE}, /* RTN_THROW */
97 #ifdef CONFIG_IP_ROUTE_NAT
98 { 0, RT_SCOPE_HOST}, /* RTN_NAT */
99 #else
100 { -EINVAL, RT_SCOPE_NOWHERE}, /* RTN_NAT */
101 #endif
102 { -EINVAL, RT_SCOPE_NOWHERE} /* RTN_XRESOLVE */
103 };
104
105
106 /* Release a nexthop info record */
107
free_fib_info(struct fib_info * fi)108 void free_fib_info(struct fib_info *fi)
109 {
110 if (fi->fib_dead == 0) {
111 printk("Freeing alive fib_info %p\n", fi);
112 return;
113 }
114 change_nexthops(fi) {
115 if (nh->nh_dev)
116 dev_put(nh->nh_dev);
117 nh->nh_dev = NULL;
118 } endfor_nexthops(fi);
119 fib_info_cnt--;
120 kfree(fi);
121 }
122
fib_release_info(struct fib_info * fi)123 void fib_release_info(struct fib_info *fi)
124 {
125 write_lock(&fib_info_lock);
126 if (fi && --fi->fib_treeref == 0) {
127 if (fi->fib_next)
128 fi->fib_next->fib_prev = fi->fib_prev;
129 if (fi->fib_prev)
130 fi->fib_prev->fib_next = fi->fib_next;
131 if (fi == fib_info_list)
132 fib_info_list = fi->fib_next;
133 fi->fib_dead = 1;
134 fib_info_put(fi);
135 }
136 write_unlock(&fib_info_lock);
137 }
138
nh_comp(const struct fib_info * fi,const struct fib_info * ofi)139 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
140 {
141 const struct fib_nh *onh = ofi->fib_nh;
142
143 for_nexthops(fi) {
144 if (nh->nh_oif != onh->nh_oif ||
145 nh->nh_gw != onh->nh_gw ||
146 nh->nh_scope != onh->nh_scope ||
147 #ifdef CONFIG_IP_ROUTE_MULTIPATH
148 nh->nh_weight != onh->nh_weight ||
149 #endif
150 #ifdef CONFIG_NET_CLS_ROUTE
151 nh->nh_tclassid != onh->nh_tclassid ||
152 #endif
153 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
154 return -1;
155 onh++;
156 } endfor_nexthops(fi);
157 return 0;
158 }
159
fib_find_info(const struct fib_info * nfi)160 static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
161 {
162 for_fib_info() {
163 if (fi->fib_nhs != nfi->fib_nhs)
164 continue;
165 if (nfi->fib_protocol == fi->fib_protocol &&
166 nfi->fib_prefsrc == fi->fib_prefsrc &&
167 nfi->fib_priority == fi->fib_priority &&
168 memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
169 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
170 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
171 return fi;
172 } endfor_fib_info();
173 return NULL;
174 }
175
176 /* Check, that the gateway is already configured.
177 Used only by redirect accept routine.
178 */
179
ip_fib_check_default(u32 gw,struct net_device * dev)180 int ip_fib_check_default(u32 gw, struct net_device *dev)
181 {
182 read_lock(&fib_info_lock);
183 for_fib_info() {
184 if (fi->fib_flags & RTNH_F_DEAD)
185 continue;
186 for_nexthops(fi) {
187 if (nh->nh_dev == dev && nh->nh_gw == gw &&
188 nh->nh_scope == RT_SCOPE_LINK &&
189 !(nh->nh_flags&RTNH_F_DEAD)) {
190 read_unlock(&fib_info_lock);
191 return 0;
192 }
193 } endfor_nexthops(fi);
194 } endfor_fib_info();
195 read_unlock(&fib_info_lock);
196 return -1;
197 }
198
199 #ifdef CONFIG_IP_ROUTE_MULTIPATH
200
fib_get_attr32(struct rtattr * attr,int attrlen,int type)201 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
202 {
203 while (RTA_OK(attr,attrlen)) {
204 if (attr->rta_type == type)
205 return *(u32*)RTA_DATA(attr);
206 attr = RTA_NEXT(attr, attrlen);
207 }
208 return 0;
209 }
210
211 static int
fib_count_nexthops(struct rtattr * rta)212 fib_count_nexthops(struct rtattr *rta)
213 {
214 int nhs = 0;
215 struct rtnexthop *nhp = RTA_DATA(rta);
216 int nhlen = RTA_PAYLOAD(rta);
217
218 while (nhlen >= (int)sizeof(struct rtnexthop)) {
219 if ((nhlen -= nhp->rtnh_len) < 0)
220 return 0;
221 nhs++;
222 nhp = RTNH_NEXT(nhp);
223 };
224 return nhs;
225 }
226
227 static int
fib_get_nhs(struct fib_info * fi,const struct rtattr * rta,const struct rtmsg * r)228 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
229 {
230 struct rtnexthop *nhp = RTA_DATA(rta);
231 int nhlen = RTA_PAYLOAD(rta);
232
233 change_nexthops(fi) {
234 int attrlen = nhlen - sizeof(struct rtnexthop);
235 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
236 return -EINVAL;
237 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
238 nh->nh_oif = nhp->rtnh_ifindex;
239 nh->nh_weight = nhp->rtnh_hops + 1;
240 if (attrlen) {
241 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
242 #ifdef CONFIG_NET_CLS_ROUTE
243 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
244 #endif
245 }
246 nhp = RTNH_NEXT(nhp);
247 } endfor_nexthops(fi);
248 return 0;
249 }
250
251 #endif
252
fib_nh_match(struct rtmsg * r,struct nlmsghdr * nlh,struct kern_rta * rta,struct fib_info * fi)253 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
254 struct fib_info *fi)
255 {
256 #ifdef CONFIG_IP_ROUTE_MULTIPATH
257 struct rtnexthop *nhp;
258 int nhlen;
259 #endif
260
261 if (rta->rta_priority &&
262 *rta->rta_priority != fi->fib_priority)
263 return 1;
264
265 if (rta->rta_oif || rta->rta_gw) {
266 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
267 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
268 return 0;
269 return 1;
270 }
271
272 #ifdef CONFIG_IP_ROUTE_MULTIPATH
273 if (rta->rta_mp == NULL)
274 return 0;
275 nhp = RTA_DATA(rta->rta_mp);
276 nhlen = RTA_PAYLOAD(rta->rta_mp);
277
278 for_nexthops(fi) {
279 int attrlen = nhlen - sizeof(struct rtnexthop);
280 u32 gw;
281
282 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
283 return -EINVAL;
284 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
285 return 1;
286 if (attrlen) {
287 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
288 if (gw && gw != nh->nh_gw)
289 return 1;
290 #ifdef CONFIG_NET_CLS_ROUTE
291 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
292 if (gw && gw != nh->nh_tclassid)
293 return 1;
294 #endif
295 }
296 nhp = RTNH_NEXT(nhp);
297 } endfor_nexthops(fi);
298 #endif
299 return 0;
300 }
301
302
303 /*
304 Picture
305 -------
306
   The semantics of a nexthop are very messy for historical reasons.
308 We have to take into account, that:
309 a) gateway can be actually local interface address,
310 so that gatewayed route is direct.
311 b) gateway must be on-link address, possibly
312 described not by an ifaddr, but also by a direct route.
313 c) If both gateway and interface are specified, they should not
314 contradict.
315 d) If we use tunnel routes, gateway could be not on-link.
316
317 Attempt to reconcile all of these (alas, self-contradictory) conditions
318 results in pretty ugly and hairy code with obscure logic.
319
   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
323 Every prefix is assigned a "scope" value: "host" is local address,
324 "link" is direct route,
325 [ ... "site" ... "interior" ... ]
326 and "universe" is true gateway route with global meaning.
327
328 Every prefix refers to a set of "nexthop"s (gw, oif),
329 where gw must have narrower scope. This recursion stops
330 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
331 which means that gw is forced to be on link.
332
   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.
337
   Normally it looks as follows.
339
340 {universe prefix} -> (gw, oif) [scope link]
341 |
342 |-> {link prefix} -> (gw, oif) [scope local]
343 |
344 |-> {local prefix} (terminal node)
345 */
346
fib_check_nh(const struct rtmsg * r,struct fib_info * fi,struct fib_nh * nh)347 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
348 {
349 int err;
350
351 if (nh->nh_gw) {
352 struct rt_key key;
353 struct fib_result res;
354
355 #ifdef CONFIG_IP_ROUTE_PERVASIVE
356 if (nh->nh_flags&RTNH_F_PERVASIVE)
357 return 0;
358 #endif
359 if (nh->nh_flags&RTNH_F_ONLINK) {
360 struct net_device *dev;
361
362 if (r->rtm_scope >= RT_SCOPE_LINK)
363 return -EINVAL;
364 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
365 return -EINVAL;
366 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
367 return -ENODEV;
368 if (!(dev->flags&IFF_UP))
369 return -ENETDOWN;
370 nh->nh_dev = dev;
371 dev_hold(dev);
372 nh->nh_scope = RT_SCOPE_LINK;
373 return 0;
374 }
375 memset(&key, 0, sizeof(key));
376 key.dst = nh->nh_gw;
377 key.oif = nh->nh_oif;
378 key.scope = r->rtm_scope + 1;
379
380 /* It is not necessary, but requires a bit of thinking */
381 if (key.scope < RT_SCOPE_LINK)
382 key.scope = RT_SCOPE_LINK;
383 if ((err = fib_lookup(&key, &res)) != 0)
384 return err;
385 err = -EINVAL;
386 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
387 goto out;
388 nh->nh_scope = res.scope;
389 nh->nh_oif = FIB_RES_OIF(res);
390 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
391 goto out;
392 dev_hold(nh->nh_dev);
393 err = -ENETDOWN;
394 if (!(nh->nh_dev->flags & IFF_UP))
395 goto out;
396 err = 0;
397 out:
398 fib_res_put(&res);
399 return err;
400 } else {
401 struct in_device *in_dev;
402
403 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
404 return -EINVAL;
405
406 in_dev = inetdev_by_index(nh->nh_oif);
407 if (in_dev == NULL)
408 return -ENODEV;
409 if (!(in_dev->dev->flags&IFF_UP)) {
410 in_dev_put(in_dev);
411 return -ENETDOWN;
412 }
413 nh->nh_dev = in_dev->dev;
414 dev_hold(nh->nh_dev);
415 nh->nh_scope = RT_SCOPE_HOST;
416 in_dev_put(in_dev);
417 }
418 return 0;
419 }
420
421 struct fib_info *
fib_create_info(const struct rtmsg * r,struct kern_rta * rta,const struct nlmsghdr * nlh,int * errp)422 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
423 const struct nlmsghdr *nlh, int *errp)
424 {
425 int err;
426 struct fib_info *fi = NULL;
427 struct fib_info *ofi;
428 #ifdef CONFIG_IP_ROUTE_MULTIPATH
429 int nhs = 1;
430 #else
431 const int nhs = 1;
432 #endif
433
434 if (r->rtm_type > RTN_MAX)
435 goto err_inval;
436
437 /* Fast check to catch the most weird cases */
438 if (fib_props[r->rtm_type].scope > r->rtm_scope)
439 goto err_inval;
440
441 #ifdef CONFIG_IP_ROUTE_MULTIPATH
442 if (rta->rta_mp) {
443 nhs = fib_count_nexthops(rta->rta_mp);
444 if (nhs == 0)
445 goto err_inval;
446 }
447 #endif
448
449 fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
450 err = -ENOBUFS;
451 if (fi == NULL)
452 goto failure;
453 fib_info_cnt++;
454 memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
455
456 fi->fib_protocol = r->rtm_protocol;
457 fi->fib_nhs = nhs;
458 fi->fib_flags = r->rtm_flags;
459 if (rta->rta_priority)
460 fi->fib_priority = *rta->rta_priority;
461 if (rta->rta_mx) {
462 int attrlen = RTA_PAYLOAD(rta->rta_mx);
463 struct rtattr *attr = RTA_DATA(rta->rta_mx);
464
465 while (RTA_OK(attr, attrlen)) {
466 unsigned flavor = attr->rta_type;
467 if (flavor) {
468 if (flavor > RTAX_MAX)
469 goto err_inval;
470 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
471 }
472 attr = RTA_NEXT(attr, attrlen);
473 }
474 }
475 if (rta->rta_prefsrc)
476 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
477
478 if (rta->rta_mp) {
479 #ifdef CONFIG_IP_ROUTE_MULTIPATH
480 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
481 goto failure;
482 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
483 goto err_inval;
484 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
485 goto err_inval;
486 #ifdef CONFIG_NET_CLS_ROUTE
487 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
488 goto err_inval;
489 #endif
490 #else
491 goto err_inval;
492 #endif
493 } else {
494 struct fib_nh *nh = fi->fib_nh;
495 if (rta->rta_oif)
496 nh->nh_oif = *rta->rta_oif;
497 if (rta->rta_gw)
498 memcpy(&nh->nh_gw, rta->rta_gw, 4);
499 #ifdef CONFIG_NET_CLS_ROUTE
500 if (rta->rta_flow)
501 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
502 #endif
503 nh->nh_flags = r->rtm_flags;
504 #ifdef CONFIG_IP_ROUTE_MULTIPATH
505 nh->nh_weight = 1;
506 #endif
507 }
508
509 #ifdef CONFIG_IP_ROUTE_NAT
510 if (r->rtm_type == RTN_NAT) {
511 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
512 goto err_inval;
513 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
514 goto link_it;
515 }
516 #endif
517
518 if (fib_props[r->rtm_type].error) {
519 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
520 goto err_inval;
521 goto link_it;
522 }
523
524 if (r->rtm_scope > RT_SCOPE_HOST)
525 goto err_inval;
526
527 if (r->rtm_scope == RT_SCOPE_HOST) {
528 struct fib_nh *nh = fi->fib_nh;
529
530 /* Local address is added. */
531 if (nhs != 1 || nh->nh_gw)
532 goto err_inval;
533 nh->nh_scope = RT_SCOPE_NOWHERE;
534 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
535 err = -ENODEV;
536 if (nh->nh_dev == NULL)
537 goto failure;
538 } else {
539 change_nexthops(fi) {
540 if ((err = fib_check_nh(r, fi, nh)) != 0)
541 goto failure;
542 } endfor_nexthops(fi)
543 }
544
545 if (fi->fib_prefsrc) {
546 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
547 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
548 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
549 goto err_inval;
550 }
551
552 link_it:
553 if ((ofi = fib_find_info(fi)) != NULL) {
554 fi->fib_dead = 1;
555 free_fib_info(fi);
556 ofi->fib_treeref++;
557 return ofi;
558 }
559
560 fi->fib_treeref++;
561 atomic_inc(&fi->fib_clntref);
562 write_lock(&fib_info_lock);
563 fi->fib_next = fib_info_list;
564 fi->fib_prev = NULL;
565 if (fib_info_list)
566 fib_info_list->fib_prev = fi;
567 fib_info_list = fi;
568 write_unlock(&fib_info_lock);
569 return fi;
570
571 err_inval:
572 err = -EINVAL;
573
574 failure:
575 *errp = err;
576 if (fi) {
577 fi->fib_dead = 1;
578 free_fib_info(fi);
579 }
580 return NULL;
581 }
582
583 int
fib_semantic_match(int type,struct fib_info * fi,const struct rt_key * key,struct fib_result * res)584 fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
585 {
586 int err = fib_props[type].error;
587
588 if (err == 0) {
589 if (fi->fib_flags&RTNH_F_DEAD)
590 return 1;
591
592 res->fi = fi;
593
594 switch (type) {
595 #ifdef CONFIG_IP_ROUTE_NAT
596 case RTN_NAT:
597 FIB_RES_RESET(*res);
598 atomic_inc(&fi->fib_clntref);
599 return 0;
600 #endif
601 case RTN_UNICAST:
602 case RTN_LOCAL:
603 case RTN_BROADCAST:
604 case RTN_ANYCAST:
605 case RTN_MULTICAST:
606 for_nexthops(fi) {
607 if (nh->nh_flags&RTNH_F_DEAD)
608 continue;
609 if (!key->oif || key->oif == nh->nh_oif)
610 break;
611 }
612 #ifdef CONFIG_IP_ROUTE_MULTIPATH
613 if (nhsel < fi->fib_nhs) {
614 res->nh_sel = nhsel;
615 atomic_inc(&fi->fib_clntref);
616 return 0;
617 }
618 #else
619 if (nhsel < 1) {
620 atomic_inc(&fi->fib_clntref);
621 return 0;
622 }
623 #endif
624 endfor_nexthops(fi);
625 res->fi = NULL;
626 return 1;
627 default:
628 res->fi = NULL;
629 printk(KERN_DEBUG "impossible 102\n");
630 return -EINVAL;
631 }
632 }
633 return err;
634 }
635
636 /* Find appropriate source address to this destination */
637
__fib_res_prefsrc(struct fib_result * res)638 u32 __fib_res_prefsrc(struct fib_result *res)
639 {
640 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
641 }
642
643 int
fib_dump_info(struct sk_buff * skb,u32 pid,u32 seq,int event,u8 tb_id,u8 type,u8 scope,void * dst,int dst_len,u8 tos,struct fib_info * fi)644 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
645 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
646 struct fib_info *fi)
647 {
648 struct rtmsg *rtm;
649 struct nlmsghdr *nlh;
650 unsigned char *b = skb->tail;
651
652 nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
653 rtm = NLMSG_DATA(nlh);
654 rtm->rtm_family = AF_INET;
655 rtm->rtm_dst_len = dst_len;
656 rtm->rtm_src_len = 0;
657 rtm->rtm_tos = tos;
658 rtm->rtm_table = tb_id;
659 rtm->rtm_type = type;
660 rtm->rtm_flags = fi->fib_flags;
661 rtm->rtm_scope = scope;
662 if (rtm->rtm_dst_len)
663 RTA_PUT(skb, RTA_DST, 4, dst);
664 rtm->rtm_protocol = fi->fib_protocol;
665 if (fi->fib_priority)
666 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
667 #ifdef CONFIG_NET_CLS_ROUTE
668 if (fi->fib_nh[0].nh_tclassid)
669 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
670 #endif
671 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
672 goto rtattr_failure;
673 if (fi->fib_prefsrc)
674 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
675 if (fi->fib_nhs == 1) {
676 if (fi->fib_nh->nh_gw)
677 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
678 if (fi->fib_nh->nh_oif)
679 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
680 }
681 #ifdef CONFIG_IP_ROUTE_MULTIPATH
682 if (fi->fib_nhs > 1) {
683 struct rtnexthop *nhp;
684 struct rtattr *mp_head;
685 if (skb_tailroom(skb) <= RTA_SPACE(0))
686 goto rtattr_failure;
687 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
688
689 for_nexthops(fi) {
690 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
691 goto rtattr_failure;
692 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
693 nhp->rtnh_flags = nh->nh_flags & 0xFF;
694 nhp->rtnh_hops = nh->nh_weight-1;
695 nhp->rtnh_ifindex = nh->nh_oif;
696 if (nh->nh_gw)
697 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
698 nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
699 } endfor_nexthops(fi);
700 mp_head->rta_type = RTA_MULTIPATH;
701 mp_head->rta_len = skb->tail - (u8*)mp_head;
702 }
703 #endif
704 nlh->nlmsg_len = skb->tail - b;
705 return skb->len;
706
707 nlmsg_failure:
708 rtattr_failure:
709 skb_trim(skb, b - skb->data);
710 return -1;
711 }
712
713 #ifndef CONFIG_IP_NOSIOCRT
714
715 int
fib_convert_rtentry(int cmd,struct nlmsghdr * nl,struct rtmsg * rtm,struct kern_rta * rta,struct rtentry * r)716 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
717 struct kern_rta *rta, struct rtentry *r)
718 {
719 int plen;
720 u32 *ptr;
721
722 memset(rtm, 0, sizeof(*rtm));
723 memset(rta, 0, sizeof(*rta));
724
725 if (r->rt_dst.sa_family != AF_INET)
726 return -EAFNOSUPPORT;
727
728 /* Check mask for validity:
729 a) it must be contiguous.
730 b) destination must have all host bits clear.
731 c) if application forgot to set correct family (AF_INET),
732 reject request unless it is absolutely clear i.e.
733 both family and mask are zero.
734 */
735 plen = 32;
736 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
737 if (!(r->rt_flags&RTF_HOST)) {
738 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
739 if (r->rt_genmask.sa_family != AF_INET) {
740 if (mask || r->rt_genmask.sa_family)
741 return -EAFNOSUPPORT;
742 }
743 if (bad_mask(mask, *ptr))
744 return -EINVAL;
745 plen = inet_mask_len(mask);
746 }
747
748 nl->nlmsg_flags = NLM_F_REQUEST;
749 nl->nlmsg_pid = 0;
750 nl->nlmsg_seq = 0;
751 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
752 if (cmd == SIOCDELRT) {
753 nl->nlmsg_type = RTM_DELROUTE;
754 nl->nlmsg_flags = 0;
755 } else {
756 nl->nlmsg_type = RTM_NEWROUTE;
757 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
758 rtm->rtm_protocol = RTPROT_BOOT;
759 }
760
761 rtm->rtm_dst_len = plen;
762 rta->rta_dst = ptr;
763
764 if (r->rt_metric) {
765 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
766 rta->rta_priority = (u32*)&r->rt_pad3;
767 }
768 if (r->rt_flags&RTF_REJECT) {
769 rtm->rtm_scope = RT_SCOPE_HOST;
770 rtm->rtm_type = RTN_UNREACHABLE;
771 return 0;
772 }
773 rtm->rtm_scope = RT_SCOPE_NOWHERE;
774 rtm->rtm_type = RTN_UNICAST;
775
776 if (r->rt_dev) {
777 char *colon;
778 struct net_device *dev;
779 char devname[IFNAMSIZ];
780
781 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
782 return -EFAULT;
783 devname[IFNAMSIZ-1] = 0;
784 colon = strchr(devname, ':');
785 if (colon)
786 *colon = 0;
787 dev = __dev_get_by_name(devname);
788 if (!dev)
789 return -ENODEV;
790 rta->rta_oif = &dev->ifindex;
791 if (colon) {
792 struct in_ifaddr *ifa;
793 struct in_device *in_dev = __in_dev_get(dev);
794 if (!in_dev)
795 return -ENODEV;
796 *colon = ':';
797 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
798 if (strcmp(ifa->ifa_label, devname) == 0)
799 break;
800 if (ifa == NULL)
801 return -ENODEV;
802 rta->rta_prefsrc = &ifa->ifa_local;
803 }
804 }
805
806 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
807 if (r->rt_gateway.sa_family == AF_INET && *ptr) {
808 rta->rta_gw = ptr;
809 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
810 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
811 }
812
813 if (cmd == SIOCDELRT)
814 return 0;
815
816 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
817 return -EINVAL;
818
819 if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
820 rtm->rtm_scope = RT_SCOPE_LINK;
821
822 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
823 struct rtattr *rec;
824 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
825 if (mx == NULL)
826 return -ENOMEM;
827 rta->rta_mx = mx;
828 mx->rta_type = RTA_METRICS;
829 mx->rta_len = RTA_LENGTH(0);
830 if (r->rt_flags&RTF_MTU) {
831 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
832 rec->rta_type = RTAX_ADVMSS;
833 rec->rta_len = RTA_LENGTH(4);
834 mx->rta_len += RTA_LENGTH(4);
835 *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
836 }
837 if (r->rt_flags&RTF_WINDOW) {
838 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
839 rec->rta_type = RTAX_WINDOW;
840 rec->rta_len = RTA_LENGTH(4);
841 mx->rta_len += RTA_LENGTH(4);
842 *(u32*)RTA_DATA(rec) = r->rt_window;
843 }
844 if (r->rt_flags&RTF_IRTT) {
845 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
846 rec->rta_type = RTAX_RTT;
847 rec->rta_len = RTA_LENGTH(4);
848 mx->rta_len += RTA_LENGTH(4);
849 *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
850 }
851 }
852 return 0;
853 }
854
855 #endif
856
857 /*
858 Update FIB if:
859 - local address disappeared -> we must delete all the entries
860 referring to it.
861 - device went down -> we must shutdown all nexthops going via it.
862 */
863
fib_sync_down(u32 local,struct net_device * dev,int force)864 int fib_sync_down(u32 local, struct net_device *dev, int force)
865 {
866 int ret = 0;
867 int scope = RT_SCOPE_NOWHERE;
868
869 if (force)
870 scope = -1;
871
872 for_fib_info() {
873 if (local && fi->fib_prefsrc == local) {
874 fi->fib_flags |= RTNH_F_DEAD;
875 ret++;
876 } else if (dev && fi->fib_nhs) {
877 int dead = 0;
878
879 change_nexthops(fi) {
880 if (nh->nh_flags&RTNH_F_DEAD)
881 dead++;
882 else if (nh->nh_dev == dev &&
883 nh->nh_scope != scope) {
884 nh->nh_flags |= RTNH_F_DEAD;
885 #ifdef CONFIG_IP_ROUTE_MULTIPATH
886 spin_lock_bh(&fib_multipath_lock);
887 fi->fib_power -= nh->nh_power;
888 nh->nh_power = 0;
889 spin_unlock_bh(&fib_multipath_lock);
890 #endif
891 dead++;
892 }
893 #ifdef CONFIG_IP_ROUTE_MULTIPATH
894 if (force > 1 && nh->nh_dev == dev) {
895 dead = fi->fib_nhs;
896 break;
897 }
898 #endif
899 } endfor_nexthops(fi)
900 if (dead == fi->fib_nhs) {
901 fi->fib_flags |= RTNH_F_DEAD;
902 ret++;
903 }
904 }
905 } endfor_fib_info();
906 return ret;
907 }
908
909 #ifdef CONFIG_IP_ROUTE_MULTIPATH
910
911 /*
912 Dead device goes up. We wake up dead nexthops.
913 It takes sense only on multipath routes.
914 */
915
fib_sync_up(struct net_device * dev)916 int fib_sync_up(struct net_device *dev)
917 {
918 int ret = 0;
919
920 if (!(dev->flags&IFF_UP))
921 return 0;
922
923 for_fib_info() {
924 int alive = 0;
925
926 change_nexthops(fi) {
927 if (!(nh->nh_flags&RTNH_F_DEAD)) {
928 alive++;
929 continue;
930 }
931 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
932 continue;
933 if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
934 continue;
935 alive++;
936 spin_lock_bh(&fib_multipath_lock);
937 nh->nh_power = 0;
938 nh->nh_flags &= ~RTNH_F_DEAD;
939 spin_unlock_bh(&fib_multipath_lock);
940 } endfor_nexthops(fi)
941
942 if (alive > 0) {
943 fi->fib_flags &= ~RTNH_F_DEAD;
944 ret++;
945 }
946 } endfor_fib_info();
947 return ret;
948 }
949
950 /*
951 The algorithm is suboptimal, but it provides really
952 fair weighted route distribution.
953 */
954
fib_select_multipath(const struct rt_key * key,struct fib_result * res)955 void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
956 {
957 struct fib_info *fi = res->fi;
958 int w;
959
960 spin_lock_bh(&fib_multipath_lock);
961 if (fi->fib_power <= 0) {
962 int power = 0;
963 change_nexthops(fi) {
964 if (!(nh->nh_flags&RTNH_F_DEAD)) {
965 power += nh->nh_weight;
966 nh->nh_power = nh->nh_weight;
967 }
968 } endfor_nexthops(fi);
969 fi->fib_power = power;
970 if (power <= 0) {
971 spin_unlock_bh(&fib_multipath_lock);
972 /* Race condition: route has just become dead. */
973 res->nh_sel = 0;
974 return;
975 }
976 }
977
978
979 /* w should be random number [0..fi->fib_power-1],
980 it is pretty bad approximation.
981 */
982
983 w = jiffies % fi->fib_power;
984
985 change_nexthops(fi) {
986 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
987 if ((w -= nh->nh_power) <= 0) {
988 nh->nh_power--;
989 fi->fib_power--;
990 res->nh_sel = nhsel;
991 spin_unlock_bh(&fib_multipath_lock);
992 return;
993 }
994 }
995 } endfor_nexthops(fi);
996
997 /* Race condition: route has just become dead. */
998 res->nh_sel = 0;
999 spin_unlock_bh(&fib_multipath_lock);
1000 }
1001 #endif
1002
1003
1004 #ifdef CONFIG_PROC_FS
1005
/*
 * Translate a route into the RTF_* flag word shown by the proc
 * interface:
 *   RTF_REJECT  for unreachable and prohibit routes,
 *   RTF_GATEWAY when the first nexthop of @fi has a gateway,
 *   RTF_HOST    when @mask is all ones,
 *   RTF_UP      unless @dead is set.
 */
static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
{
	unsigned flags;

	switch (type) {
	case RTN_UNREACHABLE:
	case RTN_PROHIBIT:
		flags = RTF_REJECT;
		break;
	default:
		flags = 0;
		break;
	}

	if (fi != NULL && fi->fib_nh->nh_gw)
		flags |= RTF_GATEWAY;
	if (mask == 0xFFFFFFFF)
		flags |= RTF_HOST;
	if (!dead)
		flags |= RTF_UP;

	return flags;
}
1021
/*
 * Format one route as a fixed size, 128 byte text record in @buffer:
 * tab separated fields, padded with spaces up to offset 126 and ended
 * by a newline at offset 127.  A NULL @fi is printed with "*" as the
 * device name and zeroes for all record dependent fields.
 */
void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer)
{
	unsigned flags = fib_flag_trans(type, dead, mask, fi);
	int len;

	if (fi == NULL) {
		len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
			      prefix, 0,
			      flags, 0, 0, 0,
			      mask, 0, 0, 0);
	} else {
		len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
			      fi->fib_dev ? fi->fib_dev->name : "*", prefix,
			      fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
			      mask, (fi->fib_advmss ? fi->fib_advmss+40 : 0),
			      fi->fib_window, fi->fib_rtt>>3);
	}

	/* Pad to the fixed record width and terminate the line. */
	memset(buffer + len, ' ', 127 - len);
	buffer[127] = '\n';
}
1042
1043 #endif
1044