1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:	$Id: fib_semantics.c,v 1.18.2.2 2002/01/12 07:54:15 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38 
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 
/* Debug trace hook; in this build it expands to nothing. */
#define FSprintk(a...)

/* Head of the doubly linked list of fib_info records
   (linked through fib_next / fib_prev). */
static struct fib_info 	*fib_info_list;
/* Taken by fib_release_info(), ip_fib_check_default() and the list
   insertion in fib_create_info(). */
static rwlock_t fib_info_lock = RW_LOCK_UNLOCKED;
/* Number of fib_info records currently allocated: incremented in
   fib_create_info(), decremented in free_fib_info(). */
int fib_info_cnt;

/* Open a block that walks every record on fib_info_list using a cursor
   named "fi".  The block must be closed with endfor_fib_info(). */
#define for_fib_info() { struct fib_info *fi; \
	for (fi = fib_info_list; fi; fi = fi->fib_next)

/* Close the block opened by for_fib_info(). */
#define endfor_fib_info() }
56 
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* Taken (with bottom halves disabled) around the fib_power / nh_power
   updates made in this file. */
static spinlock_t fib_multipath_lock = SPIN_LOCK_UNLOCKED;

/* Open a block that visits each of the (fi)->fib_nhs nexthops of fi.
   Inside it "nh" is a read-only cursor and "nhsel" is its index.
   The block must be closed with endfor_nexthops(). */
#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

/* Same iteration as for_nexthops(), but "nh" is writable. */
#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Without multipath support only the first nexthop is visited: the loop
   body runs exactly once with nhsel == 0.
   Hopefully gcc optimizes the dummy loop away. */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Close the block opened by for_nexthops() or change_nexthops(). */
#define endfor_nexthops(fi) }
80 
81 
/*
 * Per route type properties, indexed by the RTN_* value.
 *   error - value fib_semantic_match() returns for a route of this type;
 *           0 means the route is usable and its nexthops are examined.
 *   scope - lower bound for rtm_scope: fib_create_info() rejects a request
 *           whose rtm_scope is smaller than this value.
 */
static struct
{
	int	error;
	u8	scope;
} fib_props[RTN_MAX+1] = {
        { 0, RT_SCOPE_NOWHERE},		/* RTN_UNSPEC */
	{ 0, RT_SCOPE_UNIVERSE},	/* RTN_UNICAST */
	{ 0, RT_SCOPE_HOST},		/* RTN_LOCAL */
	{ 0, RT_SCOPE_LINK},		/* RTN_BROADCAST */
	{ 0, RT_SCOPE_LINK},		/* RTN_ANYCAST */
	{ 0, RT_SCOPE_UNIVERSE},	/* RTN_MULTICAST */
	{ -EINVAL, RT_SCOPE_UNIVERSE},	/* RTN_BLACKHOLE */
	{ -EHOSTUNREACH, RT_SCOPE_UNIVERSE},/* RTN_UNREACHABLE */
	{ -EACCES, RT_SCOPE_UNIVERSE},	/* RTN_PROHIBIT */
	{ -EAGAIN, RT_SCOPE_UNIVERSE},	/* RTN_THROW */
#ifdef CONFIG_IP_ROUTE_NAT
	{ 0, RT_SCOPE_HOST},		/* RTN_NAT */
#else
	{ -EINVAL, RT_SCOPE_NOWHERE},	/* RTN_NAT */
#endif
	{ -EINVAL, RT_SCOPE_NOWHERE}	/* RTN_XRESOLVE */
};
104 
105 
106 /* Release a nexthop info record */
107 
free_fib_info(struct fib_info * fi)108 void free_fib_info(struct fib_info *fi)
109 {
110 	if (fi->fib_dead == 0) {
111 		printk("Freeing alive fib_info %p\n", fi);
112 		return;
113 	}
114 	change_nexthops(fi) {
115 		if (nh->nh_dev)
116 			dev_put(nh->nh_dev);
117 		nh->nh_dev = NULL;
118 	} endfor_nexthops(fi);
119 	fib_info_cnt--;
120 	kfree(fi);
121 }
122 
fib_release_info(struct fib_info * fi)123 void fib_release_info(struct fib_info *fi)
124 {
125 	write_lock(&fib_info_lock);
126 	if (fi && --fi->fib_treeref == 0) {
127 		if (fi->fib_next)
128 			fi->fib_next->fib_prev = fi->fib_prev;
129 		if (fi->fib_prev)
130 			fi->fib_prev->fib_next = fi->fib_next;
131 		if (fi == fib_info_list)
132 			fib_info_list = fi->fib_next;
133 		fi->fib_dead = 1;
134 		fib_info_put(fi);
135 	}
136 	write_unlock(&fib_info_lock);
137 }
138 
nh_comp(const struct fib_info * fi,const struct fib_info * ofi)139 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
140 {
141 	const struct fib_nh *onh = ofi->fib_nh;
142 
143 	for_nexthops(fi) {
144 		if (nh->nh_oif != onh->nh_oif ||
145 		    nh->nh_gw  != onh->nh_gw ||
146 		    nh->nh_scope != onh->nh_scope ||
147 #ifdef CONFIG_IP_ROUTE_MULTIPATH
148 		    nh->nh_weight != onh->nh_weight ||
149 #endif
150 #ifdef CONFIG_NET_CLS_ROUTE
151 		    nh->nh_tclassid != onh->nh_tclassid ||
152 #endif
153 		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
154 			return -1;
155 		onh++;
156 	} endfor_nexthops(fi);
157 	return 0;
158 }
159 
fib_find_info(const struct fib_info * nfi)160 static __inline__ struct fib_info * fib_find_info(const struct fib_info *nfi)
161 {
162 	for_fib_info() {
163 		if (fi->fib_nhs != nfi->fib_nhs)
164 			continue;
165 		if (nfi->fib_protocol == fi->fib_protocol &&
166 		    nfi->fib_prefsrc == fi->fib_prefsrc &&
167 		    nfi->fib_priority == fi->fib_priority &&
168 		    memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
169 		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
170 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
171 			return fi;
172 	} endfor_fib_info();
173 	return NULL;
174 }
175 
176 /* Check, that the gateway is already configured.
177    Used only by redirect accept routine.
178  */
179 
ip_fib_check_default(u32 gw,struct net_device * dev)180 int ip_fib_check_default(u32 gw, struct net_device *dev)
181 {
182 	read_lock(&fib_info_lock);
183 	for_fib_info() {
184 		if (fi->fib_flags & RTNH_F_DEAD)
185 			continue;
186 		for_nexthops(fi) {
187 			if (nh->nh_dev == dev && nh->nh_gw == gw &&
188 			    nh->nh_scope == RT_SCOPE_LINK &&
189 			    !(nh->nh_flags&RTNH_F_DEAD)) {
190 				read_unlock(&fib_info_lock);
191 				return 0;
192 			}
193 		} endfor_nexthops(fi);
194 	} endfor_fib_info();
195 	read_unlock(&fib_info_lock);
196 	return -1;
197 }
198 
199 #ifdef CONFIG_IP_ROUTE_MULTIPATH
200 
fib_get_attr32(struct rtattr * attr,int attrlen,int type)201 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
202 {
203 	while (RTA_OK(attr,attrlen)) {
204 		if (attr->rta_type == type)
205 			return *(u32*)RTA_DATA(attr);
206 		attr = RTA_NEXT(attr, attrlen);
207 	}
208 	return 0;
209 }
210 
211 static int
fib_count_nexthops(struct rtattr * rta)212 fib_count_nexthops(struct rtattr *rta)
213 {
214 	int nhs = 0;
215 	struct rtnexthop *nhp = RTA_DATA(rta);
216 	int nhlen = RTA_PAYLOAD(rta);
217 
218 	while (nhlen >= (int)sizeof(struct rtnexthop)) {
219 		if ((nhlen -= nhp->rtnh_len) < 0)
220 			return 0;
221 		nhs++;
222 		nhp = RTNH_NEXT(nhp);
223 	};
224 	return nhs;
225 }
226 
227 static int
fib_get_nhs(struct fib_info * fi,const struct rtattr * rta,const struct rtmsg * r)228 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
229 {
230 	struct rtnexthop *nhp = RTA_DATA(rta);
231 	int nhlen = RTA_PAYLOAD(rta);
232 
233 	change_nexthops(fi) {
234 		int attrlen = nhlen - sizeof(struct rtnexthop);
235 		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
236 			return -EINVAL;
237 		nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
238 		nh->nh_oif = nhp->rtnh_ifindex;
239 		nh->nh_weight = nhp->rtnh_hops + 1;
240 		if (attrlen) {
241 			nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
242 #ifdef CONFIG_NET_CLS_ROUTE
243 			nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
244 #endif
245 		}
246 		nhp = RTNH_NEXT(nhp);
247 	} endfor_nexthops(fi);
248 	return 0;
249 }
250 
251 #endif
252 
fib_nh_match(struct rtmsg * r,struct nlmsghdr * nlh,struct kern_rta * rta,struct fib_info * fi)253 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
254 		 struct fib_info *fi)
255 {
256 #ifdef CONFIG_IP_ROUTE_MULTIPATH
257 	struct rtnexthop *nhp;
258 	int nhlen;
259 #endif
260 
261 	if (rta->rta_priority &&
262 	    *rta->rta_priority != fi->fib_priority)
263 		return 1;
264 
265 	if (rta->rta_oif || rta->rta_gw) {
266 		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
267 		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
268 			return 0;
269 		return 1;
270 	}
271 
272 #ifdef CONFIG_IP_ROUTE_MULTIPATH
273 	if (rta->rta_mp == NULL)
274 		return 0;
275 	nhp = RTA_DATA(rta->rta_mp);
276 	nhlen = RTA_PAYLOAD(rta->rta_mp);
277 
278 	for_nexthops(fi) {
279 		int attrlen = nhlen - sizeof(struct rtnexthop);
280 		u32 gw;
281 
282 		if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
283 			return -EINVAL;
284 		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
285 			return 1;
286 		if (attrlen) {
287 			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
288 			if (gw && gw != nh->nh_gw)
289 				return 1;
290 #ifdef CONFIG_NET_CLS_ROUTE
291 			gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
292 			if (gw && gw != nh->nh_tclassid)
293 				return 1;
294 #endif
295 		}
296 		nhp = RTNH_NEXT(nhp);
297 	} endfor_nexthops(fi);
298 #endif
299 	return 0;
300 }
301 
302 
303 /*
304    Picture
305    -------
306 
   The semantics of a nexthop are very messy for historical reasons.
   We have to take into account that:
309    a) gateway can be actually local interface address,
310       so that gatewayed route is direct.
311    b) gateway must be on-link address, possibly
312       described not by an ifaddr, but also by a direct route.
313    c) If both gateway and interface are specified, they should not
314       contradict.
315    d) If we use tunnel routes, gateway could be not on-link.
316 
317    Attempt to reconcile all of these (alas, self-contradictory) conditions
318    results in pretty ugly and hairy code with obscure logic.
319 
   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
323    Every prefix is assigned a "scope" value: "host" is local address,
324    "link" is direct route,
325    [ ... "site" ... "interior" ... ]
326    and "universe" is true gateway route with global meaning.
327 
328    Every prefix refers to a set of "nexthop"s (gw, oif),
329    where gw must have narrower scope. This recursion stops
330    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
331    which means that gw is forced to be on link.
332 
   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g., as a by-product it allows
   independent exterior and interior routing processes to
   co-exist in peace.
337 
338    Normally it looks as following.
339 
340    {universe prefix}  -> (gw, oif) [scope link]
341                           |
342 			  |-> {link prefix} -> (gw, oif) [scope local]
343 			                        |
344 						|-> {local prefix} (terminal node)
345  */
346 
/*
 * Validate one nexthop of a route being created and resolve its output
 * device and scope.
 *
 * r  - the route request; only rtm_scope is read here.
 * fi - the record the nexthop belongs to (not referenced in this function).
 * nh - the nexthop; nh_dev, nh_scope and possibly nh_oif are filled in.
 *
 * Returns 0 on success or a negative errno.  Whenever nh->nh_dev is set
 * here a device reference has been taken with dev_hold(); this also holds
 * for the -ENETDOWN return of the lookup path, where the reference is left
 * in nh->nh_dev (free_fib_info() drops nexthop device references).
 */
static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
{
	int err;

	if (nh->nh_gw) {
		/* Gatewayed nexthop. */
		struct rt_key key;
		struct fib_result res;

#ifdef CONFIG_IP_ROUTE_PERVASIVE
		/* Pervasive nexthops are accepted without any resolution. */
		if (nh->nh_flags&RTNH_F_PERVASIVE)
			return 0;
#endif
		if (nh->nh_flags&RTNH_F_ONLINK) {
			/* Gateway is declared on-link: no lookup, just check
			   the gateway address type and the given device. */
			struct net_device *dev;

			if (r->rtm_scope >= RT_SCOPE_LINK)
				return -EINVAL;
			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
				return -EINVAL;
			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
				return -ENODEV;
			if (!(dev->flags&IFF_UP))
				return -ENETDOWN;
			nh->nh_dev = dev;
			dev_hold(dev);
			nh->nh_scope = RT_SCOPE_LINK;
			return 0;
		}
		/* Look the gateway up with a scope one step above the
		   route's own scope, but at least RT_SCOPE_LINK. */
		memset(&key, 0, sizeof(key));
		key.dst = nh->nh_gw;
		key.oif = nh->nh_oif;
		key.scope = r->rtm_scope + 1;

		/* It is not necessary, but requires a bit of thinking */
		if (key.scope < RT_SCOPE_LINK)
			key.scope = RT_SCOPE_LINK;
		if ((err = fib_lookup(&key, &res)) != 0)
			return err;
		err = -EINVAL;
		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
			goto out;
		/* Inherit scope, interface and device from the route that
		   leads to the gateway. */
		nh->nh_scope = res.scope;
		nh->nh_oif = FIB_RES_OIF(res);
		if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
			goto out;
		dev_hold(nh->nh_dev);
		err = -ENETDOWN;
		if (!(nh->nh_dev->flags & IFF_UP))
			goto out;
		err = 0;
out:
		/* Release the lookup result on every path past fib_lookup(). */
		fib_res_put(&res);
		return err;
	} else {
		/* No gateway: the nexthop is the interface itself. */
		struct in_device *in_dev;

		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
			return -EINVAL;

		in_dev = inetdev_by_index(nh->nh_oif);
		if (in_dev == NULL)
			return -ENODEV;
		if (!(in_dev->dev->flags&IFF_UP)) {
			in_dev_put(in_dev);
			return -ENETDOWN;
		}
		nh->nh_dev = in_dev->dev;
		dev_hold(nh->nh_dev);
		nh->nh_scope = RT_SCOPE_HOST;
		in_dev_put(in_dev);
	}
	return 0;
}
420 
/*
 * Build the fib_info record described by a route request.
 *
 * r    - route message header (type, scope, protocol, flags).
 * rta  - parsed route attributes (priority, metrics, prefsrc, oif,
 *        gateway, flow, multipath).
 * nlh  - netlink header; not referenced in this function.
 * errp - receives a negative errno on failure; not written on success.
 *
 * On success returns a record whose fib_treeref has been incremented:
 * either an already listed equivalent record found by fib_find_info(), or
 * the newly built one, which is then inserted at the head of
 * fib_info_list.  On failure returns NULL and releases anything allocated.
 */
struct fib_info *
fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
		const struct nlmsghdr *nlh, int *errp)
{
	int err;
	struct fib_info *fi = NULL;
	struct fib_info *ofi;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	int nhs = 1;
#else
	const int nhs = 1;
#endif

	if (r->rtm_type > RTN_MAX)
		goto err_inval;

	/* Fast check to catch the most weird cases */
	if (fib_props[r->rtm_type].scope > r->rtm_scope)
		goto err_inval;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (rta->rta_mp) {
		/* A multipath attribute that yields no nexthops is invalid. */
		nhs = fib_count_nexthops(rta->rta_mp);
		if (nhs == 0)
			goto err_inval;
	}
#endif

	/* The nexthop array is allocated together with the record. */
	fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
	err = -ENOBUFS;
	if (fi == NULL)
		goto failure;
	fib_info_cnt++;
	memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));

	fi->fib_protocol = r->rtm_protocol;
	fi->fib_nhs = nhs;
	fi->fib_flags = r->rtm_flags;
	if (rta->rta_priority)
		fi->fib_priority = *rta->rta_priority;
	if (rta->rta_mx) {
		/* Copy nested metrics; attribute type N is stored in
		   fib_metrics[N-1], type 0 is skipped. */
		int attrlen = RTA_PAYLOAD(rta->rta_mx);
		struct rtattr *attr = RTA_DATA(rta->rta_mx);

		while (RTA_OK(attr, attrlen)) {
			unsigned flavor = attr->rta_type;
			if (flavor) {
				if (flavor > RTAX_MAX)
					goto err_inval;
				fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
			}
			attr = RTA_NEXT(attr, attrlen);
		}
	}
	if (rta->rta_prefsrc)
		memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);

	if (rta->rta_mp) {
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
			goto failure;
		/* Top-level oif/gateway/flow, if given, must agree with the
		   first nexthop of the multipath attribute. */
		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
			goto err_inval;
		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
			goto err_inval;
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
			goto err_inval;
#endif
#else
		/* Multipath attribute without multipath support. */
		goto err_inval;
#endif
	} else {
		/* Single nexthop taken from the top-level attributes. */
		struct fib_nh *nh = fi->fib_nh;
		if (rta->rta_oif)
			nh->nh_oif = *rta->rta_oif;
		if (rta->rta_gw)
			memcpy(&nh->nh_gw, rta->rta_gw, 4);
#ifdef CONFIG_NET_CLS_ROUTE
		if (rta->rta_flow)
			memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
#endif
		nh->nh_flags = r->rtm_flags;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		nh->nh_weight = 1;
#endif
	}

#ifdef CONFIG_IP_ROUTE_NAT
	if (r->rtm_type == RTN_NAT) {
		/* NAT routes need a gateway, exactly one nexthop and no
		   interface; no nexthop validation is done for them. */
		if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif)
			goto err_inval;
		memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 4);
		goto link_it;
	}
#endif

	if (fib_props[r->rtm_type].error) {
		/* Route types that resolve to an error carry no nexthop. */
		if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
			goto err_inval;
		goto link_it;
	}

	if (r->rtm_scope > RT_SCOPE_HOST)
		goto err_inval;

	if (r->rtm_scope == RT_SCOPE_HOST) {
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1 || nh->nh_gw)
			goto err_inval;
		nh->nh_scope = RT_SCOPE_NOWHERE;
		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
		err = -ENODEV;
		if (nh->nh_dev == NULL)
			goto failure;
	} else {
		/* Validate and resolve every nexthop. */
		change_nexthops(fi) {
			if ((err = fib_check_nh(r, fi, nh)) != 0)
				goto failure;
		} endfor_nexthops(fi)
	}

	if (fi->fib_prefsrc) {
		/* The preferred source must be a local address, unless this
		   is a local route whose destination is that very address. */
		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
				goto err_inval;
	}

link_it:
	/* Share an equivalent record if one is already listed. */
	if ((ofi = fib_find_info(fi)) != NULL) {
		fi->fib_dead = 1;
		free_fib_info(fi);
		ofi->fib_treeref++;
		return ofi;
	}

	/* Otherwise take the initial references and insert the new record
	   at the head of the list. */
	fi->fib_treeref++;
	atomic_inc(&fi->fib_clntref);
	write_lock(&fib_info_lock);
	fi->fib_next = fib_info_list;
	fi->fib_prev = NULL;
	if (fib_info_list)
		fib_info_list->fib_prev = fi;
	fib_info_list = fi;
	write_unlock(&fib_info_lock);
	return fi;

err_inval:
	err = -EINVAL;

failure:
        *errp = err;
        if (fi) {
		/* free_fib_info() refuses records not marked dead. */
		fi->fib_dead = 1;
		free_fib_info(fi);
	}
	return NULL;
}
582 
/*
 * Decide whether the record fi of route type "type" can serve the lookup
 * described by key, and fill in res accordingly.
 *
 * Returns:
 *   fib_props[type].error  when that value is non-zero;
 *   0   on a match: res->fi is set and fi->fib_clntref is incremented
 *       (in multipath builds res->nh_sel holds the chosen nexthop index);
 *   1   when fi is dead or no live nexthop fits key->oif;
 *   -EINVAL for a type that has no error value and is not handled below.
 */
int
fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
{
	int err = fib_props[type].error;

	if (err == 0) {
		if (fi->fib_flags&RTNH_F_DEAD)
			return 1;

		res->fi = fi;

		switch (type) {
#ifdef CONFIG_IP_ROUTE_NAT
		case RTN_NAT:
			FIB_RES_RESET(*res);
			atomic_inc(&fi->fib_clntref);
			return 0;
#endif
		case RTN_UNICAST:
		case RTN_LOCAL:
		case RTN_BROADCAST:
		case RTN_ANYCAST:
		case RTN_MULTICAST:
			/* Stop at the first live nexthop that fits the
			   requested output interface (0 means any). */
			for_nexthops(fi) {
				if (nh->nh_flags&RTNH_F_DEAD)
					continue;
				if (!key->oif || key->oif == nh->nh_oif)
					break;
			}
			/* The block opened by for_nexthops() is deliberately
			   still open here so that nhsel stays in scope; it
			   is closed by endfor_nexthops() below.  nhsel is
			   below the loop bound only if the loop was left
			   through the break. */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
			if (nhsel < fi->fib_nhs) {
				res->nh_sel = nhsel;
				atomic_inc(&fi->fib_clntref);
				return 0;
			}
#else
			if (nhsel < 1) {
				atomic_inc(&fi->fib_clntref);
				return 0;
			}
#endif
			endfor_nexthops(fi);
			res->fi = NULL;
			return 1;
		default:
			res->fi = NULL;
			printk(KERN_DEBUG "impossible 102\n");
			return -EINVAL;
		}
	}
	return err;
}
635 
636 /* Find appropriate source address to this destination */
637 
__fib_res_prefsrc(struct fib_result * res)638 u32 __fib_res_prefsrc(struct fib_result *res)
639 {
640 	return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
641 }
642 
/*
 * Append one route message describing fi to skb.
 *
 * pid, seq, event - values passed to NLMSG_PUT for the message header.
 * tb_id, type, scope, dst_len, tos - copied into the rtmsg header.
 * dst - destination address, emitted as RTA_DST only when dst_len != 0.
 * fi  - supplies flags, protocol, priority, metrics, preferred source
 *       and the nexthop(s).
 *
 * Returns skb->len on success.  On failure the skb is trimmed back to the
 * length it had on entry and -1 is returned.
 *
 * NOTE(review): no explicit goto targets nlmsg_failure in this function;
 * presumably NLMSG_PUT jumps to nlmsg_failure and RTA_PUT to rtattr_failure
 * when the skb lacks room - confirm against the macro definitions.
 */
int
fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
	      u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
	      struct fib_info *fi)
{
	struct rtmsg *rtm;
	struct nlmsghdr  *nlh;
	unsigned char	 *b = skb->tail;	/* start of this message */

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*rtm));
	rtm = NLMSG_DATA(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = tos;
	rtm->rtm_table = tb_id;
	rtm->rtm_type = type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = scope;
	if (rtm->rtm_dst_len)
		RTA_PUT(skb, RTA_DST, 4, dst);
	rtm->rtm_protocol = fi->fib_protocol;
	if (fi->fib_priority)
		RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
#ifdef CONFIG_NET_CLS_ROUTE
	if (fi->fib_nh[0].nh_tclassid)
		RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
#endif
	if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
		goto rtattr_failure;
	if (fi->fib_prefsrc)
		RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
	/* A single nexthop is reported with top-level attributes. */
	if (fi->fib_nhs == 1) {
		if (fi->fib_nh->nh_gw)
			RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
		if (fi->fib_nh->nh_oif)
			RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
	}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* Several nexthops are reported as one RTA_MULTIPATH attribute that
	   contains an rtnexthop record per nexthop. */
	if (fi->fib_nhs > 1) {
		struct rtnexthop *nhp;
		struct rtattr *mp_head;
		if (skb_tailroom(skb) <= RTA_SPACE(0))
			goto rtattr_failure;
		/* Reserve the attribute header; type and length are filled
		   in once all records have been written. */
		mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));

		for_nexthops(fi) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = nh->nh_flags & 0xFF;
			/* Inverse of "nh_weight = rtnh_hops + 1" in
			   fib_get_nhs(). */
			nhp->rtnh_hops = nh->nh_weight-1;
			nhp->rtnh_ifindex = nh->nh_oif;
			if (nh->nh_gw)
				RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
			/* Record length covers the header plus whatever was
			   appended after it. */
			nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
		} endfor_nexthops(fi);
		mp_head->rta_type = RTA_MULTIPATH;
		mp_head->rta_len = skb->tail - (u8*)mp_head;
	}
#endif
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	/* Drop everything written since entry. */
	skb_trim(skb, b - skb->data);
	return -1;
}
712 
713 #ifndef CONFIG_IP_NOSIOCRT
714 
/*
 * Translate an old-style route request (struct rtentry used with
 * SIOCADDRT / SIOCDELRT) into the netlink form used by the FIB code.
 *
 * cmd - SIOCDELRT produces an RTM_DELROUTE message; any other value
 *       produces RTM_NEWROUTE with NLM_F_CREATE.
 * nl  - netlink header to fill in.
 * rtm - route message header to fill in (zeroed first).
 * rta - attribute pointer table to fill in (zeroed first).  Most pointers
 *       refer to storage inside r or inside device structures; rta_mx is
 *       allocated here with kmalloc().
 * r   - the request; r->rt_pad3 is overwritten when a metric is given.
 *
 * Returns 0 on success or a negative errno.
 *
 * NOTE(review): rta->rta_mx is not freed in this function; presumably the
 * caller releases it - confirm against the ioctl path.
 */
int
fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
		    struct kern_rta *rta, struct rtentry *r)
{
	int    plen;
	u32    *ptr;

	memset(rtm, 0, sizeof(*rtm));
	memset(rta, 0, sizeof(*rta));

	if (r->rt_dst.sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/* Check mask for validity:
	   a) it must be contiguous.
	   b) destination must have all host bits clear.
	   c) if application forgot to set correct family (AF_INET),
	      reject request unless it is absolutely clear i.e.
	      both family and mask are zero.
	 */
	plen = 32;
	ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
	if (!(r->rt_flags&RTF_HOST)) {
		u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
		if (r->rt_genmask.sa_family != AF_INET) {
			if (mask || r->rt_genmask.sa_family)
				return -EAFNOSUPPORT;
		}
		if (bad_mask(mask, *ptr))
			return -EINVAL;
		plen = inet_mask_len(mask);
	}

	nl->nlmsg_flags = NLM_F_REQUEST;
	nl->nlmsg_pid = 0;
	nl->nlmsg_seq = 0;
	nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
	if (cmd == SIOCDELRT) {
		nl->nlmsg_type = RTM_DELROUTE;
		nl->nlmsg_flags = 0;
	} else {
		nl->nlmsg_type = RTM_NEWROUTE;
		nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
		rtm->rtm_protocol = RTPROT_BOOT;
	}

	rtm->rtm_dst_len = plen;
	rta->rta_dst = ptr;

	if (r->rt_metric) {
		/* The priority is rt_metric - 1; the padding field of the
		   request is reused as storage for it. */
		*(u32*)&r->rt_pad3 = r->rt_metric - 1;
		rta->rta_priority = (u32*)&r->rt_pad3;
	}
	if (r->rt_flags&RTF_REJECT) {
		/* Reject routes need nothing beyond type and scope. */
		rtm->rtm_scope = RT_SCOPE_HOST;
		rtm->rtm_type = RTN_UNREACHABLE;
		return 0;
	}
	/* RT_SCOPE_NOWHERE serves as "not decided yet"; see below. */
	rtm->rtm_scope = RT_SCOPE_NOWHERE;
	rtm->rtm_type = RTN_UNICAST;

	if (r->rt_dev) {
		char *colon;
		struct net_device *dev;
		char   devname[IFNAMSIZ];

		if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
			return -EFAULT;
		devname[IFNAMSIZ-1] = 0;
		/* "dev:label" form: look the device up by the part before
		   the colon. */
		colon = strchr(devname, ':');
		if (colon)
			*colon = 0;
		dev = __dev_get_by_name(devname);
		if (!dev)
			return -ENODEV;
		rta->rta_oif = &dev->ifindex;
		if (colon) {
			/* Restore the full label and use the local address
			   of the matching interface address as the
			   preferred source. */
			struct in_ifaddr *ifa;
			struct in_device *in_dev = __in_dev_get(dev);
			if (!in_dev)
				return -ENODEV;
			*colon = ':';
			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
				if (strcmp(ifa->ifa_label, devname) == 0)
					break;
			if (ifa == NULL)
				return -ENODEV;
			rta->rta_prefsrc = &ifa->ifa_local;
		}
	}

	ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
	if (r->rt_gateway.sa_family == AF_INET && *ptr) {
		rta->rta_gw = ptr;
		if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
			rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	}

	/* Deletion needs none of the remaining checks or metrics. */
	if (cmd == SIOCDELRT)
		return 0;

	if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
		return -EINVAL;

	/* No true gateway was found above: treat it as a direct route. */
	if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
		rtm->rtm_scope = RT_SCOPE_LINK;

	if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
		/* Build an RTA_METRICS attribute with room for up to three
		   nested 4-byte records; each present record is appended
		   at the current end of the attribute. */
		struct rtattr *rec;
		struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
		if (mx == NULL)
			return -ENOMEM;
		rta->rta_mx = mx;
		mx->rta_type = RTA_METRICS;
		mx->rta_len  = RTA_LENGTH(0);
		if (r->rt_flags&RTF_MTU) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_ADVMSS;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			/* Stored as rt_mtu - 40; fib_node_get_info() adds
			   the 40 back when printing. */
			*(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
		}
		if (r->rt_flags&RTF_WINDOW) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_WINDOW;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			*(u32*)RTA_DATA(rec) = r->rt_window;
		}
		if (r->rt_flags&RTF_IRTT) {
			rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
			rec->rta_type = RTAX_RTT;
			rec->rta_len = RTA_LENGTH(4);
			mx->rta_len += RTA_LENGTH(4);
			/* Stored shifted left by 3; fib_node_get_info()
			   shifts it back when printing. */
			*(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
		}
	}
	return 0;
}
854 
855 #endif
856 
857 /*
858    Update FIB if:
859    - local address disappeared -> we must delete all the entries
860      referring to it.
861    - device went down -> we must shutdown all nexthops going via it.
862  */
863 
fib_sync_down(u32 local,struct net_device * dev,int force)864 int fib_sync_down(u32 local, struct net_device *dev, int force)
865 {
866 	int ret = 0;
867 	int scope = RT_SCOPE_NOWHERE;
868 
869 	if (force)
870 		scope = -1;
871 
872 	for_fib_info() {
873 		if (local && fi->fib_prefsrc == local) {
874 			fi->fib_flags |= RTNH_F_DEAD;
875 			ret++;
876 		} else if (dev && fi->fib_nhs) {
877 			int dead = 0;
878 
879 			change_nexthops(fi) {
880 				if (nh->nh_flags&RTNH_F_DEAD)
881 					dead++;
882 				else if (nh->nh_dev == dev &&
883 					 nh->nh_scope != scope) {
884 					nh->nh_flags |= RTNH_F_DEAD;
885 #ifdef CONFIG_IP_ROUTE_MULTIPATH
886 					spin_lock_bh(&fib_multipath_lock);
887 					fi->fib_power -= nh->nh_power;
888 					nh->nh_power = 0;
889 					spin_unlock_bh(&fib_multipath_lock);
890 #endif
891 					dead++;
892 				}
893 #ifdef CONFIG_IP_ROUTE_MULTIPATH
894 				if (force > 1 && nh->nh_dev == dev) {
895 					dead = fi->fib_nhs;
896 					break;
897 				}
898 #endif
899 			} endfor_nexthops(fi)
900 			if (dead == fi->fib_nhs) {
901 				fi->fib_flags |= RTNH_F_DEAD;
902 				ret++;
903 			}
904 		}
905 	} endfor_fib_info();
906 	return ret;
907 }
908 
909 #ifdef CONFIG_IP_ROUTE_MULTIPATH
910 
911 /*
912    Dead device goes up. We wake up dead nexthops.
913    It takes sense only on multipath routes.
914  */
915 
fib_sync_up(struct net_device * dev)916 int fib_sync_up(struct net_device *dev)
917 {
918 	int ret = 0;
919 
920 	if (!(dev->flags&IFF_UP))
921 		return 0;
922 
923 	for_fib_info() {
924 		int alive = 0;
925 
926 		change_nexthops(fi) {
927 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
928 				alive++;
929 				continue;
930 			}
931 			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
932 				continue;
933 			if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
934 				continue;
935 			alive++;
936 			spin_lock_bh(&fib_multipath_lock);
937 			nh->nh_power = 0;
938 			nh->nh_flags &= ~RTNH_F_DEAD;
939 			spin_unlock_bh(&fib_multipath_lock);
940 		} endfor_nexthops(fi)
941 
942 		if (alive > 0) {
943 			fi->fib_flags &= ~RTNH_F_DEAD;
944 			ret++;
945 		}
946 	} endfor_fib_info();
947 	return ret;
948 }
949 
950 /*
951    The algorithm is suboptimal, but it provides really
952    fair weighted route distribution.
953  */
954 
fib_select_multipath(const struct rt_key * key,struct fib_result * res)955 void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
956 {
957 	struct fib_info *fi = res->fi;
958 	int w;
959 
960 	spin_lock_bh(&fib_multipath_lock);
961 	if (fi->fib_power <= 0) {
962 		int power = 0;
963 		change_nexthops(fi) {
964 			if (!(nh->nh_flags&RTNH_F_DEAD)) {
965 				power += nh->nh_weight;
966 				nh->nh_power = nh->nh_weight;
967 			}
968 		} endfor_nexthops(fi);
969 		fi->fib_power = power;
970 		if (power <= 0) {
971 			spin_unlock_bh(&fib_multipath_lock);
972 			/* Race condition: route has just become dead. */
973 			res->nh_sel = 0;
974 			return;
975 		}
976 	}
977 
978 
979 	/* w should be random number [0..fi->fib_power-1],
980 	   it is pretty bad approximation.
981 	 */
982 
983 	w = jiffies % fi->fib_power;
984 
985 	change_nexthops(fi) {
986 		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
987 			if ((w -= nh->nh_power) <= 0) {
988 				nh->nh_power--;
989 				fi->fib_power--;
990 				res->nh_sel = nhsel;
991 				spin_unlock_bh(&fib_multipath_lock);
992 				return;
993 			}
994 		}
995 	} endfor_nexthops(fi);
996 
997 	/* Race condition: route has just become dead. */
998 	res->nh_sel = 0;
999 	spin_unlock_bh(&fib_multipath_lock);
1000 }
1001 #endif
1002 
1003 
1004 #ifdef CONFIG_PROC_FS
1005 
/*
 * Compute the RTF_* flag word shown for a route:
 *   - a per-type base value (RTF_REJECT for table slots 7 and 8),
 *   - RTF_GATEWAY when fi exists and its first nexthop has a gateway,
 *   - RTF_HOST when the mask is all ones,
 *   - RTF_UP unless the route is dead.
 */
static unsigned fib_flag_trans(int type, int dead, u32 mask, struct fib_info *fi)
{
	static unsigned type2flags[RTN_MAX+1] = {
		0, 0, 0, 0, 0, 0, 0, RTF_REJECT, RTF_REJECT, 0, 0, 0
	};
	unsigned result = type2flags[type];

	if (fi != NULL && fi->fib_nh->nh_gw != 0)
		result |= RTF_GATEWAY;
	if (mask == 0xFFFFFFFF)
		result |= RTF_HOST;
	if (dead == 0)
		result |= RTF_UP;

	return result;
}
1021 
/*
 * Format one route as a fixed-width text record in buffer: tab-separated
 * fields, padded with spaces up to offset 126, with a newline at offset
 * 127 (128 bytes in total, no terminating NUL is kept).
 *
 * When fi is NULL the device is shown as "*" and all record-derived
 * fields are printed as 0.
 */
void fib_node_get_info(int type, int dead, struct fib_info *fi, u32 prefix, u32 mask, char *buffer)
{
	unsigned flags = fib_flag_trans(type, dead, mask, fi);
	int len;

	if (fi == NULL) {
		len = sprintf(buffer, "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
			      prefix, 0,
			      flags, 0, 0, 0,
			      mask, 0, 0, 0);
	} else {
		const char *ifname = fi->fib_dev ? fi->fib_dev->name : "*";

		len = sprintf(buffer, "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
			      ifname, prefix,
			      fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
			      mask, (fi->fib_advmss ? fi->fib_advmss+40 : 0),
			      fi->fib_window, fi->fib_rtt>>3);
	}

	/* Pad the rest of the record and terminate it with a newline. */
	memset(buffer + len, ' ', 127 - len);
	buffer[127] = '\n';
}
1042 
1043 #endif
1044