1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		IPv4 FIB: lookup engine and maintenance routines.
7  *
8  * Version:	$Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $
9  *
10  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *		This program is free software; you can redistribute it and/or
13  *		modify it under the terms of the GNU General Public License
14  *		as published by the Free Software Foundation; either version
15  *		2 of the License, or (at your option) any later version.
16  */
17 
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38 
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 
/*
 * Debug trace hook used by the insert and delete paths.  It currently
 * expands to nothing; the printk() form kept in the comment below is the
 * expansion to restore when tracing is wanted.
 */
#define FTprint(a...)
/*
   printk(KERN_DEBUG a)
 */

/* Slab cache backing every struct fib_node; created on demand in fib_hash_init(). */
static kmem_cache_t * fn_hash_kmem;
52 
/*
   These bizarre types exist only to force strict type checking.
   When I reversed the byte order and switched to natural mask lengths,
   I forgot to make the matching fixes in several places. Now I am too
   lazy to change it back.
 */
59 
/*
 * Route key: a destination address already masked by its zone mask
 * (see fz_key()).  fn_hash() passes the value through ntohl(), so it is
 * presumably stored in network byte order.
 */
typedef struct {
	u32	datum;
} fn_key_t;
63 
/*
 * Bucket index into a zone's hash table, as produced by fn_hash().
 * Wrapped in a struct so it cannot be mixed up with a fn_key_t.
 */
typedef struct {
	u32	datum;
} fn_hash_idx_t;
67 
/*
 * One route entry.  Nodes hang off a zone's hash bucket as a singly
 * linked chain that the insert and rebuild paths keep sorted by fn_key.
 */
struct fib_node
{
	struct fib_node		*fn_next;	/* Next node in the same bucket chain	*/
	struct fib_info		*fn_info;	/* Route info, released in fn_free_node() */
#define FIB_INFO(f)	((f)->fn_info)
	fn_key_t		fn_key;		/* Masked destination prefix		*/
	u8			fn_tos;		/* TOS value (from rtm_tos)		*/
	u8			fn_type;	/* Route type (from rtm_type)		*/
	u8			fn_scope;	/* Route scope (from rtm_scope)		*/
	u8			fn_state;	/* FN_S_* flag bits			*/
};
79 
/*
 * fn_state bits.
 * FN_S_ZOMBIE:   node was deleted logically but is still linked in its
 *                chain; lookups skip it and fn_flush_list() reaps it.
 * FN_S_ACCESSED: node was touched by a lookup; the insert and delete paths
 *                use it to decide whether to call rt_cache_flush().
 */
#define FN_S_ZOMBIE	1
#define FN_S_ACCESSED	2

/*
 * Zombies created since the last fn_hash_flush(); fn_hash_delete() calls
 * fib_flush() once this exceeds 128.
 */
static int fib_hash_zombies;
84 
/*
 * A zone holds every route of one prefix length (fz_order) in its own
 * hash table.  Zones are created on demand by fn_new_zone().
 */
struct fn_zone
{
	struct fn_zone	*fz_next;	/* Next not empty zone	*/
	struct fib_node	**fz_hash;	/* Hash table pointer	*/
	int		fz_nent;	/* Number of entries	*/

	int		fz_divisor;	/* Hash divisor		*/
	u32		fz_hashmask;	/* (fz_divisor - 1)	*/
#define FZ_HASHMASK(fz)	((fz)->fz_hashmask)

	int		fz_order;	/* Zone order		*/
	u32		fz_mask;	/* inet_make_mask(fz_order) */
#define FZ_MASK(fz)	((fz)->fz_mask)
};
99 
/* NOTE: On fast computers, computing fz_hashmask and fz_mask can be
   cheaper than a memory lookup, which is why the FZ_* macros are used.
 */
103 
/*
 * Per-table private data (stored in fib_table.tb_data).
 * fn_zones is indexed by prefix length 0..32; fn_zone_list chains the
 * existing zones via fz_next, with fn_new_zone() placing each new zone
 * right after the nearest more specific one (or at the head if none).
 */
struct fn_hash
{
	struct fn_zone	*fn_zones[33];
	struct fn_zone	*fn_zone_list;
};
109 
fn_hash(fn_key_t key,struct fn_zone * fz)110 static __inline__ fn_hash_idx_t fn_hash(fn_key_t key, struct fn_zone *fz)
111 {
112 	u32 h = ntohl(key.datum)>>(32 - fz->fz_order);
113 	h ^= (h>>20);
114 	h ^= (h>>10);
115 	h ^= (h>>5);
116 	h &= FZ_HASHMASK(fz);
117 	return *(fn_hash_idx_t*)&h;
118 }
119 
/* Reset a key to the all-zero prefix. */
#define fz_key_0(key)		((key).datum = 0)
/* Raw prefix value held in a key; the zone argument is not used. */
#define fz_prefix(key,fz)	((key).datum)
122 
fz_key(u32 dst,struct fn_zone * fz)123 static __inline__ fn_key_t fz_key(u32 dst, struct fn_zone *fz)
124 {
125 	fn_key_t k;
126 	k.datum = dst & FZ_MASK(fz);
127 	return k;
128 }
129 
/*
 * Return the address of the bucket head for key in zone fz, so that the
 * caller can walk and relink the chain in place.
 */
static __inline__ struct fib_node ** fz_chain_p(fn_key_t key, struct fn_zone *fz)
{
	fn_hash_idx_t slot = fn_hash(key, fz);

	return fz->fz_hash + slot.datum;
}
134 
/* Return the first node of the bucket chain that key hashes to in zone fz. */
static __inline__ struct fib_node * fz_chain(fn_key_t key, struct fn_zone *fz)
{
	fn_hash_idx_t slot = fn_hash(key, fz);
	struct fib_node **head = fz->fz_hash + slot.datum;

	return *head;
}
139 
/*
 * Return non-zero when the two keys hold the same prefix value.
 *
 * Declared static like the other file-local helpers: with gnu89
 * semantics "extern inline" emits no out-of-line body, so a call the
 * compiler chose not to inline would be left unresolved at link time.
 */
static __inline__ int fn_key_eq(fn_key_t a, fn_key_t b)
{
	return a.datum == b.datum;
}
144 
/*
 * Return non-zero when key a sorts at or before key b.  The comparison
 * is on the raw stored value; it defines the order of the bucket chains.
 *
 * Declared static like the other file-local helpers: with gnu89
 * semantics "extern inline" emits no out-of-line body, so a call the
 * compiler chose not to inline would be left unresolved at link time.
 */
static __inline__ int fn_key_leq(fn_key_t a, fn_key_t b)
{
	return a.datum <= b.datum;
}
149 
/*
 * Reader/writer lock for the zone list and the bucket chains.  Lookups
 * and dumps take it for reading; every relink of a chain or of the zone
 * list in this file is done under write_lock_bh().
 */
static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED;

/*
 * Upper bound on a zone's bucket count: the number of node pointers that
 * fit in (PAGE_SIZE << MAX_ORDER) bytes.
 */
#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct fib_node *))
153 
fz_hash_alloc(int divisor)154 static struct fib_node **fz_hash_alloc(int divisor)
155 {
156 	unsigned long size = divisor * sizeof(struct fib_node *);
157 
158 	if (divisor <= 1024) {
159 		return kmalloc(size, GFP_KERNEL);
160 	} else {
161 		return (struct fib_node **)
162 			__get_free_pages(GFP_KERNEL, get_order(size));
163 	}
164 }
165 
166 /* The fib hash lock must be held when this is called. */
fn_rebuild_zone(struct fn_zone * fz,struct fib_node ** old_ht,int old_divisor)167 static __inline__ void fn_rebuild_zone(struct fn_zone *fz,
168 				       struct fib_node **old_ht,
169 				       int old_divisor)
170 {
171 	int i;
172 	struct fib_node *f, **fp, *next;
173 
174 	for (i=0; i<old_divisor; i++) {
175 		for (f=old_ht[i]; f; f=next) {
176 			next = f->fn_next;
177 			for (fp = fz_chain_p(f->fn_key, fz);
178 			     *fp && fn_key_leq((*fp)->fn_key, f->fn_key);
179 			     fp = &(*fp)->fn_next)
180 				/* NONE */;
181 			f->fn_next = *fp;
182 			*fp = f;
183 		}
184 	}
185 }
186 
/*
 * Release a bucket array obtained from fz_hash_alloc().  divisor must be
 * the size it was allocated with, since it selects the matching release
 * routine.
 */
static void fz_hash_free(struct fib_node **hash, int divisor)
{
	if (divisor > 1024) {
		free_pages((unsigned long) hash,
			   get_order(divisor * sizeof(struct fib_node *)));
		return;
	}

	kfree(hash);
}
195 
fn_rehash_zone(struct fn_zone * fz)196 static void fn_rehash_zone(struct fn_zone *fz)
197 {
198 	struct fib_node **ht, **old_ht;
199 	int old_divisor, new_divisor;
200 	u32 new_hashmask;
201 
202 	old_divisor = fz->fz_divisor;
203 
204 	switch (old_divisor) {
205 	case 16:
206 		new_divisor = 256;
207 		break;
208 	case 256:
209 		new_divisor = 1024;
210 		break;
211 	default:
212 		if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
213 			printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
214 			return;
215 		}
216 		new_divisor = (old_divisor << 1);
217 		break;
218 	}
219 
220 	new_hashmask = (new_divisor - 1);
221 
222 #if RT_CACHE_DEBUG >= 2
223 	printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
224 #endif
225 
226 	ht = fz_hash_alloc(new_divisor);
227 
228 	if (ht)	{
229 		memset(ht, 0, new_divisor*sizeof(struct fib_node*));
230 
231 		write_lock_bh(&fib_hash_lock);
232 		old_ht = fz->fz_hash;
233 		fz->fz_hash = ht;
234 		fz->fz_hashmask = new_hashmask;
235 		fz->fz_divisor = new_divisor;
236 		fn_rebuild_zone(fz, old_ht, old_divisor);
237 		write_unlock_bh(&fib_hash_lock);
238 
239 		fz_hash_free(old_ht, old_divisor);
240 	}
241 }
242 
fn_free_node(struct fib_node * f)243 static void fn_free_node(struct fib_node * f)
244 {
245 	fib_release_info(FIB_INFO(f));
246 	kmem_cache_free(fn_hash_kmem, f);
247 }
248 
249 
250 static struct fn_zone *
fn_new_zone(struct fn_hash * table,int z)251 fn_new_zone(struct fn_hash *table, int z)
252 {
253 	int i;
254 	struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
255 	if (!fz)
256 		return NULL;
257 
258 	memset(fz, 0, sizeof(struct fn_zone));
259 	if (z) {
260 		fz->fz_divisor = 16;
261 	} else {
262 		fz->fz_divisor = 1;
263 	}
264 	fz->fz_hashmask = (fz->fz_divisor - 1);
265 	fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
266 	if (!fz->fz_hash) {
267 		kfree(fz);
268 		return NULL;
269 	}
270 	memset(fz->fz_hash, 0, fz->fz_divisor*sizeof(struct fib_node*));
271 	fz->fz_order = z;
272 	fz->fz_mask = inet_make_mask(z);
273 
274 	/* Find the first not empty zone with more specific mask */
275 	for (i=z+1; i<=32; i++)
276 		if (table->fn_zones[i])
277 			break;
278 	write_lock_bh(&fib_hash_lock);
279 	if (i>32) {
280 		/* No more specific masks, we are the first. */
281 		fz->fz_next = table->fn_zone_list;
282 		table->fn_zone_list = fz;
283 	} else {
284 		fz->fz_next = table->fn_zones[i]->fz_next;
285 		table->fn_zones[i]->fz_next = fz;
286 	}
287 	table->fn_zones[z] = fz;
288 	write_unlock_bh(&fib_hash_lock);
289 	return fz;
290 }
291 
/*
 * Look up a route for key in table tb.
 *
 * Zones are visited in fn_zone_list order.  In each zone the destination
 * is masked to the zone's prefix and the matching bucket chain is walked.
 *
 * Returns 0 with res->type, res->scope and res->prefixlen filled in when
 * fib_semantic_match() accepts a node, a negative value passed through
 * from fib_semantic_match(), or 1 when no zone produced a match.
 */
static int
fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
{
	int err;
	struct fn_zone *fz;
	struct fn_hash *t = (struct fn_hash*)tb->tb_data;

	read_lock(&fib_hash_lock);
	for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
		struct fib_node *f;
		fn_key_t k = fz_key(key->dst, fz);

		for (f = fz_chain(k, fz); f; f = f->fn_next) {
			if (!fn_key_eq(k, f->fn_key)) {
				/* Chains are sorted by key: once past k, stop. */
				if (fn_key_leq(k, f->fn_key))
					break;
				else
					continue;
			}
#ifdef CONFIG_IP_ROUTE_TOS
			/* A node with tos 0 matches any requested tos. */
			if (f->fn_tos && f->fn_tos != key->tos)
				continue;
#endif
			/* Marked before the zombie/scope checks, so even skipped nodes count as accessed. */
			f->fn_state |= FN_S_ACCESSED;

			if (f->fn_state&FN_S_ZOMBIE)
				continue;
			if (f->fn_scope < key->scope)
				continue;

			err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res);
			if (err == 0) {
				res->type = f->fn_type;
				res->scope = f->fn_scope;
				res->prefixlen = fz->fz_order;
				goto out;
			}
			/* Negative: stop with that error.  Positive: keep scanning. */
			if (err < 0)
				goto out;
		}
	}
	err = 1;
out:
	read_unlock(&fib_hash_lock);
	return err;
}
338 
/*
 * Position (as counted by fn_hash_select_default()) of the default route
 * chosen last time, or -1 when there is none.
 */
static int fn_hash_last_dflt=-1;
340 
fib_detect_death(struct fib_info * fi,int order,struct fib_info ** last_resort,int * last_idx)341 static int fib_detect_death(struct fib_info *fi, int order,
342 			    struct fib_info **last_resort, int *last_idx)
343 {
344 	struct neighbour *n;
345 	int state = NUD_NONE;
346 
347 	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
348 	if (n) {
349 		state = n->nud_state;
350 		neigh_release(n);
351 	}
352 	if (state==NUD_REACHABLE)
353 		return 0;
354 	if ((state&NUD_VALID) && order != fn_hash_last_dflt)
355 		return 0;
356 	if ((state&NUD_VALID) ||
357 	    (*last_idx<0 && order > fn_hash_last_dflt)) {
358 		*last_resort = fi;
359 		*last_idx = order;
360 	}
361 	return 1;
362 }
363 
/*
 * Choose among the routes in the order-0 zone (prefix length 0) and, if
 * a better candidate than the current res->fi is found, replace res->fi
 * with it (dropping the old reference and taking a new one).
 *
 * Candidates are nodes in bucket 0 that are not zombies, have
 * res->scope and type RTN_UNICAST, a priority not above res->fi's, and a
 * gateway with link scope.  fib_detect_death() decides whether a
 * candidate's gateway is usable.  fn_hash_last_dflt remembers the
 * position of the chosen route for the next call.
 *
 * NOTE(review): res->fi is dereferenced for the priority comparison
 * without a NULL check, although later code guards it with
 * "if (res->fi)" -- presumably callers always pass a non-NULL res->fi;
 * confirm.
 */
static void
fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
{
	int order, last_idx;
	struct fib_node *f;
	struct fib_info *fi = NULL;
	struct fib_info *last_resort;
	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
	struct fn_zone *fz = t->fn_zones[0];

	if (fz == NULL)
		return;

	last_idx = -1;
	last_resort = NULL;
	order = -1;

	read_lock(&fib_hash_lock);
	for (f = fz->fz_hash[0]; f; f = f->fn_next) {
		struct fib_info *next_fi = FIB_INFO(f);

		if ((f->fn_state&FN_S_ZOMBIE) ||
		    f->fn_scope != res->scope ||
		    f->fn_type != RTN_UNICAST)
			continue;

		if (next_fi->fib_priority > res->fi->fib_priority)
			break;
		if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
			continue;
		f->fn_state |= FN_S_ACCESSED;

		if (fi == NULL) {
			/* The first candidate must be the route already in res. */
			if (next_fi != res->fi)
				break;
		} else if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
			/* The previous candidate (fi) is usable: select it. */
			if (res->fi)
				fib_info_put(res->fi);
			res->fi = fi;
			atomic_inc(&fi->fib_clntref);
			fn_hash_last_dflt = order;
			goto out;
		}
		/* fi trails the scan by one candidate; order is fi's position. */
		fi = next_fi;
		order++;
	}

	/* Fewer than two candidates: nothing to choose between. */
	if (order<=0 || fi==NULL) {
		fn_hash_last_dflt = -1;
		goto out;
	}

	/* The final candidate has not been tested inside the loop yet. */
	if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
		if (res->fi)
			fib_info_put(res->fi);
		res->fi = fi;
		atomic_inc(&fi->fib_clntref);
		fn_hash_last_dflt = order;
		goto out;
	}

	/* No candidate passed; fall back to the recorded last resort, if any. */
	if (last_idx >= 0) {
		if (res->fi)
			fib_info_put(res->fi);
		res->fi = last_resort;
		if (last_resort)
			atomic_inc(&last_resort->fib_clntref);
	}
	fn_hash_last_dflt = last_idx;
out:
	read_unlock(&fib_hash_lock);
}
436 
/*
 * Chain-walking loop headers.  All of them continue from the current
 * position of fp (a pointer to a link field), load the node into f, and
 * advance fp to &f->fn_next, so that after the loop *fp == f and fp can
 * be used to insert before, or unlink, f.
 *
 * FIB_SCAN:     walk to the end of the chain.
 * FIB_SCAN_KEY: walk while the node key equals key.
 * FIB_SCAN_TOS: as FIB_SCAN_KEY, and (with CONFIG_IP_ROUTE_TOS) while the
 *               node tos equals tos as well.
 */
#define FIB_SCAN(f, fp) \
for ( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)

#define FIB_SCAN_KEY(f, fp, key) \
for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)

#ifndef CONFIG_IP_ROUTE_TOS
#define FIB_SCAN_TOS(f, fp, key, tos) FIB_SCAN_KEY(f, fp, key)
#else
#define FIB_SCAN_TOS(f, fp, key, tos) \
for ( ; ((f) = *(fp)) != NULL && fn_key_eq((f)->fn_key, (key)) && \
     (f)->fn_tos == (tos) ; (fp) = &(f)->fn_next)
#endif
450 
451 
/* Forward declaration: sends a route change notification; defined below. */
static void rtmsg_fib(int, struct fib_node*, int, int,
		      struct nlmsghdr *n,
		      struct netlink_skb_parms *);
455 
/*
 * Add (or replace) a route in table tb as described by r/rta.
 *
 * Steps: find or create the zone for the prefix length, build the key,
 * create the fib_info, grow the zone's hash table if it is crowded, then
 * locate the insert position in the sorted chain and link a new node.
 * A zombie with the same key (and tos) or, with NLM_F_REPLACE, an existing
 * node with the same key/tos/priority is unlinked after the new node is
 * in place.
 *
 * Returns 0 on success, or:
 *   -EINVAL  prefix length above 32, or destination has bits outside the mask
 *   -ENOBUFS zone or node allocation failed
 *   -EEXIST  an equal route exists (NLM_F_EXCL set, or an identical entry found)
 *   -ENOENT  a new node is needed but NLM_F_CREATE is not set
 *   the error reported by fib_create_info()
 */
static int
fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
		struct nlmsghdr *n, struct netlink_skb_parms *req)
{
	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
	struct fib_node *new_f, *f, **fp, **del_fp;
	struct fn_zone *fz;
	struct fib_info *fi;

	int z = r->rtm_dst_len;
	int type = r->rtm_type;
#ifdef CONFIG_IP_ROUTE_TOS
	u8 tos = r->rtm_tos;
#endif
	fn_key_t key;
	int err;

FTprint("tb(%d)_insert: %d %08x/%d %d %08x\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
*(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1,
rta->rta_prefsrc ? *(u32*)rta->rta_prefsrc : 0);
	if (z > 32)
		return -EINVAL;
	fz = table->fn_zones[z];
	if (!fz && !(fz = fn_new_zone(table, z)))
		return -ENOBUFS;

	fz_key_0(key);
	if (rta->rta_dst) {
		u32 dst;
		memcpy(&dst, rta->rta_dst, 4);
		/* Reject a destination that has host bits set for this prefix length. */
		if (dst & ~FZ_MASK(fz))
			return -EINVAL;
		key = fz_key(dst, fz);
	}

	/* From here on every failure must go through "out" to release fi. */
	if  ((fi = fib_create_info(r, rta, n, &err)) == NULL)
		return err;

	/*
	 * Grow the table when entries exceed twice the bucket count, the
	 * limit is not reached, and the prefix length can fill more buckets.
	 */
	if (fz->fz_nent > (fz->fz_divisor<<1) &&
	    fz->fz_divisor < FZ_MAX_DIVISOR &&
	    (z==32 || (1<<z) > fz->fz_divisor))
		fn_rehash_zone(fz);

	/* Computed after a possible rehash, so fp refers to the current table. */
	fp = fz_chain_p(key, fz);


	/*
	 * Scan list to find the first route with the same destination
	 */
	FIB_SCAN(f, fp) {
		if (fn_key_leq(key,f->fn_key))
			break;
	}

#ifdef CONFIG_IP_ROUTE_TOS
	/*
	 * Find route with the same destination and tos.
	 */
	FIB_SCAN_KEY(f, fp, key) {
		if (f->fn_tos <= tos)
			break;
	}
#endif

	del_fp = NULL;

	/*
	 * A zombie with our key (and tos) sits here: insert the new node
	 * right after it and let the unlink step below remove the zombie.
	 */
	if (f && (f->fn_state&FN_S_ZOMBIE) &&
#ifdef CONFIG_IP_ROUTE_TOS
	    f->fn_tos == tos &&
#endif
	    fn_key_eq(f->fn_key, key)) {
		del_fp = fp;
		fp = &f->fn_next;
		f = *fp;
		goto create;
	}

	/* Skip nodes of the same key/tos whose priority value is lower. */
	FIB_SCAN_TOS(f, fp, key, tos) {
		if (fi->fib_priority <= FIB_INFO(f)->fib_priority)
			break;
	}

	/* Now f==*fp points to the first node with the same
	   keys [prefix,tos,priority], if such key already
	   exists or to the node, before which we will insert new one.
	 */

	if (f &&
#ifdef CONFIG_IP_ROUTE_TOS
	    f->fn_tos == tos &&
#endif
	    fn_key_eq(f->fn_key, key) &&
	    fi->fib_priority == FIB_INFO(f)->fib_priority) {
		struct fib_node **ins_fp;

		err = -EEXIST;
		if (n->nlmsg_flags&NLM_F_EXCL)
			goto out;

		if (n->nlmsg_flags&NLM_F_REPLACE) {
			/* Insert after the old node, then unlink the old node below. */
			del_fp = fp;
			fp = &f->fn_next;
			f = *fp;
			goto replace;
		}

		ins_fp = fp;
		err = -EEXIST;

		/* Refuse an exact duplicate among the equal-priority nodes. */
		FIB_SCAN_TOS(f, fp, key, tos) {
			if (fi->fib_priority != FIB_INFO(f)->fib_priority)
				break;
			if (f->fn_type == type && f->fn_scope == r->rtm_scope
			    && FIB_INFO(f) == fi)
				goto out;
		}

		/* Without NLM_F_APPEND go in front of the equal-priority run, else behind it. */
		if (!(n->nlmsg_flags&NLM_F_APPEND)) {
			fp = ins_fp;
			f = *fp;
		}
	}

create:
	err = -ENOENT;
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		goto out;

replace:
	err = -ENOBUFS;
	new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL);
	if (new_f == NULL)
		goto out;

	memset(new_f, 0, sizeof(struct fib_node));

	new_f->fn_key = key;
#ifdef CONFIG_IP_ROUTE_TOS
	new_f->fn_tos = tos;
#endif
	new_f->fn_type = type;
	new_f->fn_scope = r->rtm_scope;
	FIB_INFO(new_f) = fi;

	/*
	 * Insert new entry to the list.
	 */

	/* The node is fully set up before it is published under the write lock. */
	new_f->fn_next = f;
	write_lock_bh(&fib_hash_lock);
	*fp = new_f;
	write_unlock_bh(&fib_hash_lock);
	fz->fz_nent++;

	if (del_fp) {
		f = *del_fp;
		/* Unlink replaced node */
		write_lock_bh(&fib_hash_lock);
		*del_fp = f->fn_next;
		write_unlock_bh(&fib_hash_lock);

		/* A zombie's deletion was already announced by fn_hash_delete(). */
		if (!(f->fn_state&FN_S_ZOMBIE))
			rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);
		if (f->fn_state&FN_S_ACCESSED)
			rt_cache_flush(-1);
		fn_free_node(f);
		fz->fz_nent--;
	} else {
		rt_cache_flush(-1);
	}
	rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->tb_id, n, req);
	return 0;

out:
	fib_release_info(fi);
	return err;
}
633 
634 
/*
 * Delete the route described by r/rta from table tb.
 *
 * The chain is scanned for nodes with the requested key (and tos); the
 * first one that also matches the requested type, scope, protocol and
 * next hops is the victim.  If other nodes share its key/tos it is
 * unlinked and freed at once; if it is the only one it is merely marked
 * FN_S_ZOMBIE and left in the chain for a later flush.
 *
 * Returns 0 on success, -EINVAL for a bad prefix length or destination,
 * and -ESRCH when no matching live route exists.
 */
static int
fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
		struct nlmsghdr *n, struct netlink_skb_parms *req)
{
	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
	struct fib_node **fp, **del_fp, *f;
	int z = r->rtm_dst_len;
	struct fn_zone *fz;
	fn_key_t key;
	int matched;
#ifdef CONFIG_IP_ROUTE_TOS
	u8 tos = r->rtm_tos;
#endif

FTprint("tb(%d)_delete: %d %08x/%d %d\n", tb->tb_id, r->rtm_type, rta->rta_dst ?
       *(u32*)rta->rta_dst : 0, z, rta->rta_oif ? *rta->rta_oif : -1);
	if (z > 32)
		return -EINVAL;
	if ((fz  = table->fn_zones[z]) == NULL)
		return -ESRCH;

	fz_key_0(key);
	if (rta->rta_dst) {
		u32 dst;
		memcpy(&dst, rta->rta_dst, 4);
		/* Reject a destination that has host bits set for this prefix length. */
		if (dst & ~FZ_MASK(fz))
			return -EINVAL;
		key = fz_key(dst, fz);
	}

	fp = fz_chain_p(key, fz);


	/* Advance to the first node with our key; the chain is sorted, so passing it means absent. */
	FIB_SCAN(f, fp) {
		if (fn_key_eq(f->fn_key, key))
			break;
		if (fn_key_leq(key, f->fn_key)) {
			return -ESRCH;
		}
	}
#ifdef CONFIG_IP_ROUTE_TOS
	/* Within that key, advance to the first node with our tos. */
	FIB_SCAN_KEY(f, fp, key) {
		if (f->fn_tos == tos)
			break;
	}
#endif

	matched = 0;
	del_fp = NULL;
	/* Count every node with this key/tos and remember the first full match. */
	FIB_SCAN_TOS(f, fp, key, tos) {
		struct fib_info * fi = FIB_INFO(f);

		if (f->fn_state&FN_S_ZOMBIE) {
			return -ESRCH;
		}
		matched++;

		/* Zero rtm_type / rtm_protocol and RT_SCOPE_NOWHERE act as wildcards. */
		if (del_fp == NULL &&
		    (!r->rtm_type || f->fn_type == r->rtm_type) &&
		    (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
		    (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) &&
		    fib_nh_match(r, n, rta, fi) == 0)
			del_fp = fp;
	}

	if (del_fp) {
		f = *del_fp;
		rtmsg_fib(RTM_DELROUTE, f, z, tb->tb_id, n, req);

		if (matched != 1) {
			/* Other nodes share the key/tos: remove this one outright. */
			write_lock_bh(&fib_hash_lock);
			*del_fp = f->fn_next;
			write_unlock_bh(&fib_hash_lock);

			if (f->fn_state&FN_S_ACCESSED)
				rt_cache_flush(-1);
			fn_free_node(f);
			fz->fz_nent--;
		} else {
			/* Sole node for this key/tos: keep it linked as a zombie. */
			f->fn_state |= FN_S_ZOMBIE;
			if (f->fn_state&FN_S_ACCESSED) {
				f->fn_state &= ~FN_S_ACCESSED;
				rt_cache_flush(-1);
			}
			/* Too many zombies accumulated: trigger a flush. */
			if (++fib_hash_zombies > 128)
				fib_flush();
		}

		return 0;
	}
	return -ESRCH;
}
727 
728 extern __inline__ int
fn_flush_list(struct fib_node ** fp,int z,struct fn_hash * table)729 fn_flush_list(struct fib_node ** fp, int z, struct fn_hash *table)
730 {
731 	int found = 0;
732 	struct fib_node *f;
733 
734 	while ((f = *fp) != NULL) {
735 		struct fib_info *fi = FIB_INFO(f);
736 
737 		if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) {
738 			write_lock_bh(&fib_hash_lock);
739 			*fp = f->fn_next;
740 			write_unlock_bh(&fib_hash_lock);
741 
742 			fn_free_node(f);
743 			found++;
744 			continue;
745 		}
746 		fp = &f->fn_next;
747 	}
748 	return found;
749 }
750 
fn_hash_flush(struct fib_table * tb)751 static int fn_hash_flush(struct fib_table *tb)
752 {
753 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
754 	struct fn_zone *fz;
755 	int found = 0;
756 
757 	fib_hash_zombies = 0;
758 	for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
759 		int i;
760 		int tmp = 0;
761 		for (i=fz->fz_divisor-1; i>=0; i--)
762 			tmp += fn_flush_list(&fz->fz_hash[i], fz->fz_order, table);
763 		fz->fz_nent -= tmp;
764 		found += tmp;
765 	}
766 	return found;
767 }
768 
769 
770 #ifdef CONFIG_PROC_FS
771 
fn_hash_get_info(struct fib_table * tb,char * buffer,int first,int count)772 static int fn_hash_get_info(struct fib_table *tb, char *buffer, int first, int count)
773 {
774 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
775 	struct fn_zone *fz;
776 	int pos = 0;
777 	int n = 0;
778 
779 	read_lock(&fib_hash_lock);
780 	for (fz=table->fn_zone_list; fz; fz = fz->fz_next) {
781 		int i;
782 		struct fib_node *f;
783 		int maxslot = fz->fz_divisor;
784 		struct fib_node **fp = fz->fz_hash;
785 
786 		if (fz->fz_nent == 0)
787 			continue;
788 
789 		if (pos + fz->fz_nent <= first) {
790 			pos += fz->fz_nent;
791 			continue;
792 		}
793 
794 		for (i=0; i < maxslot; i++, fp++) {
795 			for (f = *fp; f; f = f->fn_next) {
796 				if (++pos <= first)
797 					continue;
798 				fib_node_get_info(f->fn_type,
799 						  f->fn_state&FN_S_ZOMBIE,
800 						  FIB_INFO(f),
801 						  fz_prefix(f->fn_key, fz),
802 						  FZ_MASK(fz), buffer);
803 				buffer += 128;
804 				if (++n >= count)
805 					goto out;
806 			}
807 		}
808 	}
809 out:
810 	read_unlock(&fib_hash_lock);
811   	return n;
812 }
813 #endif
814 
815 
816 static __inline__ int
fn_hash_dump_bucket(struct sk_buff * skb,struct netlink_callback * cb,struct fib_table * tb,struct fn_zone * fz,struct fib_node * f)817 fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
818 		     struct fib_table *tb,
819 		     struct fn_zone *fz,
820 		     struct fib_node *f)
821 {
822 	int i, s_i;
823 
824 	s_i = cb->args[3];
825 	for (i=0; f; i++, f=f->fn_next) {
826 		if (i < s_i) continue;
827 		if (f->fn_state&FN_S_ZOMBIE) continue;
828 		if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
829 				  RTM_NEWROUTE,
830 				  tb->tb_id, (f->fn_state&FN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope,
831 				  &f->fn_key, fz->fz_order, f->fn_tos,
832 				  f->fn_info) < 0) {
833 			cb->args[3] = i;
834 			return -1;
835 		}
836 	}
837 	cb->args[3] = i;
838 	return skb->len;
839 }
840 
841 static __inline__ int
fn_hash_dump_zone(struct sk_buff * skb,struct netlink_callback * cb,struct fib_table * tb,struct fn_zone * fz)842 fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
843 		   struct fib_table *tb,
844 		   struct fn_zone *fz)
845 {
846 	int h, s_h;
847 
848 	s_h = cb->args[2];
849 	for (h=0; h < fz->fz_divisor; h++) {
850 		if (h < s_h) continue;
851 		if (h > s_h)
852 			memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
853 		if (fz->fz_hash == NULL || fz->fz_hash[h] == NULL)
854 			continue;
855 		if (fn_hash_dump_bucket(skb, cb, tb, fz, fz->fz_hash[h]) < 0) {
856 			cb->args[2] = h;
857 			return -1;
858 		}
859 	}
860 	cb->args[2] = h;
861 	return skb->len;
862 }
863 
fn_hash_dump(struct fib_table * tb,struct sk_buff * skb,struct netlink_callback * cb)864 static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
865 {
866 	int m, s_m;
867 	struct fn_zone *fz;
868 	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
869 
870 	s_m = cb->args[1];
871 	read_lock(&fib_hash_lock);
872 	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
873 		if (m < s_m) continue;
874 		if (m > s_m)
875 			memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0]));
876 		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
877 			cb->args[1] = m;
878 			read_unlock(&fib_hash_lock);
879 			return -1;
880 		}
881 	}
882 	read_unlock(&fib_hash_lock);
883 	cb->args[1] = m;
884 	return skb->len;
885 }
886 
rtmsg_fib(int event,struct fib_node * f,int z,int tb_id,struct nlmsghdr * n,struct netlink_skb_parms * req)887 static void rtmsg_fib(int event, struct fib_node* f, int z, int tb_id,
888 		      struct nlmsghdr *n, struct netlink_skb_parms *req)
889 {
890 	struct sk_buff *skb;
891 	u32 pid = req ? req->pid : 0;
892 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
893 
894 	skb = alloc_skb(size, GFP_KERNEL);
895 	if (!skb)
896 		return;
897 
898 	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
899 			  f->fn_type, f->fn_scope, &f->fn_key, z, f->fn_tos,
900 			  FIB_INFO(f)) < 0) {
901 		kfree_skb(skb);
902 		return;
903 	}
904 	NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
905 	if (n->nlmsg_flags&NLM_F_ECHO)
906 		atomic_inc(&skb->users);
907 	netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
908 	if (n->nlmsg_flags&NLM_F_ECHO)
909 		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
910 }
911 
912 #ifdef CONFIG_IP_MULTIPLE_TABLES
fib_hash_init(int id)913 struct fib_table * fib_hash_init(int id)
914 #else
915 struct fib_table * __init fib_hash_init(int id)
916 #endif
917 {
918 	struct fib_table *tb;
919 
920 	if (fn_hash_kmem == NULL)
921 		fn_hash_kmem = kmem_cache_create("ip_fib_hash",
922 						 sizeof(struct fib_node),
923 						 0, SLAB_HWCACHE_ALIGN,
924 						 NULL, NULL);
925 
926 	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash), GFP_KERNEL);
927 	if (tb == NULL)
928 		return NULL;
929 
930 	tb->tb_id = id;
931 	tb->tb_lookup = fn_hash_lookup;
932 	tb->tb_insert = fn_hash_insert;
933 	tb->tb_delete = fn_hash_delete;
934 	tb->tb_flush = fn_hash_flush;
935 	tb->tb_select_default = fn_hash_select_default;
936 	tb->tb_dump = fn_hash_dump;
937 #ifdef CONFIG_PROC_FS
938 	tb->tb_get_info = fn_hash_get_info;
939 #endif
940 	memset(tb->tb_data, 0, sizeof(struct fn_hash));
941 	return tb;
942 }
943