1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: FIB frontend.
7 *
8 * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <asm/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/sched.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/icmp.h>
45 #include <net/arp.h>
46 #include <net/ip_fib.h>
47
/* Debug helper: forwards its format string and arguments to printk() at
 * KERN_DEBUG level.  Uses the GNU named-variadic macro form (a...).
 * NOTE(review): no user of this macro is visible in this file - confirm
 * before relying on it.
 */
48 #define FFprint(a...) printk(KERN_DEBUG a)
49
50 #ifndef CONFIG_IP_MULTIPLE_TABLES
51
/* Build without CONFIG_IP_MULTIPLE_TABLES: there are exactly two tables,
 * both created by ip_fib_init() through fib_hash_init().
 * RT_TABLE_MIN is the table id at which inet_dump_fib() starts a dump.
 */
52 #define RT_TABLE_MIN RT_TABLE_MAIN
53
/* Table created with id RT_TABLE_LOCAL; consulted by ip_dev_find() and
 * inet_addr_type(). */
54 struct fib_table *local_table;
/* Table created with id RT_TABLE_MAIN; the one shown in /proc/net/route. */
55 struct fib_table *main_table;
56
57 #else
58
/* Build with CONFIG_IP_MULTIPLE_TABLES: inet_dump_fib() starts dumping at
 * table id 1.
 */
59 #define RT_TABLE_MIN 1
60
/* Tables indexed by table id (valid indices 0..RT_TABLE_MAX).  Slots are
 * filled in by __fib_new_table(). */
61 struct fib_table *fib_tables[RT_TABLE_MAX+1];
62
__fib_new_table(int id)63 struct fib_table *__fib_new_table(int id)
64 {
65 struct fib_table *tb;
66
67 tb = fib_hash_init(id);
68 if (!tb)
69 return NULL;
70 fib_tables[id] = tb;
71 return tb;
72 }
73
74
75 #endif /* CONFIG_IP_MULTIPLE_TABLES */
76
77
fib_flush(void)78 void fib_flush(void)
79 {
80 int flushed = 0;
81 #ifdef CONFIG_IP_MULTIPLE_TABLES
82 struct fib_table *tb;
83 int id;
84
85 for (id = RT_TABLE_MAX; id>0; id--) {
86 if ((tb = fib_get_table(id))==NULL)
87 continue;
88 flushed += tb->tb_flush(tb);
89 }
90 #else /* CONFIG_IP_MULTIPLE_TABLES */
91 flushed += main_table->tb_flush(main_table);
92 flushed += local_table->tb_flush(local_table);
93 #endif /* CONFIG_IP_MULTIPLE_TABLES */
94
95 if (flushed)
96 rt_cache_flush(-1);
97 }
98
99
100 #ifdef CONFIG_PROC_FS
101
102 /*
103 * Called from the PROCfs module. This outputs /proc/net/route.
104 *
105 * It always works in backward compatibility mode.
106 * The format of the file is not supposed to be changed.
107 */
108
109 static int
fib_get_procinfo(char * buffer,char ** start,off_t offset,int length)110 fib_get_procinfo(char *buffer, char **start, off_t offset, int length)
111 {
112 int first = offset/128;
113 char *ptr = buffer;
114 int count = (length+127)/128;
115 int len;
116
117 *start = buffer + offset%128;
118
119 if (--first < 0) {
120 sprintf(buffer, "%-127s\n", "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU\tWindow\tIRTT");
121 --count;
122 ptr += 128;
123 first = 0;
124 }
125
126 if (main_table && count > 0) {
127 int n = main_table->tb_get_info(main_table, ptr, first, count);
128 count -= n;
129 ptr += n*128;
130 }
131 len = ptr - *start;
132 if (len >= length)
133 return length;
134 if (len >= 0)
135 return len;
136 return 0;
137 }
138
139 #endif /* CONFIG_PROC_FS */
140
141 /*
142 * Find the first device with a given source address.
143 */
144
ip_dev_find(u32 addr)145 struct net_device * ip_dev_find(u32 addr)
146 {
147 struct rt_key key;
148 struct fib_result res;
149 struct net_device *dev = NULL;
150
151 memset(&key, 0, sizeof(key));
152 key.dst = addr;
153 #ifdef CONFIG_IP_MULTIPLE_TABLES
154 res.r = NULL;
155 #endif
156
157 if (!local_table || local_table->tb_lookup(local_table, &key, &res)) {
158 return NULL;
159 }
160 if (res.type != RTN_LOCAL)
161 goto out;
162 dev = FIB_RES_DEV(res);
163
164 if (dev)
165 dev_hold(dev);
166 out:
167 fib_res_put(&res);
168 return dev;
169 }
170
/*
 * Classify an IPv4 address.
 *
 * Addresses matching ZERONET() or BADCLASS() yield RTN_BROADCAST and
 * MULTICAST() addresses yield RTN_MULTICAST without any table lookup.
 * Otherwise the local table is consulted: a successful lookup returns
 * the type stored in the result, a failed one returns RTN_UNICAST.
 * If no local table exists yet, RTN_BROADCAST is returned.
 */
unsigned inet_addr_type(u32 addr)
{
	struct fib_result res;
	struct rt_key key;
	unsigned type;

	if (ZERONET(addr) || BADCLASS(addr))
		return RTN_BROADCAST;
	if (MULTICAST(addr))
		return RTN_MULTICAST;

	if (local_table == NULL)
		return RTN_BROADCAST;

	memset(&key, 0, sizeof(key));
	key.dst = addr;
#ifdef CONFIG_IP_MULTIPLE_TABLES
	res.r = NULL;
#endif

	type = RTN_UNICAST;
	if (local_table->tb_lookup(local_table, &key, &res) == 0) {
		type = res.type;
		fib_res_put(&res);
	}
	return type;
}
197
198 /* Given (packet source, input interface) and optional (dst, oif, tos):
199 - (main) check, that source is valid i.e. not broadcast or our local
200 address.
201 - figure out what "logical" interface this packet arrived
202 and calculate "specific destination" address.
203 - check, that packet arrived from expected physical interface.
204 */
205
fib_validate_source(u32 src,u32 dst,u8 tos,int oif,struct net_device * dev,u32 * spec_dst,u32 * itag)206 int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
207 struct net_device *dev, u32 *spec_dst, u32 *itag)
208 {
209 struct in_device *in_dev;
210 struct rt_key key;
211 struct fib_result res;
212 int no_addr, rpf;
213 int ret;
214
215 key.dst = src;
216 key.src = dst;
217 key.tos = tos;
218 key.oif = 0;
219 key.iif = oif;
220 key.scope = RT_SCOPE_UNIVERSE;
221
222 no_addr = rpf = 0;
223 read_lock(&inetdev_lock);
224 in_dev = __in_dev_get(dev);
225 if (in_dev) {
226 no_addr = in_dev->ifa_list == NULL;
227 rpf = IN_DEV_RPFILTER(in_dev);
228 }
229 read_unlock(&inetdev_lock);
230
231 if (in_dev == NULL)
232 goto e_inval;
233
234 if (fib_lookup(&key, &res))
235 goto last_resort;
236 if (res.type != RTN_UNICAST)
237 goto e_inval_res;
238 *spec_dst = FIB_RES_PREFSRC(res);
239 fib_combine_itag(itag, &res);
240 #ifdef CONFIG_IP_ROUTE_MULTIPATH
241 if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
242 #else
243 if (FIB_RES_DEV(res) == dev)
244 #endif
245 {
246 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
247 fib_res_put(&res);
248 return ret;
249 }
250 fib_res_put(&res);
251 if (no_addr)
252 goto last_resort;
253 if (rpf)
254 goto e_inval;
255 key.oif = dev->ifindex;
256
257 ret = 0;
258 if (fib_lookup(&key, &res) == 0) {
259 if (res.type == RTN_UNICAST) {
260 *spec_dst = FIB_RES_PREFSRC(res);
261 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
262 }
263 fib_res_put(&res);
264 }
265 return ret;
266
267 last_resort:
268 if (rpf)
269 goto e_inval;
270 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
271 *itag = 0;
272 return 0;
273
274 e_inval_res:
275 fib_res_put(&res);
276 e_inval:
277 return -EINVAL;
278 }
279
280 #ifndef CONFIG_IP_NOSIOCRT
281
282 /*
283 * Handle IP routing ioctl calls. These are used to manipulate the routing tables
284 */
285
ip_rt_ioctl(unsigned int cmd,void * arg)286 int ip_rt_ioctl(unsigned int cmd, void *arg)
287 {
288 int err;
289 struct kern_rta rta;
290 struct rtentry r;
291 struct {
292 struct nlmsghdr nlh;
293 struct rtmsg rtm;
294 } req;
295
296 switch (cmd) {
297 case SIOCADDRT: /* Add a route */
298 case SIOCDELRT: /* Delete a route */
299 if (!capable(CAP_NET_ADMIN))
300 return -EPERM;
301 if (copy_from_user(&r, arg, sizeof(struct rtentry)))
302 return -EFAULT;
303 rtnl_lock();
304 err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r);
305 if (err == 0) {
306 if (cmd == SIOCDELRT) {
307 struct fib_table *tb = fib_get_table(req.rtm.rtm_table);
308 err = -ESRCH;
309 if (tb)
310 err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
311 } else {
312 struct fib_table *tb = fib_new_table(req.rtm.rtm_table);
313 err = -ENOBUFS;
314 if (tb)
315 err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
316 }
317 if (rta.rta_mx)
318 kfree(rta.rta_mx);
319 }
320 rtnl_unlock();
321 return err;
322 }
323 return -EINVAL;
324 }
325
326 #else
327
/*
 * Stub used when CONFIG_IP_NOSIOCRT is set: routing ioctls are not
 * supported, so every request is rejected with -EINVAL.
 */
int ip_rt_ioctl(unsigned int cmd, void *arg)
{
	int err = -EINVAL;

	return err;
}
332
333 #endif
334
inet_check_attr(struct rtmsg * r,struct rtattr ** rta)335 static int inet_check_attr(struct rtmsg *r, struct rtattr **rta)
336 {
337 int i;
338
339 for (i=1; i<=RTA_MAX; i++) {
340 struct rtattr *attr = rta[i-1];
341 if (attr) {
342 if (RTA_PAYLOAD(attr) < 4)
343 return -EINVAL;
344 if (i != RTA_MULTIPATH && i != RTA_METRICS)
345 rta[i-1] = (struct rtattr*)RTA_DATA(attr);
346 }
347 }
348 return 0;
349 }
350
inet_rtm_delroute(struct sk_buff * skb,struct nlmsghdr * nlh,void * arg)351 int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
352 {
353 struct fib_table * tb;
354 struct rtattr **rta = arg;
355 struct rtmsg *r = NLMSG_DATA(nlh);
356
357 if (inet_check_attr(r, rta))
358 return -EINVAL;
359
360 tb = fib_get_table(r->rtm_table);
361 if (tb)
362 return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
363 return -ESRCH;
364 }
365
inet_rtm_newroute(struct sk_buff * skb,struct nlmsghdr * nlh,void * arg)366 int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
367 {
368 struct fib_table * tb;
369 struct rtattr **rta = arg;
370 struct rtmsg *r = NLMSG_DATA(nlh);
371
372 if (inet_check_attr(r, rta))
373 return -EINVAL;
374
375 tb = fib_new_table(r->rtm_table);
376 if (tb)
377 return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb));
378 return -ENOBUFS;
379 }
380
inet_dump_fib(struct sk_buff * skb,struct netlink_callback * cb)381 int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
382 {
383 int t;
384 int s_t;
385 struct fib_table *tb;
386
387 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
388 ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
389 return ip_rt_dump(skb, cb);
390
391 s_t = cb->args[0];
392 if (s_t == 0)
393 s_t = cb->args[0] = RT_TABLE_MIN;
394
395 for (t=s_t; t<=RT_TABLE_MAX; t++) {
396 if (t < s_t) continue;
397 if (t > s_t)
398 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
399 if ((tb = fib_get_table(t))==NULL)
400 continue;
401 if (tb->tb_dump(tb, skb, cb) < 0)
402 break;
403 }
404
405 cb->args[0] = t;
406
407 return skb->len;
408 }
409
410 /* Prepare and feed intra-kernel routing request.
411 Really, it should be netlink message, but :-( netlink
412 can be not configured, so that we feed it directly
413 to fib engine. It is legal, because all events occur
414 only when netlink is already locked.
415 */
416
fib_magic(int cmd,int type,u32 dst,int dst_len,struct in_ifaddr * ifa)417 static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa)
418 {
419 struct fib_table * tb;
420 struct {
421 struct nlmsghdr nlh;
422 struct rtmsg rtm;
423 } req;
424 struct kern_rta rta;
425
426 memset(&req.rtm, 0, sizeof(req.rtm));
427 memset(&rta, 0, sizeof(rta));
428
429 if (type == RTN_UNICAST)
430 tb = fib_new_table(RT_TABLE_MAIN);
431 else
432 tb = fib_new_table(RT_TABLE_LOCAL);
433
434 if (tb == NULL)
435 return;
436
437 req.nlh.nlmsg_len = sizeof(req);
438 req.nlh.nlmsg_type = cmd;
439 req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
440 req.nlh.nlmsg_pid = 0;
441 req.nlh.nlmsg_seq = 0;
442
443 req.rtm.rtm_dst_len = dst_len;
444 req.rtm.rtm_table = tb->tb_id;
445 req.rtm.rtm_protocol = RTPROT_KERNEL;
446 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
447 req.rtm.rtm_type = type;
448
449 rta.rta_dst = &dst;
450 rta.rta_prefsrc = &ifa->ifa_local;
451 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
452
453 if (cmd == RTM_NEWROUTE)
454 tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL);
455 else
456 tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL);
457 }
458
fib_add_ifaddr(struct in_ifaddr * ifa)459 static void fib_add_ifaddr(struct in_ifaddr *ifa)
460 {
461 struct in_device *in_dev = ifa->ifa_dev;
462 struct net_device *dev = in_dev->dev;
463 struct in_ifaddr *prim = ifa;
464 u32 mask = ifa->ifa_mask;
465 u32 addr = ifa->ifa_local;
466 u32 prefix = ifa->ifa_address&mask;
467
468 if (ifa->ifa_flags&IFA_F_SECONDARY) {
469 prim = inet_ifa_byprefix(in_dev, prefix, mask);
470 if (prim == NULL) {
471 printk(KERN_DEBUG "fib_add_ifaddr: bug: prim == NULL\n");
472 return;
473 }
474 }
475
476 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
477
478 if (!(dev->flags&IFF_UP))
479 return;
480
481 /* Add broadcast address, if it is explicitly assigned. */
482 if (ifa->ifa_broadcast && ifa->ifa_broadcast != 0xFFFFFFFF)
483 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
484
485 if (!ZERONET(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) &&
486 (prefix != addr || ifa->ifa_prefixlen < 32)) {
487 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
488 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim);
489
490 /* Add network specific broadcasts, when it takes a sense */
491 if (ifa->ifa_prefixlen < 31) {
492 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
493 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim);
494 }
495 }
496 }
497
fib_del_ifaddr(struct in_ifaddr * ifa)498 static void fib_del_ifaddr(struct in_ifaddr *ifa)
499 {
500 struct in_device *in_dev = ifa->ifa_dev;
501 struct net_device *dev = in_dev->dev;
502 struct in_ifaddr *ifa1;
503 struct in_ifaddr *prim = ifa;
504 u32 brd = ifa->ifa_address|~ifa->ifa_mask;
505 u32 any = ifa->ifa_address&ifa->ifa_mask;
506 #define LOCAL_OK 1
507 #define BRD_OK 2
508 #define BRD0_OK 4
509 #define BRD1_OK 8
510 unsigned ok = 0;
511
512 if (!(ifa->ifa_flags&IFA_F_SECONDARY))
513 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL :
514 RTN_UNICAST, any, ifa->ifa_prefixlen, prim);
515 else {
516 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
517 if (prim == NULL) {
518 printk(KERN_DEBUG "fib_del_ifaddr: bug: prim == NULL\n");
519 return;
520 }
521 }
522
523 /* Deletion is more complicated than add.
524 We should take care of not to delete too much :-)
525
526 Scan address list to be sure that addresses are really gone.
527 */
528
529 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
530 if (ifa->ifa_local == ifa1->ifa_local)
531 ok |= LOCAL_OK;
532 if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
533 ok |= BRD_OK;
534 if (brd == ifa1->ifa_broadcast)
535 ok |= BRD1_OK;
536 if (any == ifa1->ifa_broadcast)
537 ok |= BRD0_OK;
538 }
539
540 if (!(ok&BRD_OK))
541 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
542 if (!(ok&BRD1_OK))
543 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
544 if (!(ok&BRD0_OK))
545 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
546 if (!(ok&LOCAL_OK)) {
547 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
548
549 /* Check, that this local address finally disappeared. */
550 if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
551 /* And the last, but not the least thing.
552 We must flush stray FIB entries.
553
554 First of all, we scan fib_info list searching
555 for stray nexthop entries, then ignite fib_flush.
556 */
557 if (fib_sync_down(ifa->ifa_local, NULL, 0))
558 fib_flush();
559 }
560 }
561 #undef LOCAL_OK
562 #undef BRD_OK
563 #undef BRD0_OK
564 #undef BRD1_OK
565 }
566
/*
 * Take IP routing state down for a device: run fib_sync_down() for it
 * with the given force level (calling fib_flush() when that returns
 * non-zero), then call rt_cache_flush(0) and arp_ifdown() for the
 * device.
 */
static void fib_disable_ip(struct net_device *dev, int force)
{
	int need_flush = fib_sync_down(0, dev, force);

	if (need_flush)
		fib_flush();

	rt_cache_flush(0);
	arp_ifdown(dev);
}
574
fib_inetaddr_event(struct notifier_block * this,unsigned long event,void * ptr)575 static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
576 {
577 struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
578
579 switch (event) {
580 case NETDEV_UP:
581 fib_add_ifaddr(ifa);
582 #ifdef CONFIG_IP_ROUTE_MULTIPATH
583 fib_sync_up(ifa->ifa_dev->dev);
584 #endif
585 rt_cache_flush(-1);
586 break;
587 case NETDEV_DOWN:
588 fib_del_ifaddr(ifa);
589 if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
590 /* Last address was deleted from this interface.
591 Disable IP.
592 */
593 fib_disable_ip(ifa->ifa_dev->dev, 1);
594 } else {
595 rt_cache_flush(-1);
596 }
597 break;
598 }
599 return NOTIFY_DONE;
600 }
601
fib_netdev_event(struct notifier_block * this,unsigned long event,void * ptr)602 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
603 {
604 struct net_device *dev = ptr;
605 struct in_device *in_dev = __in_dev_get(dev);
606
607 if (event == NETDEV_UNREGISTER) {
608 fib_disable_ip(dev, 2);
609 return NOTIFY_DONE;
610 }
611
612 if (!in_dev)
613 return NOTIFY_DONE;
614
615 switch (event) {
616 case NETDEV_UP:
617 for_ifa(in_dev) {
618 fib_add_ifaddr(ifa);
619 } endfor_ifa(in_dev);
620 #ifdef CONFIG_IP_ROUTE_MULTIPATH
621 fib_sync_up(dev);
622 #endif
623 rt_cache_flush(-1);
624 break;
625 case NETDEV_DOWN:
626 fib_disable_ip(dev, 0);
627 break;
628 case NETDEV_CHANGEMTU:
629 case NETDEV_CHANGE:
630 rt_cache_flush(0);
631 break;
632 }
633 return NOTIFY_DONE;
634 }
635
636 struct notifier_block fib_inetaddr_notifier = {
637 notifier_call: fib_inetaddr_event,
638 };
639
640 struct notifier_block fib_netdev_notifier = {
641 notifier_call: fib_netdev_event,
642 };
643
ip_fib_init(void)644 void __init ip_fib_init(void)
645 {
646 #ifdef CONFIG_PROC_FS
647 proc_net_create("route",0,fib_get_procinfo);
648 #endif /* CONFIG_PROC_FS */
649
650 #ifndef CONFIG_IP_MULTIPLE_TABLES
651 local_table = fib_hash_init(RT_TABLE_LOCAL);
652 main_table = fib_hash_init(RT_TABLE_MAIN);
653 #else
654 fib_rules_init();
655 #endif
656
657 register_netdevice_notifier(&fib_netdev_notifier);
658 register_inetaddr_notifier(&fib_inetaddr_notifier);
659 }
660
661