1 /*
2 * IP multicast routing support for mrouted 3.6/3.8
3 *
4 * (c) 1995 Alan Cox, <alan@redhat.com>
5 * Linux Consultancy and Custom Driver Development
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
13 *
14 * Fixes:
15 * Michael Chastain : Incorrect size of copying.
16 * Alan Cox : Added the cache manager code
17 * Alan Cox : Fixed the clone/copy bug and device race.
18 * Mike McLagan : Routing by source
19 * Malcolm Beattie : Buffer handling fixes.
20 * Alexey Kuznetsov : Double buffer free and other fixes.
21 * SVR Anand : Fixed several multicast bugs and problems.
22 * Alexey Kuznetsov : Status, optimisations and more.
23 * Brad Parker : Better behaviour on mrouted upcall
24 * overflow.
25 * Carlos Picoto : PIMv1 Support
26 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
27 *					Relax this requirement to work with older peers.
28 *
29 */
30
31 #include <linux/config.h>
32 #include <asm/system.h>
33 #include <asm/uaccess.h>
34 #include <linux/types.h>
35 #include <linux/sched.h>
36 #include <linux/errno.h>
37 #include <linux/timer.h>
38 #include <linux/mm.h>
39 #include <linux/kernel.h>
40 #include <linux/fcntl.h>
41 #include <linux/stat.h>
42 #include <linux/socket.h>
43 #include <linux/in.h>
44 #include <linux/inet.h>
45 #include <linux/netdevice.h>
46 #include <linux/inetdevice.h>
47 #include <linux/igmp.h>
48 #include <linux/proc_fs.h>
49 #include <linux/mroute.h>
50 #include <linux/init.h>
51 #include <net/ip.h>
52 #include <net/protocol.h>
53 #include <linux/skbuff.h>
54 #include <net/sock.h>
55 #include <net/icmp.h>
56 #include <net/udp.h>
57 #include <net/raw.h>
58 #include <linux/notifier.h>
59 #include <linux/if_arp.h>
60 #include <linux/netfilter_ipv4.h>
61 #include <net/ipip.h>
62 #include <net/checksum.h>
63
64 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
65 #define CONFIG_IP_PIMSM 1
66 #endif
67
68 static struct sock *mroute_socket;
69
70
71 /* Big lock, protecting vif table, mrt cache and mroute socket state.
72 Note that the changes are semaphored via rtnl_lock.
73 */
74
75 static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;
76
77 /*
78 * Multicast router control variables
79 */
80
81 static struct vif_device vif_table[MAXVIFS]; /* Devices */
82 static int maxvif;
83
84 #define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
85
86 int mroute_do_assert; /* Set in PIM assert */
87 int mroute_do_pim;
88
89 static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
90
91 static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
92 atomic_t cache_resolve_queue_len; /* Size of unresolved */
93
94 /* Special spinlock for queue of unresolved entries */
95 static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
96
97 /* We return to Alan's original scheme. The hash table of resolved
98    entries is changed only in process context and is protected
99    by the weak lock mrt_lock. The queue of unresolved entries is
100    protected by the strong spinlock mfc_unres_lock.
101 
102    With this scheme the data path is entirely free of exclusive locks.
103 */
104
105 kmem_cache_t *mrt_cachep;
106
107 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
108 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
109 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
110
111 extern struct inet_protocol pim_protocol;
112
113 static struct timer_list ipmr_expire_timer;
114
115 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
116
117 static
118 struct net_device *ipmr_new_tunnel(struct vifctl *v)
119 {
120 struct net_device *dev;
121
122 dev = __dev_get_by_name("tunl0");
123
124 if (dev) {
125 int err;
126 struct ifreq ifr;
127 mm_segment_t oldfs;
128 struct ip_tunnel_parm p;
129 struct in_device *in_dev;
130
131 memset(&p, 0, sizeof(p));
132 p.iph.daddr = v->vifc_rmt_addr.s_addr;
133 p.iph.saddr = v->vifc_lcl_addr.s_addr;
134 p.iph.version = 4;
135 p.iph.ihl = 5;
136 p.iph.protocol = IPPROTO_IPIP;
137 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
138 ifr.ifr_ifru.ifru_data = (void*)&p;
139
140 oldfs = get_fs(); set_fs(KERNEL_DS);
141 err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
142 set_fs(oldfs);
143
144 dev = NULL;
145
146 if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
147 dev->flags |= IFF_MULTICAST;
148
149 in_dev = __in_dev_get(dev);
150 if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
151 goto failure;
152 in_dev->cnf.rp_filter = 0;
153
154 if (dev_open(dev))
155 goto failure;
156 }
157 }
158 return dev;
159
160 failure:
161 unregister_netdevice(dev);
162 return NULL;
163 }
164
165 #ifdef CONFIG_IP_PIMSM
166
167 static int reg_vif_num = -1;
168
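/*
 *	Transmit on the register VIF: account the packet, pass the whole
 *	frame up to the daemon as an IGMPMSG_WHOLEPKT report and drop it.
 */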
169 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
170 {
171 read_lock(&mrt_lock);
172 ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
173 ((struct net_device_stats*)dev->priv)->tx_packets++;
174 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
175 read_unlock(&mrt_lock);
176 kfree_skb(skb);
177 return 0;
178 }
179
180 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
181 {
182 return (struct net_device_stats*)dev->priv;
183 }
184
185 static
186 struct net_device *ipmr_reg_vif(struct vifctl *v)
187 {
188 struct net_device *dev;
189 struct in_device *in_dev;
190 int size;
191
192 size = sizeof(*dev) + sizeof(struct net_device_stats);
193 dev = kmalloc(size, GFP_KERNEL);
194 if (!dev)
195 return NULL;
196
197 memset(dev, 0, size);
198
199 dev->priv = dev + 1;
200
201 strcpy(dev->name, "pimreg");
202
203 dev->type = ARPHRD_PIMREG;
204 dev->mtu = 1500 - sizeof(struct iphdr) - 8;
205 dev->flags = IFF_NOARP;
206 dev->hard_start_xmit = reg_vif_xmit;
207 dev->get_stats = reg_vif_get_stats;
208 dev->features |= NETIF_F_DYNALLOC;
209
210 if (register_netdevice(dev)) {
211 kfree(dev);
212 return NULL;
213 }
214 dev->iflink = 0;
215
216 if ((in_dev = inetdev_init(dev)) == NULL)
217 goto failure;
218
219 in_dev->cnf.rp_filter = 0;
220
221 if (dev_open(dev))
222 goto failure;
223
224 return dev;
225
226 failure:
227 unregister_netdevice(dev);
228 return NULL;
229 }
230 #endif
231
232 /*
233 * Delete a VIF entry
234 */
235
236 static int vif_delete(int vifi)
237 {
238 struct vif_device *v;
239 struct net_device *dev;
240 struct in_device *in_dev;
241
242 if (vifi < 0 || vifi >= maxvif)
243 return -EADDRNOTAVAIL;
244
245 v = &vif_table[vifi];
246
247 write_lock_bh(&mrt_lock);
248 dev = v->dev;
249 v->dev = NULL;
250
251 if (!dev) {
252 write_unlock_bh(&mrt_lock);
253 return -EADDRNOTAVAIL;
254 }
255
256 #ifdef CONFIG_IP_PIMSM
257 if (vifi == reg_vif_num)
258 reg_vif_num = -1;
259 #endif
260
261 if (vifi+1 == maxvif) {
262 int tmp;
263 for (tmp=vifi-1; tmp>=0; tmp--) {
264 if (VIF_EXISTS(tmp))
265 break;
266 }
267 maxvif = tmp+1;
268 }
269
270 write_unlock_bh(&mrt_lock);
271
272 dev_set_allmulti(dev, -1);
273
274 if ((in_dev = __in_dev_get(dev)) != NULL) {
275 in_dev->cnf.mc_forwarding--;
276 ip_rt_multicast_event(in_dev);
277 }
278
279 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
280 unregister_netdevice(dev);
281
282 dev_put(dev);
283 return 0;
284 }
285
286 /* Destroy an unresolved cache entry, killing queued skbs
287 and reporting error to netlink readers.
288 */
289
290 static void ipmr_destroy_unres(struct mfc_cache *c)
291 {
292 struct sk_buff *skb;
293 struct nlmsgerr *e;
294
295 atomic_dec(&cache_resolve_queue_len);
296
297 while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
298 if (skb->nh.iph->version == 0) {
299 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
300 nlh->nlmsg_type = NLMSG_ERROR;
301 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
302 skb_trim(skb, nlh->nlmsg_len);
303 e = NLMSG_DATA(nlh);
304 e->error = -ETIMEDOUT;
305 memset(&e->msg, 0, sizeof(e->msg));
306 netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
307 } else
308 kfree_skb(skb);
309 }
310
311 kmem_cache_free(mrt_cachep, c);
312 }
313
314
315 /* Single timer process for all the unresolved queue. */
316
317 void ipmr_expire_process(unsigned long dummy)
318 {
319 unsigned long now;
320 unsigned long expires;
321 struct mfc_cache *c, **cp;
322
323 if (!spin_trylock(&mfc_unres_lock)) {
324 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
325 return;
326 }
327
328 if (atomic_read(&cache_resolve_queue_len) == 0)
329 goto out;
330
331 now = jiffies;
332 expires = 10*HZ;
333 cp = &mfc_unres_queue;
334
335 while ((c=*cp) != NULL) {
336 long interval = c->mfc_un.unres.expires - now;
337
338 if (interval > 0) {
339 if (interval < expires)
340 expires = interval;
341 cp = &c->next;
342 continue;
343 }
344
345 *cp = c->next;
346
347 ipmr_destroy_unres(c);
348 }
349
350 if (atomic_read(&cache_resolve_queue_len))
351 mod_timer(&ipmr_expire_timer, jiffies + expires);
352
353 out:
354 spin_unlock(&mfc_unres_lock);
355 }
356
357 /* Fill oifs list. It is called under write locked mrt_lock. */
358
359 static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
360 {
361 int vifi;
362
363 cache->mfc_un.res.minvif = MAXVIFS;
364 cache->mfc_un.res.maxvif = 0;
365 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
366
367 for (vifi=0; vifi<maxvif; vifi++) {
368 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
369 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
370 if (cache->mfc_un.res.minvif > vifi)
371 cache->mfc_un.res.minvif = vifi;
372 if (cache->mfc_un.res.maxvif <= vifi)
373 cache->mfc_un.res.maxvif = vifi + 1;
374 }
375 }
376 }
377
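/*
 *	Add a virtual interface: resolve or create the underlying device,
 *	enable multicast forwarding on it and fill in the vif_table slot.
 */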
378 static int vif_add(struct vifctl *vifc, int mrtsock)
379 {
380 int vifi = vifc->vifc_vifi;
381 struct vif_device *v = &vif_table[vifi];
382 struct net_device *dev;
383 struct in_device *in_dev;
384
385 /* Is vif busy ? */
386 if (VIF_EXISTS(vifi))
387 return -EADDRINUSE;
388
389 switch (vifc->vifc_flags) {
390 #ifdef CONFIG_IP_PIMSM
391 case VIFF_REGISTER:
392 /*
393 * Special Purpose VIF in PIM
394 * All the packets will be sent to the daemon
395 */
396 if (reg_vif_num >= 0)
397 return -EADDRINUSE;
398 dev = ipmr_reg_vif(vifc);
399 if (!dev)
400 return -ENOBUFS;
401 break;
402 #endif
403 case VIFF_TUNNEL:
404 dev = ipmr_new_tunnel(vifc);
405 if (!dev)
406 return -ENOBUFS;
407 break;
408 case 0:
409 dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
410 if (!dev)
411 return -EADDRNOTAVAIL;
412 __dev_put(dev);
413 break;
414 default:
415 return -EINVAL;
416 }
417
418 if ((in_dev = __in_dev_get(dev)) == NULL)
419 return -EADDRNOTAVAIL;
420 in_dev->cnf.mc_forwarding++;
421 dev_set_allmulti(dev, +1);
422 ip_rt_multicast_event(in_dev);
423
424 /*
425 * Fill in the VIF structures
426 */
427 v->rate_limit=vifc->vifc_rate_limit;
428 v->local=vifc->vifc_lcl_addr.s_addr;
429 v->remote=vifc->vifc_rmt_addr.s_addr;
430 v->flags=vifc->vifc_flags;
431 if (!mrtsock)
432 v->flags |= VIFF_STATIC;
433 v->threshold=vifc->vifc_threshold;
434 v->bytes_in = 0;
435 v->bytes_out = 0;
436 v->pkt_in = 0;
437 v->pkt_out = 0;
438 v->link = dev->ifindex;
439 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
440 v->link = dev->iflink;
441
442 /* And finish update writing critical data */
443 write_lock_bh(&mrt_lock);
444 dev_hold(dev);
445 v->dev=dev;
446 #ifdef CONFIG_IP_PIMSM
447 if (v->flags&VIFF_REGISTER)
448 reg_vif_num = vifi;
449 #endif
450 if (vifi+1 > maxvif)
451 maxvif = vifi+1;
452 write_unlock_bh(&mrt_lock);
453 return 0;
454 }
455
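/*
 *	Look up an (origin, group) pair in the resolved cache.
 *	Called with mrt_lock held.
 */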
456 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
457 {
458 int line=MFC_HASH(mcastgrp,origin);
459 struct mfc_cache *c;
460
461 for (c=mfc_cache_array[line]; c; c = c->next) {
462 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
463 break;
464 }
465 return c;
466 }
467
468 /*
469 * Allocate a multicast cache entry
470 */
471 static struct mfc_cache *ipmr_cache_alloc(void)
472 {
473 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
474 if(c==NULL)
475 return NULL;
476 memset(c, 0, sizeof(*c));
477 c->mfc_un.res.minvif = MAXVIFS;
478 return c;
479 }
480
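/*
 *	Allocate an unresolved cache entry with a 10 second expiry.
 */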
481 static struct mfc_cache *ipmr_cache_alloc_unres(void)
482 {
483 struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
484 if(c==NULL)
485 return NULL;
486 memset(c, 0, sizeof(*c));
487 skb_queue_head_init(&c->mfc_un.unres.unresolved);
488 c->mfc_un.unres.expires = jiffies + 10*HZ;
489 return c;
490 }
491
492 /*
493 * A cache entry has gone into a resolved state from queued
494 */
495
496 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
497 {
498 struct sk_buff *skb;
499 struct nlmsgerr *e;
500
501 /*
502 * Play the pending entries through our router
503 */
504
505 while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
506 if (skb->nh.iph->version == 0) {
507 int err;
508 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
509
510 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
511 nlh->nlmsg_len = skb->tail - (u8*)nlh;
512 } else {
513 nlh->nlmsg_type = NLMSG_ERROR;
514 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
515 skb_trim(skb, nlh->nlmsg_len);
516 e = NLMSG_DATA(nlh);
517 e->error = -EMSGSIZE;
518 memset(&e->msg, 0, sizeof(e->msg));
519 }
520 err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
521 } else
522 ip_mr_forward(skb, c, 0);
523 }
524 }
525
526 /*
527 * Bounce a cache query up to mrouted. We could use netlink for this but mrouted
528 * expects the following bizarre scheme.
529 *
530 * Called under mrt_lock.
531 */
532
533 static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
534 {
535 struct sk_buff *skb;
536 int ihl = pkt->nh.iph->ihl<<2;
537 struct igmphdr *igmp;
538 struct igmpmsg *msg;
539 int ret;
540
541 #ifdef CONFIG_IP_PIMSM
542 if (assert == IGMPMSG_WHOLEPKT)
543 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
544 else
545 #endif
546 skb = alloc_skb(128, GFP_ATOMIC);
547
548 if(!skb)
549 return -ENOBUFS;
550
551 #ifdef CONFIG_IP_PIMSM
552 if (assert == IGMPMSG_WHOLEPKT) {
553 /* Ugly, but we have no choice with this interface.
554 Duplicate old header, fix ihl, length etc.
555 And all this only to mangle msg->im_msgtype and
556 to set msg->im_mbz to "mbz" :-)
557 */
558 msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
559 skb->nh.raw = skb->h.raw = (u8*)msg;
560 memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
561 msg->im_msgtype = IGMPMSG_WHOLEPKT;
562 msg->im_mbz = 0;
563 msg->im_vif = reg_vif_num;
564 skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
565 skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
566 } else
567 #endif
568 {
569
570 /*
571 * Copy the IP header
572 */
573
574 skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
575 memcpy(skb->data,pkt->data,ihl);
576 skb->nh.iph->protocol = 0; /* Flag to the kernel this is a route add */
577 msg = (struct igmpmsg*)skb->nh.iph;
578 msg->im_vif = vifi;
579 skb->dst = dst_clone(pkt->dst);
580
581 /*
582 * Add our header
583 */
584
585 igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
586 igmp->type =
587 msg->im_msgtype = assert;
588 igmp->code = 0;
589 skb->nh.iph->tot_len=htons(skb->len); /* Fix the length */
590 skb->h.raw = skb->nh.raw;
591 }
592
593 if (mroute_socket == NULL) {
594 kfree_skb(skb);
595 return -EINVAL;
596 }
597
598 /*
599 * Deliver to mrouted
600 */
601 if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
602 if (net_ratelimit())
603 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
604 kfree_skb(skb);
605 }
606
607 return ret;
608 }
609
610 /*
611  *	Queue a packet for resolution; the unresolved cache entry is found or created under mfc_unres_lock.
612 */
613
614 static int
615 ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
616 {
617 int err;
618 struct mfc_cache *c;
619
620 spin_lock_bh(&mfc_unres_lock);
621 for (c=mfc_unres_queue; c; c=c->next) {
622 if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
623 c->mfc_origin == skb->nh.iph->saddr)
624 break;
625 }
626
627 if (c == NULL) {
628 /*
629 * Create a new entry if allowable
630 */
631
632 if (atomic_read(&cache_resolve_queue_len)>=10 ||
633 (c=ipmr_cache_alloc_unres())==NULL) {
634 spin_unlock_bh(&mfc_unres_lock);
635
636 kfree_skb(skb);
637 return -ENOBUFS;
638 }
639
640 /*
641 * Fill in the new cache entry
642 */
643 c->mfc_parent=-1;
644 c->mfc_origin=skb->nh.iph->saddr;
645 c->mfc_mcastgrp=skb->nh.iph->daddr;
646
647 /*
648 * Reflect first query at mrouted.
649 */
650 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
651 /* If the report failed throw the cache entry
652 out - Brad Parker
653 */
654 spin_unlock_bh(&mfc_unres_lock);
655
656 kmem_cache_free(mrt_cachep, c);
657 kfree_skb(skb);
658 return err;
659 }
660
661 atomic_inc(&cache_resolve_queue_len);
662 c->next = mfc_unres_queue;
663 mfc_unres_queue = c;
664
665 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
666 }
667
668 /*
669 * See if we can append the packet
670 */
671 if (c->mfc_un.unres.unresolved.qlen>3) {
672 kfree_skb(skb);
673 err = -ENOBUFS;
674 } else {
675 skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
676 err = 0;
677 }
678
679 spin_unlock_bh(&mfc_unres_lock);
680 return err;
681 }
682
683 /*
684 * MFC cache manipulation by user space mroute daemon
685 */
686
687 int ipmr_mfc_delete(struct mfcctl *mfc)
688 {
689 int line;
690 struct mfc_cache *c, **cp;
691
692 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
693
694 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
695 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
696 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
697 write_lock_bh(&mrt_lock);
698 *cp = c->next;
699 write_unlock_bh(&mrt_lock);
700
701 kmem_cache_free(mrt_cachep, c);
702 return 0;
703 }
704 }
705 return -ENOENT;
706 }
707
708 int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
709 {
710 int line;
711 struct mfc_cache *uc, *c, **cp;
712
713 line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
714
715 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
716 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
717 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
718 break;
719 }
720
721 if (c != NULL) {
722 write_lock_bh(&mrt_lock);
723 c->mfc_parent = mfc->mfcc_parent;
724 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
725 if (!mrtsock)
726 c->mfc_flags |= MFC_STATIC;
727 write_unlock_bh(&mrt_lock);
728 return 0;
729 }
730
731 if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
732 return -EINVAL;
733
734 c=ipmr_cache_alloc();
735 if (c==NULL)
736 return -ENOMEM;
737
738 c->mfc_origin=mfc->mfcc_origin.s_addr;
739 c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
740 c->mfc_parent=mfc->mfcc_parent;
741 ipmr_update_threshoulds(c, mfc->mfcc_ttls);
742 if (!mrtsock)
743 c->mfc_flags |= MFC_STATIC;
744
745 write_lock_bh(&mrt_lock);
746 c->next = mfc_cache_array[line];
747 mfc_cache_array[line] = c;
748 write_unlock_bh(&mrt_lock);
749
750 /*
751 * Check to see if we resolved a queued list. If so we
752 * need to send on the frames and tidy up.
753 */
754 spin_lock_bh(&mfc_unres_lock);
755 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
756 cp = &uc->next) {
757 if (uc->mfc_origin == c->mfc_origin &&
758 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
759 *cp = uc->next;
760 if (atomic_dec_and_test(&cache_resolve_queue_len))
761 del_timer(&ipmr_expire_timer);
762 break;
763 }
764 }
765 spin_unlock_bh(&mfc_unres_lock);
766
767 if (uc) {
768 ipmr_cache_resolve(uc, c);
769 kmem_cache_free(mrt_cachep, uc);
770 }
771 return 0;
772 }
773
774 /*
775 * Close the multicast socket, and clear the vif tables etc
776 */
777
778 static void mroute_clean_tables(struct sock *sk)
779 {
780 int i;
781
782 /*
783 * Shut down all active vif entries
784 */
785 for(i=0; i<maxvif; i++) {
786 if (!(vif_table[i].flags&VIFF_STATIC))
787 vif_delete(i);
788 }
789
790 /*
791 * Wipe the cache
792 */
793 for (i=0;i<MFC_LINES;i++) {
794 struct mfc_cache *c, **cp;
795
796 cp = &mfc_cache_array[i];
797 while ((c = *cp) != NULL) {
798 if (c->mfc_flags&MFC_STATIC) {
799 cp = &c->next;
800 continue;
801 }
802 write_lock_bh(&mrt_lock);
803 *cp = c->next;
804 write_unlock_bh(&mrt_lock);
805
806 kmem_cache_free(mrt_cachep, c);
807 }
808 }
809
810 if (atomic_read(&cache_resolve_queue_len) != 0) {
811 struct mfc_cache *c;
812
813 spin_lock_bh(&mfc_unres_lock);
814 while (mfc_unres_queue != NULL) {
815 c = mfc_unres_queue;
816 mfc_unres_queue = c->next;
817 spin_unlock_bh(&mfc_unres_lock);
818
819 ipmr_destroy_unres(c);
820
821 spin_lock_bh(&mfc_unres_lock);
822 }
823 spin_unlock_bh(&mfc_unres_lock);
824 }
825 }
826
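/*
 *	Destructor for the mroute socket: drop the mc_forwarding count
 *	and flush all non-static VIFs and cache entries.
 */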
827 static void mrtsock_destruct(struct sock *sk)
828 {
829 rtnl_lock();
830 if (sk == mroute_socket) {
831 ipv4_devconf.mc_forwarding--;
832
833 write_lock_bh(&mrt_lock);
834 mroute_socket=NULL;
835 write_unlock_bh(&mrt_lock);
836
837 mroute_clean_tables(sk);
838 }
839 rtnl_unlock();
840 }
841
842 /*
843 * Socket options and virtual interface manipulation. The whole
844 * virtual interface system is a complete heap, but unfortunately
845 * that's how BSD mrouted happens to think. Maybe one day with a proper
846 * MOSPF/PIM router set up we can clean this up.
847 */
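/* For reference, a user-space daemon would typically drive this interface
 * roughly as follows (sketch only, not kernel code; "s" is the daemon's
 * raw IGMP socket):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int on = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on));
 *	... add VIFs and MFC entries, read IGMPMSG_* upcalls from s ...
 *	setsockopt(s, IPPROTO_IP, MRT_DONE, NULL, 0);
 */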
848
849 int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
850 {
851 int ret;
852 struct vifctl vif;
853 struct mfcctl mfc;
854
855 if(optname!=MRT_INIT)
856 {
857 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
858 return -EACCES;
859 }
860
861 switch(optname)
862 {
863 case MRT_INIT:
864 if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
865 return -EOPNOTSUPP;
866 if(optlen!=sizeof(int))
867 return -ENOPROTOOPT;
868
869 rtnl_lock();
870 if (mroute_socket) {
871 rtnl_unlock();
872 return -EADDRINUSE;
873 }
874
875 ret = ip_ra_control(sk, 1, mrtsock_destruct);
876 if (ret == 0) {
877 write_lock_bh(&mrt_lock);
878 mroute_socket=sk;
879 write_unlock_bh(&mrt_lock);
880
881 ipv4_devconf.mc_forwarding++;
882 }
883 rtnl_unlock();
884 return ret;
885 case MRT_DONE:
886 if (sk!=mroute_socket)
887 return -EACCES;
888 return ip_ra_control(sk, 0, NULL);
889 case MRT_ADD_VIF:
890 case MRT_DEL_VIF:
891 if(optlen!=sizeof(vif))
892 return -EINVAL;
893 if (copy_from_user(&vif,optval,sizeof(vif)))
894 return -EFAULT;
895 if(vif.vifc_vifi >= MAXVIFS)
896 return -ENFILE;
897 rtnl_lock();
898 if (optname==MRT_ADD_VIF) {
899 ret = vif_add(&vif, sk==mroute_socket);
900 } else {
901 ret = vif_delete(vif.vifc_vifi);
902 }
903 rtnl_unlock();
904 return ret;
905
906 /*
907 * Manipulate the forwarding caches. These live
908 * in a sort of kernel/user symbiosis.
909 */
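		/* Sketch of the user-space side of MRT_ADD_MFC (illustration
		 * only; the addresses and vif indices below are made up):
		 *
		 *	struct mfcctl mc;
		 *	memset(&mc, 0, sizeof(mc));
		 *	mc.mfcc_origin.s_addr   = inet_addr("10.0.0.1");
		 *	mc.mfcc_mcastgrp.s_addr = inet_addr("239.1.1.1");
		 *	mc.mfcc_parent  = 0;	// incoming vif index
		 *	mc.mfcc_ttls[1] = 1;	// forward on vif 1
		 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
		 */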
910 case MRT_ADD_MFC:
911 case MRT_DEL_MFC:
912 if(optlen!=sizeof(mfc))
913 return -EINVAL;
914 if (copy_from_user(&mfc,optval, sizeof(mfc)))
915 return -EFAULT;
916 rtnl_lock();
917 if (optname==MRT_DEL_MFC)
918 ret = ipmr_mfc_delete(&mfc);
919 else
920 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
921 rtnl_unlock();
922 return ret;
923 /*
924 * Control PIM assert.
925 */
926 case MRT_ASSERT:
927 {
928 int v;
929 if(get_user(v,(int *)optval))
930 return -EFAULT;
931 mroute_do_assert=(v)?1:0;
932 return 0;
933 }
934 #ifdef CONFIG_IP_PIMSM
935 case MRT_PIM:
936 {
937 int v;
938 if(get_user(v,(int *)optval))
939 return -EFAULT;
940 v = (v)?1:0;
941 rtnl_lock();
942 if (v != mroute_do_pim) {
943 mroute_do_pim = v;
944 mroute_do_assert = v;
945 #ifdef CONFIG_IP_PIMSM_V2
946 if (mroute_do_pim)
947 inet_add_protocol(&pim_protocol);
948 else
949 inet_del_protocol(&pim_protocol);
950 #endif
951 }
952 rtnl_unlock();
953 return 0;
954 }
955 #endif
956 /*
957 * Spurious command, or MRT_VERSION which you cannot
958 * set.
959 */
960 default:
961 return -ENOPROTOOPT;
962 }
963 }
964
965 /*
966 * Getsock opt support for the multicast routing system.
967 */
968
969 int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
970 {
971 int olr;
972 int val;
973
974 if(optname!=MRT_VERSION &&
975 #ifdef CONFIG_IP_PIMSM
976 optname!=MRT_PIM &&
977 #endif
978 optname!=MRT_ASSERT)
979 return -ENOPROTOOPT;
980
981 if (get_user(olr, optlen))
982 return -EFAULT;
983
984 olr = min_t(unsigned int, olr, sizeof(int));
985 if (olr < 0)
986 return -EINVAL;
987
988 if(put_user(olr,optlen))
989 return -EFAULT;
990 if(optname==MRT_VERSION)
991 val=0x0305;
992 #ifdef CONFIG_IP_PIMSM
993 else if(optname==MRT_PIM)
994 val=mroute_do_pim;
995 #endif
996 else
997 val=mroute_do_assert;
998 if(copy_to_user(optval,&val,olr))
999 return -EFAULT;
1000 return 0;
1001 }
1002
1003 /*
1004 * The IP multicast ioctl support routines.
1005 */
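/* User-space sketch (illustration only; the addresses are made up):
 * reading the per-(S,G) counters kept in the MFC cache.
 *
 *	struct sioc_sg_req sgr;
 *	memset(&sgr, 0, sizeof(sgr));
 *	sgr.src.s_addr = inet_addr("10.0.0.1");
 *	sgr.grp.s_addr = inet_addr("239.1.1.1");
 *	if (ioctl(s, SIOCGETSGCNT, &sgr) == 0)
 *		printf("%lu pkts %lu bytes\n", sgr.pktcnt, sgr.bytecnt);
 */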
1006
1007 int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
1008 {
1009 struct sioc_sg_req sr;
1010 struct sioc_vif_req vr;
1011 struct vif_device *vif;
1012 struct mfc_cache *c;
1013
1014 switch(cmd)
1015 {
1016 case SIOCGETVIFCNT:
1017 if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
1018 return -EFAULT;
1019 if(vr.vifi>=maxvif)
1020 return -EINVAL;
1021 read_lock(&mrt_lock);
1022 vif=&vif_table[vr.vifi];
1023 if(VIF_EXISTS(vr.vifi)) {
1024 vr.icount=vif->pkt_in;
1025 vr.ocount=vif->pkt_out;
1026 vr.ibytes=vif->bytes_in;
1027 vr.obytes=vif->bytes_out;
1028 read_unlock(&mrt_lock);
1029
1030 if (copy_to_user((void *)arg,&vr,sizeof(vr)))
1031 return -EFAULT;
1032 return 0;
1033 }
1034 read_unlock(&mrt_lock);
1035 return -EADDRNOTAVAIL;
1036 case SIOCGETSGCNT:
1037 if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
1038 return -EFAULT;
1039
1040 read_lock(&mrt_lock);
1041 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1042 if (c) {
1043 sr.pktcnt = c->mfc_un.res.pkt;
1044 sr.bytecnt = c->mfc_un.res.bytes;
1045 sr.wrong_if = c->mfc_un.res.wrong_if;
1046 read_unlock(&mrt_lock);
1047
1048 if (copy_to_user((void *)arg,&sr,sizeof(sr)))
1049 return -EFAULT;
1050 return 0;
1051 }
1052 read_unlock(&mrt_lock);
1053 return -EADDRNOTAVAIL;
1054 default:
1055 return -ENOIOCTLCMD;
1056 }
1057 }
1058
1059
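/*
 *	Netdevice notifier: delete any VIF bound to a device that is
 *	going away.
 */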
1060 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1061 {
1062 struct vif_device *v;
1063 int ct;
1064 if (event != NETDEV_UNREGISTER)
1065 return NOTIFY_DONE;
1066 v=&vif_table[0];
1067 for(ct=0;ct<maxvif;ct++,v++) {
1068 if (v->dev==ptr)
1069 vif_delete(ct);
1070 }
1071 return NOTIFY_DONE;
1072 }
1073
1074
1075 static struct notifier_block ip_mr_notifier={
1076 ipmr_device_event,
1077 NULL,
1078 0
1079 };
1080
1081 /*
1082 * Encapsulate a packet by attaching a valid IPIP header to it.
1083 * This avoids tunnel drivers and other mess and gives us the speed so
1084 * important for multicast video.
1085 */
1086
1087 static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
1088 {
1089 struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));
1090
1091 iph->version = 4;
1092 iph->tos = skb->nh.iph->tos;
1093 iph->ttl = skb->nh.iph->ttl;
1094 iph->frag_off = 0;
1095 iph->daddr = daddr;
1096 iph->saddr = saddr;
1097 iph->protocol = IPPROTO_IPIP;
1098 iph->ihl = 5;
1099 iph->tot_len = htons(skb->len);
1100 ip_select_ident(iph, skb->dst, NULL);
1101 ip_send_check(iph);
1102
1103 skb->h.ipiph = skb->nh.iph;
1104 skb->nh.iph = iph;
1105 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1106 nf_reset(skb);
1107 }
1108
1109 static inline int ipmr_forward_finish(struct sk_buff *skb)
1110 {
1111 struct ip_options *opt = &(IPCB(skb)->opt);
1112 struct dst_entry *dst = skb->dst;
1113
1114 if (unlikely(opt->optlen))
1115 ip_forward_options(skb);
1116
1117 if (skb->len <= dst->pmtu)
1118 return dst->output(skb);
1119 else
1120 return ip_fragment(skb, dst->output);
1121 }
1122
1123 /*
1124 * Processing handlers for ipmr_forward
1125 */
1126
1127 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
1128 int vifi, int last)
1129 {
1130 struct iphdr *iph = skb->nh.iph;
1131 struct vif_device *vif = &vif_table[vifi];
1132 struct net_device *dev;
1133 struct rtable *rt;
1134 int encap = 0;
1135 struct sk_buff *skb2;
1136
1137 if (vif->dev == NULL)
1138 return;
1139
1140 #ifdef CONFIG_IP_PIMSM
1141 if (vif->flags & VIFF_REGISTER) {
1142 vif->pkt_out++;
1143 vif->bytes_out+=skb->len;
1144 ((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
1145 ((struct net_device_stats*)vif->dev->priv)->tx_packets++;
1146 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1147 return;
1148 }
1149 #endif
1150
1151 if (vif->flags&VIFF_TUNNEL) {
1152 if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
1153 return;
1154 encap = sizeof(struct iphdr);
1155 } else {
1156 if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
1157 return;
1158 }
1159
1160 dev = rt->u.dst.dev;
1161
1162 if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
1163 		/* Do not fragment multicasts. Alas, IPv4 does not
1164 		   allow us to send ICMP here, so these packets will
1165 		   disappear into a black hole.
1166 */
1167
1168 IP_INC_STATS_BH(IpFragFails);
1169 ip_rt_put(rt);
1170 return;
1171 }
1172
1173 encap += dev->hard_header_len;
1174
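	/* Copy when we need more headroom, the data is cloned or the skb
	 * will be sent again; clone when it is merely shared; otherwise
	 * just take an extra reference and reuse it.
	 */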
1175 if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
1176 skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
1177 else if (atomic_read(&skb->users) != 1)
1178 skb2 = skb_clone(skb, GFP_ATOMIC);
1179 else {
1180 atomic_inc(&skb->users);
1181 skb2 = skb;
1182 }
1183
1184 if (skb2 == NULL) {
1185 ip_rt_put(rt);
1186 return;
1187 }
1188
1189 vif->pkt_out++;
1190 vif->bytes_out+=skb->len;
1191
1192 dst_release(skb2->dst);
1193 skb2->dst = &rt->u.dst;
1194 iph = skb2->nh.iph;
1195 ip_decrease_ttl(iph);
1196
1197 /* FIXME: forward and output firewalls used to be called here.
1198 * What do we do with netfilter? -- RR */
1199 if (vif->flags & VIFF_TUNNEL) {
1200 ip_encap(skb2, vif->local, vif->remote);
1201 /* FIXME: extra output firewall step used to be here. --RR */
1202 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
1203 ((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
1204 }
1205
1206 IPCB(skb2)->flags |= IPSKB_FORWARDED;
1207
1208 /*
1209 	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1210 	 * not only before forwarding, but also after forwarding on all output
1211 	 * interfaces. It is clear that if an mrouter runs a multicast
1212 	 * program, that program should receive packets regardless of the
1213 	 * interface it joined on.
1214 	 * If we did not do this, the program would have to join on all
1215 	 * interfaces. On the other hand, a multihomed host (or router, but
1216 	 * not an mrouter) cannot join on more than one interface - it would
1217 	 * result in receiving multiple packets.
1218 */
1219 NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev,
1220 ipmr_forward_finish);
1221 }
1222
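/*
 *	Map a device back to its VIF index; returns -1 if no VIF uses it.
 */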
1223 int ipmr_find_vif(struct net_device *dev)
1224 {
1225 int ct;
1226 for (ct=maxvif-1; ct>=0; ct--) {
1227 if (vif_table[ct].dev == dev)
1228 break;
1229 }
1230 return ct;
1231 }
1232
1233 /* "local" means that we should preserve one skb (for local delivery) */
1234
1235 int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1236 {
1237 int psend = -1;
1238 int vif, ct;
1239
1240 vif = cache->mfc_parent;
1241 cache->mfc_un.res.pkt++;
1242 cache->mfc_un.res.bytes += skb->len;
1243
1244 /*
1245 * Wrong interface: drop packet and (maybe) send PIM assert.
1246 */
1247 if (vif_table[vif].dev != skb->dev) {
1248 int true_vifi;
1249
1250 if (((struct rtable*)skb->dst)->key.iif == 0) {
1251 /* It is our own packet, looped back.
1252 Very complicated situation...
1253
1254 			   The best workaround, until the routing daemons are
1255 			   fixed, is not to redistribute a packet if it was
1256 			   sent through the wrong interface. This means that
1257 			   multicast applications WILL NOT work for an
1258 			   (S,G) whose default multicast route points to the
1259 			   wrong oif. In any case, it is not a good
1260 			   idea to run multicast applications on a router.
1261 */
1262 goto dont_forward;
1263 }
1264
1265 cache->mfc_un.res.wrong_if++;
1266 true_vifi = ipmr_find_vif(skb->dev);
1267
1268 if (true_vifi >= 0 && mroute_do_assert &&
1269 /* pimsm uses asserts, when switching from RPT to SPT,
1270 so that we cannot check that packet arrived on an oif.
1271 It is bad, but otherwise we would need to move pretty
1272 large chunk of pimd to kernel. Ough... --ANK
1273 */
1274 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1275 jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
1276 cache->mfc_un.res.last_assert = jiffies;
1277 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1278 }
1279 goto dont_forward;
1280 }
1281
1282 vif_table[vif].pkt_in++;
1283 vif_table[vif].bytes_in+=skb->len;
1284
1285 /*
1286 * Forward the frame
1287 */
1288 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1289 if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
1290 if (psend != -1)
1291 ipmr_queue_xmit(skb, cache, psend, 0);
1292 psend=ct;
1293 }
1294 }
1295 if (psend != -1)
1296 ipmr_queue_xmit(skb, cache, psend, !local);
1297
1298 dont_forward:
1299 if (!local)
1300 kfree_skb(skb);
1301 return 0;
1302 }
1303
1304
1305 /*
1306 * Multicast packets for forwarding arrive here
1307 */
1308
1309 int ip_mr_input(struct sk_buff *skb)
1310 {
1311 struct mfc_cache *cache;
1312 int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;
1313
1314 	/* The packet was looped back after forwarding; it must not be
1315 	   forwarded a second time, but it can still be delivered locally.
1316 */
1317 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1318 goto dont_forward;
1319
1320 if (!local) {
1321 if (IPCB(skb)->opt.router_alert) {
1322 if (ip_call_ra_chain(skb))
1323 return 0;
1324 } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
1325 			/* IGMPv1 (and broken IGMPv2 implementations such as
1326 			   Cisco IOS <= 11.2(8)) do not put the router alert
1327 			   option in IGMP packets destined to routable
1328 			   groups. This is very bad, because it means
1329 			   that we can forward NO IGMP messages.
1330 */
1331 read_lock(&mrt_lock);
1332 if (mroute_socket) {
1333 raw_rcv(mroute_socket, skb);
1334 read_unlock(&mrt_lock);
1335 return 0;
1336 }
1337 read_unlock(&mrt_lock);
1338 }
1339 }
1340
1341 read_lock(&mrt_lock);
1342 cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);
1343
1344 /*
1345 * No usable cache entry
1346 */
1347 if (cache==NULL) {
1348 int vif;
1349
1350 if (local) {
1351 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1352 ip_local_deliver(skb);
1353 if (skb2 == NULL) {
1354 read_unlock(&mrt_lock);
1355 return -ENOBUFS;
1356 }
1357 skb = skb2;
1358 }
1359
1360 vif = ipmr_find_vif(skb->dev);
1361 if (vif >= 0) {
1362 int err = ipmr_cache_unresolved(vif, skb);
1363 read_unlock(&mrt_lock);
1364
1365 return err;
1366 }
1367 read_unlock(&mrt_lock);
1368 kfree_skb(skb);
1369 return -ENODEV;
1370 }
1371
1372 ip_mr_forward(skb, cache, local);
1373
1374 read_unlock(&mrt_lock);
1375
1376 if (local)
1377 return ip_local_deliver(skb);
1378
1379 return 0;
1380
1381 dont_forward:
1382 if (local)
1383 return ip_local_deliver(skb);
1384 kfree_skb(skb);
1385 return 0;
1386 }
1387
1388 #ifdef CONFIG_IP_PIMSM_V1
1389 /*
1390 * Handle IGMP messages of PIMv1
1391 */
1392
1393 int pim_rcv_v1(struct sk_buff * skb)
1394 {
1395 struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
1396 struct iphdr *encap;
1397 struct net_device *reg_dev = NULL;
1398
1399 if (skb_is_nonlinear(skb)) {
1400 if (skb_linearize(skb, GFP_ATOMIC) != 0) {
1401 kfree_skb(skb);
1402 return -ENOMEM;
1403 }
1404 pim = (struct igmphdr*)skb->h.raw;
1405 }
1406
1407 if (!mroute_do_pim ||
1408 skb->len < sizeof(*pim) + sizeof(*encap) ||
1409 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
1410 kfree_skb(skb);
1411 return -EINVAL;
1412 }
1413
1414 encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
1415 /*
1416 Check that:
1417 	   a. packet is really destined to a multicast group
1418 b. packet is not a NULL-REGISTER
1419 c. packet is not truncated
1420 */
1421 if (!MULTICAST(encap->daddr) ||
1422 ntohs(encap->tot_len) == 0 ||
1423 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
1424 kfree_skb(skb);
1425 return -EINVAL;
1426 }
1427
1428 read_lock(&mrt_lock);
1429 if (reg_vif_num >= 0)
1430 reg_dev = vif_table[reg_vif_num].dev;
1431 if (reg_dev)
1432 dev_hold(reg_dev);
1433 read_unlock(&mrt_lock);
1434
1435 if (reg_dev == NULL) {
1436 kfree_skb(skb);
1437 return -EINVAL;
1438 }
1439
1440 skb->mac.raw = skb->nh.raw;
1441 skb_pull(skb, (u8*)encap - skb->data);
1442 skb->nh.iph = (struct iphdr *)skb->data;
1443 skb->dev = reg_dev;
1444 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1445 skb->protocol = htons(ETH_P_IP);
1446 skb->ip_summed = 0;
1447 skb->pkt_type = PACKET_HOST;
1448 dst_release(skb->dst);
1449 skb->dst = NULL;
1450 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1451 ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1452 nf_reset(skb);
1453 netif_rx(skb);
1454 dev_put(reg_dev);
1455 return 0;
1456 }
1457 #endif
1458
1459 #ifdef CONFIG_IP_PIMSM_V2
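/*
 * Handle PIMv2 register messages
 */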
1460 int pim_rcv(struct sk_buff * skb)
1461 {
1462 struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
1463 struct iphdr *encap;
1464 struct net_device *reg_dev = NULL;
1465
1466 if (skb_is_nonlinear(skb)) {
1467 if (skb_linearize(skb, GFP_ATOMIC) != 0) {
1468 kfree_skb(skb);
1469 return -ENOMEM;
1470 }
1471 pim = (struct pimreghdr*)skb->h.raw;
1472 }
1473
1474 if (skb->len < sizeof(*pim) + sizeof(*encap) ||
1475 pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1476 (pim->flags&PIM_NULL_REGISTER) ||
1477 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1478 ip_compute_csum((void *)pim, skb->len))) {
1479 kfree_skb(skb);
1480 return -EINVAL;
1481 }
1482
1483 /* check if the inner packet is destined to mcast group */
1484 encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
1485 if (!MULTICAST(encap->daddr) ||
1486 ntohs(encap->tot_len) == 0 ||
1487 ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
1488 kfree_skb(skb);
1489 return -EINVAL;
1490 }
1491
1492 read_lock(&mrt_lock);
1493 if (reg_vif_num >= 0)
1494 reg_dev = vif_table[reg_vif_num].dev;
1495 if (reg_dev)
1496 dev_hold(reg_dev);
1497 read_unlock(&mrt_lock);
1498
1499 if (reg_dev == NULL) {
1500 kfree_skb(skb);
1501 return -EINVAL;
1502 }
1503
1504 skb->mac.raw = skb->nh.raw;
1505 skb_pull(skb, (u8*)encap - skb->data);
1506 skb->nh.iph = (struct iphdr *)skb->data;
1507 skb->dev = reg_dev;
1508 memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
1509 skb->protocol = htons(ETH_P_IP);
1510 skb->ip_summed = 0;
1511 skb->pkt_type = PACKET_HOST;
1512 dst_release(skb->dst);
1513 ((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
1514 ((struct net_device_stats*)reg_dev->priv)->rx_packets++;
1515 skb->dst = NULL;
1516 nf_reset(skb);
1517 netif_rx(skb);
1518 dev_put(reg_dev);
1519 return 0;
1520 }
1521 #endif
1522
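/*
 *	Fill an rtnetlink reply with the incoming interface and an
 *	RTA_MULTIPATH list of the output interfaces of a resolved entry.
 */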
1523 static int
1524 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1525 {
1526 int ct;
1527 struct rtnexthop *nhp;
1528 struct net_device *dev = vif_table[c->mfc_parent].dev;
1529 u8 *b = skb->tail;
1530 struct rtattr *mp_head;
1531
1532 if (dev)
1533 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1534
1535 mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));
1536
1537 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1538 if (c->mfc_un.res.ttls[ct] < 255) {
1539 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1540 goto rtattr_failure;
1541 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1542 nhp->rtnh_flags = 0;
1543 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1544 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1545 nhp->rtnh_len = sizeof(*nhp);
1546 }
1547 }
1548 mp_head->rta_type = RTA_MULTIPATH;
1549 mp_head->rta_len = skb->tail - (u8*)mp_head;
1550 rtm->rtm_type = RTN_MULTICAST;
1551 return 1;
1552
1553 rtattr_failure:
1554 skb_trim(skb, b - skb->data);
1555 return -EMSGSIZE;
1556 }
1557
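/*
 *	rtnetlink route request hook: report a multicast route, queueing
 *	the packet as an unresolved entry if the cache has no answer yet.
 */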
1558 int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1559 {
1560 int err;
1561 struct mfc_cache *cache;
1562 struct rtable *rt = (struct rtable*)skb->dst;
1563
1564 read_lock(&mrt_lock);
1565 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1566
1567 if (cache==NULL) {
1568 struct net_device *dev;
1569 int vif;
1570
1571 if (nowait) {
1572 read_unlock(&mrt_lock);
1573 return -EAGAIN;
1574 }
1575
1576 dev = skb->dev;
1577 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1578 read_unlock(&mrt_lock);
1579 return -ENODEV;
1580 }
1581 skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
1582 skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
1583 skb->nh.iph->saddr = rt->rt_src;
1584 skb->nh.iph->daddr = rt->rt_dst;
1585 skb->nh.iph->version = 0;
1586 err = ipmr_cache_unresolved(vif, skb);
1587 read_unlock(&mrt_lock);
1588 return err;
1589 }
1590
1591 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1592 cache->mfc_flags |= MFC_NOTIFY;
1593 err = ipmr_fill_mroute(skb, cache, rtm);
1594 read_unlock(&mrt_lock);
1595 return err;
1596 }
1597
1598 #ifdef CONFIG_PROC_FS
1599 /*
1600  *	The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1601 */
1602
1603 static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
1604 {
1605 struct vif_device *vif;
1606 int len=0;
1607 off_t pos=0;
1608 off_t begin=0;
1609 int size;
1610 int ct;
1611
1612 len += sprintf(buffer,
1613 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
1614 pos=len;
1615
1616 read_lock(&mrt_lock);
1617 for (ct=0;ct<maxvif;ct++)
1618 {
1619 char *name = "none";
1620 vif=&vif_table[ct];
1621 if(!VIF_EXISTS(ct))
1622 continue;
1623 if (vif->dev)
1624 name = vif->dev->name;
1625 size = sprintf(buffer+len, "%2d %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1626 ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
1627 vif->flags, vif->local, vif->remote);
1628 len+=size;
1629 pos+=size;
1630 if(pos<offset)
1631 {
1632 len=0;
1633 begin=pos;
1634 }
1635 if(pos>offset+length)
1636 break;
1637 }
1638 read_unlock(&mrt_lock);
1639
1640 *start=buffer+(offset-begin);
1641 len-=(offset-begin);
1642 if(len>length)
1643 len=length;
1644 if (len<0)
1645 len = 0;
1646 return len;
1647 }
1648
1649 static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
1650 {
1651 struct mfc_cache *mfc;
1652 int len=0;
1653 off_t pos=0;
1654 off_t begin=0;
1655 int size;
1656 int ct;
1657
1658 len += sprintf(buffer,
1659 "Group Origin Iif Pkts Bytes Wrong Oifs\n");
1660 pos=len;
1661
1662 read_lock(&mrt_lock);
1663 for (ct=0;ct<MFC_LINES;ct++)
1664 {
1665 for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
1666 {
1667 int n;
1668
1669 /*
1670 * Interface forwarding map
1671 */
1672 size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
1673 (unsigned long)mfc->mfc_mcastgrp,
1674 (unsigned long)mfc->mfc_origin,
1675 mfc->mfc_parent,
1676 mfc->mfc_un.res.pkt,
1677 mfc->mfc_un.res.bytes,
1678 mfc->mfc_un.res.wrong_if);
1679 for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
1680 {
1681 if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
1682 size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
1683 }
1684 size += sprintf(buffer+len+size, "\n");
1685 len+=size;
1686 pos+=size;
1687 if(pos<offset)
1688 {
1689 len=0;
1690 begin=pos;
1691 }
1692 if(pos>offset+length)
1693 goto done;
1694 }
1695 }
1696
1697 spin_lock_bh(&mfc_unres_lock);
1698 for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
1699 size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
1700 (unsigned long)mfc->mfc_mcastgrp,
1701 (unsigned long)mfc->mfc_origin,
1702 -1,
1703 (long)mfc->mfc_un.unres.unresolved.qlen,
1704 0L, 0L);
1705 len+=size;
1706 pos+=size;
1707 if(pos<offset)
1708 {
1709 len=0;
1710 begin=pos;
1711 }
1712 if(pos>offset+length)
1713 break;
1714 }
1715 spin_unlock_bh(&mfc_unres_lock);
1716
1717 done:
1718 read_unlock(&mrt_lock);
1719 *start=buffer+(offset-begin);
1720 len-=(offset-begin);
1721 if(len>length)
1722 len=length;
1723 if (len < 0) {
1724 len = 0;
1725 }
1726 return len;
1727 }
1728
1729 #endif
1730
1731 #ifdef CONFIG_IP_PIMSM_V2
1732 struct inet_protocol pim_protocol =
1733 {
1734 pim_rcv, /* PIM handler */
1735 NULL, /* PIM error control */
1736 NULL, /* next */
1737 IPPROTO_PIM, /* protocol ID */
1738 0, /* copy */
1739 NULL, /* data */
1740 "PIM" /* name */
1741 };
1742 #endif
1743
1744
1745 /*
1746 * Setup for IP multicast routing
1747 */
1748
1749 void __init ip_mr_init(void)
1750 {
1751 printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
1752 mrt_cachep = kmem_cache_create("ip_mrt_cache",
1753 sizeof(struct mfc_cache),
1754 0, SLAB_HWCACHE_ALIGN,
1755 NULL, NULL);
1756 init_timer(&ipmr_expire_timer);
1757 ipmr_expire_timer.function=ipmr_expire_process;
1758 register_netdevice_notifier(&ip_mr_notifier);
1759 #ifdef CONFIG_PROC_FS
1760 proc_net_create("ip_mr_vif",0,ipmr_vif_info);
1761 proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
1762 #endif
1763 }
1764