/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *		(c) 1995 Alan Cox, <alan@redhat.com>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 *
 */

#include <linux/config.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>

#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
static int maxvif;

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

int mroute_do_assert;					/* Set in PIM assert	*/
int mroute_do_pim;

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
atomic_t cache_resolve_queue_len;			/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and protected
   with the weak lock mrt_lock. The queue of unresolved entries is
   protected with the strong spinlock mfc_unres_lock.

   This leaves the data path entirely free of exclusive locks.
 */
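
/* In practice (an illustrative summary of the scheme above): fast-path
 * readers take only read_lock(&mrt_lock); control-path writers take
 * write_lock_bh(&mrt_lock) under rtnl_lock; and only the unresolved
 * queue is ever touched under spin_lock_bh(&mfc_unres_lock).
 */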

kmem_cache_t *mrt_cachep;

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

extern struct inet_protocol pim_protocol;

static struct timer_list ipmr_expire_timer;

/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device  *dev;

	dev = __dev_get_by_name("tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t	oldfs;
		struct ip_tunnel_parm p;
		struct in_device  *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (void*)&p;

		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get(dev);
			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
				goto failure;
			in_dev->cnf.rp_filter = 0;

			if (dev_open(dev))
				goto failure;
		}
	}
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}

#ifdef CONFIG_IP_PIMSM

static int reg_vif_num = -1;

static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);
	((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
	((struct net_device_stats*)dev->priv)->tx_packets++;
	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return 0;
}

static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
{
	return (struct net_device_stats*)dev->priv;
}

static
struct net_device *ipmr_reg_vif(struct vifctl *v)
{
	struct net_device  *dev;
	struct in_device *in_dev;
	int size;

	size = sizeof(*dev) + sizeof(struct net_device_stats);
	dev = kmalloc(size, GFP_KERNEL);
	if (!dev)
		return NULL;

	memset(dev, 0, size);

	dev->priv = dev + 1;

	strcpy(dev->name, "pimreg");

	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= 1500 - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	dev->features		|= NETIF_F_DYNALLOC;

	if (register_netdevice(dev)) {
		kfree(dev);
		return NULL;
	}
	dev->iflink = 0;

	if ((in_dev = inetdev_init(dev)) == NULL)
		goto failure;

	in_dev->cnf.rp_filter = 0;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
#endif

/*
 *	Delete a VIF entry
 */

static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get(dev)) != NULL) {
		in_dev->cnf.mc_forwarding--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);
	return 0;
}

/* Destroy an unresolved cache entry, killing queued skbs
   and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));
			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}


/* Single timer process for all the unresolved queue. */

void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		long interval = c->mfc_un.unres.expires - now;

		if (interval > 0) {
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
/* Fill the oifs list. Called with mrt_lock write-locked. */

static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi=0; vifi<maxvif; vifi++) {
		if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}

static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
	case 0:
		dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		__dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get(dev)) == NULL)
		return -EADDRNOTAVAIL;
	in_dev->cnf.mc_forwarding++;
	dev_set_allmulti(dev, +1);
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}

static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	for (c = mfc_cache_array[line]; c; c = c->next) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			break;
	}
	return c;
}
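
/* Note: ipmr_cache_find() walks the resolved hash without taking any
 * lock itself, so callers must hold mrt_lock (a read lock suffices),
 * as ip_mr_input(), ipmr_ioctl() and ipmr_get_route() below do.
 */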

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
	if (c == NULL)
		return NULL;
	memset(c, 0, sizeof(*c));
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}

/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb->tail - (u8*)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}
			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
			ip_mr_forward(skb, c, 0);
	}
}

/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
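
/* Concretely (a sketch of the layout, per the code below): the upcall that
 * mrouted reads from its raw IGMP socket is the packet's IP header with the
 * protocol field forced to 0, reinterpreted as a struct igmpmsg (see
 * linux/mroute.h), followed by a pseudo IGMP header whose type field carries
 * the report code (IGMPMSG_NOCACHE, IGMPMSG_WRONGVIF, ...).
 */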

static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	int ihl = pkt->nh.iph->ihl<<2;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
		skb->nh.raw = skb->h.raw = (u8*)msg;
		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
	} else
#endif
	{

	/*
	 *	Copy the IP header
	 */

	skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
	memcpy(skb->data, pkt->data, ihl);
	skb->nh.iph->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg*)skb->nh.iph;
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	= 0;
	skb->nh.iph->tot_len = htons(skb->len);		/* Fix the length */
	skb->h.raw = skb->nh.raw;
	}

	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret = sock_queue_rcv_skb(mroute_socket, skb)) < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}

/*
 *	Queue a packet for resolution on a locked cache entry.
 */

static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	for (c = mfc_unres_queue; c; c = c->next) {
		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
		    c->mfc_origin == skb->nh.iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 */

		if (atomic_read(&cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent = -1;
		c->mfc_origin = skb->nh.iph->saddr;
		c->mfc_mcastgrp = skb->nh.iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE)) < 0) {
			/* If the report failed throw the cache entry
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}

/*
 *	MFC cache manipulation by user space mroute daemon
 */
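
/* For reference, a daemon installs one (S,G) route roughly like this
 * (illustrative userspace sketch; the addresses are hypothetical):
 *
 *	struct mfcctl mc;
 *
 *	memset(&mc, 0, sizeof(mc));
 *	mc.mfcc_origin.s_addr   = inet_addr("10.0.0.1");
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("224.1.2.3");
 *	mc.mfcc_parent = 0;		// expected incoming vif
 *	mc.mfcc_ttls[1] = 1;		// forward on vif 1, TTL threshold 1
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */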

int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &mfc_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
			return 0;
		}
	}
	return -ENOENT;
}

int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &mfc_cache_array[line]; (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	if (uc) {
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}

/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct sock *sk)
{
	int i;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		ipv4_devconf.mc_forwarding--;

		write_lock_bh(&mrt_lock);
		mroute_socket = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}

/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char *optval, int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;

	if (optname != MRT_INIT) {
		if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
		case MRT_INIT:
			if (sk->type != SOCK_RAW || sk->num != IPPROTO_IGMP)
				return -EOPNOTSUPP;
			if (optlen != sizeof(int))
				return -ENOPROTOOPT;

			rtnl_lock();
			if (mroute_socket) {
				rtnl_unlock();
				return -EADDRINUSE;
			}

			ret = ip_ra_control(sk, 1, mrtsock_destruct);
			if (ret == 0) {
				write_lock_bh(&mrt_lock);
				mroute_socket = sk;
				write_unlock_bh(&mrt_lock);

				ipv4_devconf.mc_forwarding++;
			}
			rtnl_unlock();
			return ret;
		case MRT_DONE:
			if (sk != mroute_socket)
				return -EACCES;
			return ip_ra_control(sk, 0, NULL);
		case MRT_ADD_VIF:
		case MRT_DEL_VIF:
			if (optlen != sizeof(vif))
				return -EINVAL;
			if (copy_from_user(&vif, optval, sizeof(vif)))
				return -EFAULT;
			if (vif.vifc_vifi >= MAXVIFS)
				return -ENFILE;
			rtnl_lock();
			if (optname == MRT_ADD_VIF) {
				ret = vif_add(&vif, sk == mroute_socket);
			} else {
				ret = vif_delete(vif.vifc_vifi);
			}
			rtnl_unlock();
			return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
		case MRT_ADD_MFC:
		case MRT_DEL_MFC:
			if (optlen != sizeof(mfc))
				return -EINVAL;
			if (copy_from_user(&mfc, optval, sizeof(mfc)))
				return -EFAULT;
			rtnl_lock();
			if (optname == MRT_DEL_MFC)
				ret = ipmr_mfc_delete(&mfc);
			else
				ret = ipmr_mfc_add(&mfc, sk == mroute_socket);
			rtnl_unlock();
			return ret;
		/*
		 *	Control PIM assert.
		 */
		case MRT_ASSERT:
		{
			int v;
			if (get_user(v, (int *)optval))
				return -EFAULT;
			mroute_do_assert = (v) ? 1 : 0;
			return 0;
		}
#ifdef CONFIG_IP_PIMSM
		case MRT_PIM:
		{
			int v;
			if (get_user(v, (int *)optval))
				return -EFAULT;
			v = (v) ? 1 : 0;
			rtnl_lock();
			if (v != mroute_do_pim) {
				mroute_do_pim = v;
				mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
				if (mroute_do_pim)
					inet_add_protocol(&pim_protocol);
				else
					inet_del_protocol(&pim_protocol);
#endif
			}
			rtnl_unlock();
			return 0;
		}
#endif
		/*
		 *	Spurious command, or MRT_VERSION which you cannot
		 *	set.
		 */
		default:
			return -ENOPROTOOPT;
	}
}
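
/* For orientation, a typical mrouted-style daemon drives the options above
 * roughly like this (illustrative userspace sketch; the address below is
 * hypothetical):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int on = 1;
 *	struct vifctl vc;
 *
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on));
 *	memset(&vc, 0, sizeof(vc));
 *	vc.vifc_vifi = 0;
 *	vc.vifc_threshold = 1;
 *	vc.vifc_lcl_addr.s_addr = inet_addr("192.168.0.1");
 *	setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 */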

/*
 *	Getsock opt support for the multicast routing system.
 */

int ip_mroute_getsockopt(struct sock *sk, int optname, char *optval, int *optlen)
{
	int olr;
	int val;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mroute_do_pim;
#endif
	else
		val = mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}

/*
 *	The IP multicast ioctl support routines.
 */

int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;

	switch (cmd) {
		case SIOCGETVIFCNT:
			if (copy_from_user(&vr, (void *)arg, sizeof(vr)))
				return -EFAULT;
			if (vr.vifi >= maxvif)
				return -EINVAL;
			read_lock(&mrt_lock);
			vif = &vif_table[vr.vifi];
			if (VIF_EXISTS(vr.vifi)) {
				vr.icount = vif->pkt_in;
				vr.ocount = vif->pkt_out;
				vr.ibytes = vif->bytes_in;
				vr.obytes = vif->bytes_out;
				read_unlock(&mrt_lock);

				if (copy_to_user((void *)arg, &vr, sizeof(vr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		case SIOCGETSGCNT:
			if (copy_from_user(&sr, (void *)arg, sizeof(sr)))
				return -EFAULT;

			read_lock(&mrt_lock);
			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
			if (c) {
				sr.pktcnt = c->mfc_un.res.pkt;
				sr.bytecnt = c->mfc_un.res.bytes;
				sr.wrong_if = c->mfc_un.res.wrong_if;
				read_unlock(&mrt_lock);

				if (copy_to_user((void *)arg, &sr, sizeof(sr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		default:
			return -ENOIOCTLCMD;
	}
}
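
/* Counters are read back through the ioctls above, e.g. (illustrative
 * userspace sketch):
 *
 *	struct sioc_vif_req vr;
 *
 *	vr.vifi = 0;
 *	if (ioctl(s, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif0: %lu pkts in, %lu pkts out\n", vr.icount, vr.ocount);
 */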


static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct vif_device *v;
	int ct;
	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;
	v = &vif_table[0];
	for (ct = 0; ct < maxvif; ct++, v++) {
		if (v->dev == ptr)
			vif_delete(ct);
	}
	return NOTIFY_DONE;
}


static struct notifier_block ip_mr_notifier = {
	ipmr_device_event,
	NULL,
	0
};

/*
 * 	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */

static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
{
	struct iphdr *iph = (struct iphdr *)skb_push(skb, sizeof(struct iphdr));

	iph->version	=	4;
	iph->tos	=	skb->nh.iph->tos;
	iph->ttl	=	skb->nh.iph->ttl;
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);

	skb->h.ipiph = skb->nh.iph;
	skb->nh.iph = iph;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}
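
/* Resulting layout (a sketch): [ outer iphdr, protocol IPPROTO_IPIP ]
 * [ original iphdr ][ payload ], with skb->nh pointing at the new outer
 * header and skb->h at the original inner one.
 */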

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);
	struct dst_entry *dst = skb->dst;

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	if (skb->len <= dst->pmtu)
		return dst->output(skb);
	else
		return ip_fragment(skb, dst->output);
}

/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
			   int vifi, int last)
{
	struct iphdr *iph = skb->nh.iph;
	struct vif_device *vif = &vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;
	struct sk_buff *skb2;

	if (vif->dev == NULL)
		return;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
		((struct net_device_stats*)vif->dev->priv)->tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
			return;
		encap = sizeof(struct iphdr);
	} else {
		if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
			return;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 gives us no way
		   to send ICMP here, so such packets simply disappear
		   into a black hole.
		 */

		IP_INC_STATS_BH(IpFragFails);
		ip_rt_put(rt);
		return;
	}

	encap += dev->hard_header_len;

	if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
		skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
	else if (atomic_read(&skb->users) != 1)
		skb2 = skb_clone(skb, GFP_ATOMIC);
	else {
		atomic_inc(&skb->users);
		skb2 = skb;
	}

	if (skb2 == NULL) {
		ip_rt_put(rt);
		return;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	dst_release(skb2->dst);
	skb2->dst = &rt->u.dst;
	iph = skb2->nh.iph;
	ip_decrease_ttl(iph);

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb2, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
		((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes += skb2->len;
	}

	IPCB(skb2)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
	 * locally not only before forwarding, but also after forwarding on
	 * all output interfaces. Clearly, if the mrouter runs a multicast
	 * application, that application should receive packets regardless
	 * of which interface it joined on; otherwise it would have to join
	 * on all interfaces. A multihomed host (or a router that is not an
	 * mrouter), on the other hand, must not join a group on more than
	 * one interface, or it will receive duplicate packets.
	 */
	NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev,
		ipmr_forward_finish);
}

int ipmr_find_vif(struct net_device *dev)
{
	int ct;
	for (ct = maxvif-1; ct >= 0; ct--) {
		if (vif_table[ct].dev == dev)
			break;
	}
	return ct;
}

/* "local" means that we should preserve one skb (for local delivery) */

int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (((struct rtable*)skb->dst)->key.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until the routing daemons are
			   fixed is not to redistribute a packet if it was
			   sent through the wrong interface. It means that
			   multicast applications WILL NOT work for
			   (S,G) entries whose default multicast route points
			   to the wrong oif. In any case, it is not a good
			   idea to run multicast applications on a router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts when switching from RPT to SPT,
		       so we cannot check that the packet arrived on an oif.
		       It is bad, but otherwise we would need to move a
		       pretty large chunk of pimd into the kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in += skb->len;

	/*
	 *	Forward the frame
	 */
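	/* Note the deferred-send pattern below: psend trails the scan by one
	 * matching vif, so every copy except the last is transmitted with
	 * last == 0 (forcing a copy in ipmr_queue_xmit()), while the final
	 * transmit may reuse the original skb unless a local copy must be
	 * preserved (hence "!local").
	 */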
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1)
				ipmr_queue_xmit(skb, cache, psend, 0);
			psend = ct;
		}
	}
	if (psend != -1)
		ipmr_queue_xmit(skb, cache, psend, !local);

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}


/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

	/* A packet looped back after forwarding must not be forwarded
	   a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		    if (IPCB(skb)->opt.router_alert) {
			    if (ip_call_ra_chain(skb))
				    return 0;
		    } else if (skb->nh.iph->protocol == IPPROTO_IGMP) {
			    /* IGMPv1 (and broken IGMPv2 implementations such
			       as Cisco IOS <= 11.2(8)) do not put the router
			       alert option into IGMP packets destined to
			       routable groups. It is very bad, because it
			       means that we can forward NO IGMP messages.
			     */
			    read_lock(&mrt_lock);
			    if (mroute_socket) {
				    raw_rcv(mroute_socket, skb);
				    read_unlock(&mrt_lock);
				    return 0;
			    }
			    read_unlock(&mrt_lock);
		    }
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */
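
/* Historical note (for context): PIMv1 was carried inside IGMP (protocol 2),
 * which is why struct igmphdr is reused below. The IGMP code field holds
 * the PIM message type (PIM_V1_REGISTER) and the group field the PIM
 * version word; dispatch on the PIM IGMP type happens in igmp.c.
 */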

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (skb_is_nonlinear(skb)) {
		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
			kfree_skb(skb);
			return -ENOMEM;
		}
		pim = (struct igmphdr*)skb->h.raw;
	}

	if (!mroute_do_pim ||
	    skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
		kfree_skb(skb);
		return -EINVAL;
	}

	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destined to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!MULTICAST(encap->daddr) ||
	    ntohs(encap->tot_len) == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
		kfree_skb(skb);
		return -EINVAL;
	}

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (skb_is_nonlinear(skb)) {
		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
			kfree_skb(skb);
			return -ENOMEM;
		}
		pim = (struct pimreghdr*)skb->h.raw;
	}

	if (skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     ip_compute_csum((void *)pim, skb->len))) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/* check if the inner packet is destined to a mcast group */
	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
	if (!MULTICAST(encap->daddr) ||
	    ntohs(encap->tot_len) == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
		kfree_skb(skb);
		return -EINVAL;
	}

	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
}
#endif

static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif_table[c->mfc_parent].dev;
	u8 *b = skb->tail;
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb->tail - (u8*)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -EMSGSIZE;
}

int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = (struct rtable*)skb->dst;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
		skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
		skb->nh.iph->saddr = rt->rt_src;
		skb->nh.iph->daddr = rt->rt_dst;
		skb->nh.iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}

#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */

static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
{
	struct vif_device *vif;
	int len = 0;
	off_t pos = 0;
	off_t begin = 0;
	int size;
	int ct;

	len += sprintf(buffer,
		 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	pos = len;

	read_lock(&mrt_lock);
	for (ct = 0; ct < maxvif; ct++) {
		char *name = "none";
		vif = &vif_table[ct];
		if (!VIF_EXISTS(ct))
			continue;
		if (vif->dev)
			name = vif->dev->name;
		size = sprintf(buffer+len, "%2d %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
			vif->flags, vif->local, vif->remote);
		len += size;
		pos += size;
		if (pos < offset) {
			len = 0;
			begin = pos;
		}
		if (pos > offset+length)
			break;
	}
	read_unlock(&mrt_lock);

	*start = buffer+(offset-begin);
	len -= (offset-begin);
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}

static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
{
	struct mfc_cache *mfc;
	int len = 0;
	off_t pos = 0;
	off_t begin = 0;
	int size;
	int ct;

	len += sprintf(buffer,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	pos = len;

	read_lock(&mrt_lock);
	for (ct = 0; ct < MFC_LINES; ct++) {
		for (mfc = mfc_cache_array[ct]; mfc; mfc = mfc->next) {
			int n;

			/*
			 *	Interface forwarding map
			 */
			size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
				(unsigned long)mfc->mfc_mcastgrp,
				(unsigned long)mfc->mfc_origin,
				mfc->mfc_parent,
				mfc->mfc_un.res.pkt,
				mfc->mfc_un.res.bytes,
				mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
					size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
			}
			size += sprintf(buffer+len+size, "\n");
			len += size;
			pos += size;
			if (pos < offset) {
				len = 0;
				begin = pos;
			}
			if (pos > offset+length)
				goto done;
		}
	}

	spin_lock_bh(&mfc_unres_lock);
	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) {
		size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
			       (unsigned long)mfc->mfc_mcastgrp,
			       (unsigned long)mfc->mfc_origin,
			       -1,
			       (long)mfc->mfc_un.unres.unresolved.qlen,
			       0L, 0L);
		len += size;
		pos += size;
		if (pos < offset) {
			len = 0;
			begin = pos;
		}
		if (pos > offset+length)
			break;
	}
	spin_unlock_bh(&mfc_unres_lock);

done:
	read_unlock(&mrt_lock);
	*start = buffer+(offset-begin);
	len -= (offset-begin);
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}

#endif

#ifdef CONFIG_IP_PIMSM_V2
struct inet_protocol pim_protocol =
{
	pim_rcv,		/* PIM handler		*/
	NULL,			/* PIM error control	*/
	NULL,			/* next			*/
	IPPROTO_PIM,		/* protocol ID		*/
	0,			/* copy			*/
	NULL,			/* data			*/
	"PIM"			/* name			*/
};
#endif


/*
 *	Setup for IP multicast routing
 */

void __init ip_mr_init(void)
{
	printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN,
				       NULL, NULL);
	init_timer(&ipmr_expire_timer);
	ipmr_expire_timer.function = ipmr_expire_process;
	register_netdevice_notifier(&ip_mr_notifier);
#ifdef CONFIG_PROC_FS
	proc_net_create("ip_mr_vif", 0, ipmr_vif_info);
	proc_net_create("ip_mr_cache", 0, ipmr_mfc_info);
#endif
}