1 /*
2  * INET		An implementation of the TCP/IP protocol suite for the LINUX
3  *		operating system.  INET is implemented using the  BSD Socket
4  *		interface as the means of communication with the user level.
5  *
6  *		The User Datagram Protocol (UDP).
7  *
8  * Version:	$Id: udp.c,v 1.100.2.4 2002/03/05 12:47:34 davem Exp $
9  *
10  * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
11  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13  *		Alan Cox, <Alan.Cox@linux.org>
14  *
15  * Fixes:
16  *		Alan Cox	:	verify_area() calls
17  *		Alan Cox	: 	stopped close while in use off icmp
18  *					messages. Not a fix but a botch that
19  *					for udp at least is 'valid'.
20  *		Alan Cox	:	Fixed icmp handling properly
21  *		Alan Cox	: 	Correct error for oversized datagrams
22  *		Alan Cox	:	Tidied select() semantics.
23  *		Alan Cox	:	udp_err() fixed properly, also now
24  *					select and read wake correctly on errors
25  *		Alan Cox	:	udp_send verify_area moved to avoid mem leak
26  *		Alan Cox	:	UDP can count its memory
27  *		Alan Cox	:	send to an unknown connection causes
28  *					an ECONNREFUSED off the icmp, but
29  *					does NOT close.
30  *		Alan Cox	:	Switched to new sk_buff handlers. No more backlog!
31  *		Alan Cox	:	Using generic datagram code. Even smaller and the PEEK
32  *					bug no longer crashes it.
33  *		Fred Van Kempen	: 	Net2e support for sk->broadcast.
34  *		Alan Cox	:	Uses skb_free_datagram
35  *		Alan Cox	:	Added get/set sockopt support.
36  *		Alan Cox	:	Broadcasting without option set returns EACCES.
37  *		Alan Cox	:	No wakeup calls. Instead we now use the callbacks.
38  *		Alan Cox	:	Use ip_tos and ip_ttl
39  *		Alan Cox	:	SNMP Mibs
40  *		Alan Cox	:	MSG_DONTROUTE, and 0.0.0.0 support.
41  *		Matt Dillon	:	UDP length checks.
42  *		Alan Cox	:	Smarter af_inet used properly.
43  *		Alan Cox	:	Use new kernel side addressing.
44  *		Alan Cox	:	Incorrect return on truncated datagram receive.
45  *	Arnt Gulbrandsen 	:	New udp_send and stuff
46  *		Alan Cox	:	Cache last socket
47  *		Alan Cox	:	Route cache
48  *		Jon Peatfield	:	Minor efficiency fix to sendto().
49  *		Mike Shaver	:	RFC1122 checks.
50  *		Alan Cox	:	Nonblocking error fix.
51  *	Willy Konynenberg	:	Transparent proxying support.
52  *		Mike McLagan	:	Routing by source
53  *		David S. Miller	:	New socket lookup architecture.
54  *					Last socket cache retained as it
55  *					does have a high hit rate.
56  *		Olaf Kirch	:	Don't linearise iovec on sendmsg.
57  *		Andi Kleen	:	Some cleanups, cache destination entry
58  *					for connect.
59  *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
60  *		Melvin Smith	:	Check msg_name not msg_namelen in sendto(),
61  *					return ENOTCONN for unconnected sockets (POSIX)
62  *		Janos Farkas	:	don't deliver multi/broadcasts to a different
63  *					bound-to-device socket
64  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
65  *	Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
66  *					a single port at the same time.
67  *
68  *
69  *		This program is free software; you can redistribute it and/or
70  *		modify it under the terms of the GNU General Public License
71  *		as published by the Free Software Foundation; either version
72  *		2 of the License, or (at your option) any later version.
73  */
74 
75 #include <asm/system.h>
76 #include <asm/uaccess.h>
77 #include <asm/ioctls.h>
78 #include <linux/types.h>
79 #include <linux/fcntl.h>
80 #include <linux/socket.h>
81 #include <linux/sockios.h>
82 #include <linux/in.h>
83 #include <linux/errno.h>
84 #include <linux/timer.h>
85 #include <linux/mm.h>
86 #include <linux/config.h>
87 #include <linux/inet.h>
88 #include <linux/netdevice.h>
89 #include <net/snmp.h>
90 #include <net/ip.h>
91 #include <net/ipv6.h>
92 #include <net/protocol.h>
93 #include <linux/skbuff.h>
94 #include <net/sock.h>
95 #include <net/udp.h>
96 #include <net/icmp.h>
97 #include <net/route.h>
98 #include <net/inet_common.h>
99 #include <net/checksum.h>
100 
101 /*
102  *	Snmp MIB for the UDP layer
103  */
104 
105 struct udp_mib		udp_statistics[NR_CPUS*2];
106 
107 struct sock *udp_hash[UDP_HTABLE_SIZE];
108 rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;
109 
110 /* Shared by v4/v6 udp. */
111 int udp_port_rover;
112 
udp_v4_get_port(struct sock * sk,unsigned short snum)113 static int udp_v4_get_port(struct sock *sk, unsigned short snum)
114 {
115 	write_lock_bh(&udp_hash_lock);
116 	if (snum == 0) {
117 		int best_size_so_far, best, result, i;
118 
119 		if (udp_port_rover > sysctl_local_port_range[1] ||
120 		    udp_port_rover < sysctl_local_port_range[0])
121 			udp_port_rover = sysctl_local_port_range[0];
122 		best_size_so_far = 32767;
123 		best = result = udp_port_rover;
124 		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
125 			struct sock *sk;
126 			int size;
127 
128 			sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
129 			if (!sk) {
130 				if (result > sysctl_local_port_range[1])
131 					result = sysctl_local_port_range[0] +
132 						((result - sysctl_local_port_range[0]) &
133 						 (UDP_HTABLE_SIZE - 1));
134 				goto gotit;
135 			}
136 			size = 0;
137 			do {
138 				if (++size >= best_size_so_far)
139 					goto next;
140 			} while ((sk = sk->next) != NULL);
141 			best_size_so_far = size;
142 			best = result;
143 		next:;
144 		}
145 		result = best;
146 		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
147 			if (result > sysctl_local_port_range[1])
148 				result = sysctl_local_port_range[0]
149 					+ ((result - sysctl_local_port_range[0]) &
150 					   (UDP_HTABLE_SIZE - 1));
151 			if (!udp_lport_inuse(result))
152 				break;
153 		}
154 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
155 			goto fail;
156 gotit:
157 		udp_port_rover = snum = result;
158 	} else {
159 		struct sock *sk2;
160 
161 		for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
162 		     sk2 != NULL;
163 		     sk2 = sk2->next) {
164 			if (sk2->num == snum &&
165 			    sk2 != sk &&
166 			    !ipv6_only_sock(sk2) &&
167 			    (!sk2->bound_dev_if ||
168 			     !sk->bound_dev_if ||
169 			     sk2->bound_dev_if == sk->bound_dev_if) &&
170 			    (!sk2->rcv_saddr ||
171 			     !sk->rcv_saddr ||
172 			     sk2->rcv_saddr == sk->rcv_saddr) &&
173 			    (!sk2->reuse || !sk->reuse))
174 				goto fail;
175 		}
176 	}
177 	sk->num = snum;
178 	if (sk->pprev == NULL) {
179 		struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
180 		if ((sk->next = *skp) != NULL)
181 			(*skp)->pprev = &sk->next;
182 		*skp = sk;
183 		sk->pprev = skp;
184 		sock_prot_inc_use(sk->prot);
185 		sock_hold(sk);
186 	}
187 	write_unlock_bh(&udp_hash_lock);
188 	return 0;
189 
190 fail:
191 	write_unlock_bh(&udp_hash_lock);
192 	return 1;
193 }
194 
/*
 *	UDP sockets are linked into udp_hash by udp_v4_get_port(), so this
 *	hook must never be reached; it traps with BUG() if it is.
 */
static void udp_v4_hash(struct sock *sk)
{
	BUG();
}
199 
udp_v4_unhash(struct sock * sk)200 static void udp_v4_unhash(struct sock *sk)
201 {
202 	write_lock_bh(&udp_hash_lock);
203 	if (sk->pprev) {
204 		if (sk->next)
205 			sk->next->pprev = sk->pprev;
206 		*sk->pprev = sk->next;
207 		sk->pprev = NULL;
208 		sk->num = 0;
209 		sock_prot_dec_use(sk->prot);
210 		__sock_put(sk);
211 	}
212 	write_unlock_bh(&udp_hash_lock);
213 }
214 
215 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
216  * harder than this. -DaveM
217  */
udp_v4_lookup_longway(u32 saddr,u16 sport,u32 daddr,u16 dport,int dif)218 struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
219 {
220 	struct sock *sk, *result = NULL;
221 	unsigned short hnum = ntohs(dport);
222 	int badness = -1;
223 
224 	for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
225 		if(sk->num == hnum && !ipv6_only_sock(sk)) {
226 			int score;
227 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
228 			score = sk->family == PF_INET ? 1 : 0;
229 #else
230 			score = 1;
231 #endif
232 			if(sk->rcv_saddr) {
233 				if(sk->rcv_saddr != daddr)
234 					continue;
235 				score+=2;
236 			}
237 			if(sk->daddr) {
238 				if(sk->daddr != saddr)
239 					continue;
240 				score+=2;
241 			}
242 			if(sk->dport) {
243 				if(sk->dport != sport)
244 					continue;
245 				score+=2;
246 			}
247 			if(sk->bound_dev_if) {
248 				if(sk->bound_dev_if != dif)
249 					continue;
250 				score+=2;
251 			}
252 			if(score == 9) {
253 				result = sk;
254 				break;
255 			} else if(score > badness) {
256 				result = sk;
257 				badness = score;
258 			}
259 		}
260 	}
261 	return result;
262 }
263 
/*
 *	Locked wrapper around udp_v4_lookup_longway(): performs the lookup
 *	under the udp_hash read lock and takes a reference on the socket
 *	found.  Returns the socket (held) or NULL.
 */
__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
	struct sock *found;

	read_lock(&udp_hash_lock);
	found = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
	if (found != NULL)
		sock_hold(found);
	read_unlock(&udp_hash_lock);

	return found;
}
275 
276 extern int ip_mc_sf_allow(struct sock *sk, u32 local, u32 rmt, int dif);
277 
udp_v4_mcast_next(struct sock * sk,u16 loc_port,u32 loc_addr,u16 rmt_port,u32 rmt_addr,int dif)278 static inline struct sock *udp_v4_mcast_next(struct sock *sk,
279 					     u16 loc_port, u32 loc_addr,
280 					     u16 rmt_port, u32 rmt_addr,
281 					     int dif)
282 {
283 	struct sock *s = sk;
284 	unsigned short hnum = ntohs(loc_port);
285 	for(; s; s = s->next) {
286 		if ((s->num != hnum)					||
287 		    (s->daddr && s->daddr!=rmt_addr)			||
288 		    (s->dport != rmt_port && s->dport != 0)			||
289 		    (s->rcv_saddr  && s->rcv_saddr != loc_addr)		||
290 		    ipv6_only_sock(s)					||
291 		    (s->bound_dev_if && s->bound_dev_if != dif))
292 			continue;
293 		if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
294 			continue;
295 		break;
296   	}
297   	return s;
298 }
299 
300 /*
301  * This routine is called by the ICMP module when it gets some
302  * sort of error condition.  If err < 0 then the socket should
303  * be closed and the error returned to the user.  If err > 0
304  * it's just the icmp type << 8 | icmp code.
305  * Header points to the ip header of the error packet. We move
306  * on past this. Then (as it used to claim before adjustment)
307  * header points to the first 8 bytes of the udp header.  We need
308  * to find the appropriate port.
309  */
310 
udp_err(struct sk_buff * skb,u32 info)311 void udp_err(struct sk_buff *skb, u32 info)
312 {
313 	struct iphdr *iph = (struct iphdr*)skb->data;
314 	struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
315 	int type = skb->h.icmph->type;
316 	int code = skb->h.icmph->code;
317 	struct sock *sk;
318 	int harderr;
319 	int err;
320 
321 	sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
322 	if (sk == NULL) {
323 		ICMP_INC_STATS_BH(IcmpInErrors);
324     	  	return;	/* No socket for error */
325 	}
326 
327 	err = 0;
328 	harderr = 0;
329 
330 	switch (type) {
331 	default:
332 	case ICMP_TIME_EXCEEDED:
333 		err = EHOSTUNREACH;
334 		break;
335 	case ICMP_SOURCE_QUENCH:
336 		goto out;
337 	case ICMP_PARAMETERPROB:
338 		err = EPROTO;
339 		harderr = 1;
340 		break;
341 	case ICMP_DEST_UNREACH:
342 		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
343 			if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT) {
344 				err = EMSGSIZE;
345 				harderr = 1;
346 				break;
347 			}
348 			goto out;
349 		}
350 		err = EHOSTUNREACH;
351 		if (code <= NR_ICMP_UNREACH) {
352 			harderr = icmp_err_convert[code].fatal;
353 			err = icmp_err_convert[code].errno;
354 		}
355 		break;
356 	}
357 
358 	/*
359 	 *      RFC1122: OK.  Passes ICMP errors back to application, as per
360 	 *	4.1.3.3.
361 	 */
362 	if (!sk->protinfo.af_inet.recverr) {
363 		if (!harderr || sk->state != TCP_ESTABLISHED)
364 			goto out;
365 	} else {
366 		ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
367 	}
368 	sk->err = err;
369 	sk->error_report(sk);
370 out:
371 	sock_put(sk);
372 }
373 
udp_check(struct udphdr * uh,int len,unsigned long saddr,unsigned long daddr,unsigned long base)374 static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
375 {
376 	return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
377 }
378 
379 struct udpfakehdr
380 {
381 	struct udphdr uh;
382 	u32 saddr;
383 	u32 daddr;
384 	struct iovec *iov;
385 	u32 wcheck;
386 };
387 
388 /*
389  *	Copy and checksum a UDP packet from user space into a buffer.
390  */
391 
udp_getfrag(const void * p,char * to,unsigned int offset,unsigned int fraglen,struct sk_buff * skb)392 static int udp_getfrag(const void *p, char * to, unsigned int offset,
393                        unsigned int fraglen, struct sk_buff *skb)
394 {
395 	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
396 	if (offset==0) {
397 		if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
398 						   fraglen-sizeof(struct udphdr), &ufh->wcheck))
399 			return -EFAULT;
400  		ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
401 					   ufh->wcheck);
402 		ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
403 					  ntohs(ufh->uh.len),
404 					  IPPROTO_UDP, ufh->wcheck);
405 		if (ufh->uh.check == 0)
406 			ufh->uh.check = -1;
407 		memcpy(to, ufh, sizeof(struct udphdr));
408 		return 0;
409 	}
410 	if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
411 					   fraglen, &ufh->wcheck))
412 		return -EFAULT;
413 	return 0;
414 }
415 
416 /*
417  *	Copy a UDP packet from user space into a buffer without checksumming.
418  */
419 
udp_getfrag_nosum(const void * p,char * to,unsigned int offset,unsigned int fraglen,struct sk_buff * skb)420 static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset,
421                              unsigned int fraglen, struct sk_buff *skb)
422 {
423 	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
424 
425 	if (offset==0) {
426 		memcpy(to, ufh, sizeof(struct udphdr));
427 		return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
428 					   fraglen-sizeof(struct udphdr));
429 	}
430 	return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
431 				   fraglen);
432 }
433 
udp_sendmsg(struct sock * sk,struct msghdr * msg,int len)434 int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
435 {
436 	int ulen = len + sizeof(struct udphdr);
437 	struct ipcm_cookie ipc;
438 	struct udpfakehdr ufh;
439 	struct rtable *rt = NULL;
440 	int free = 0;
441 	int connected = 0;
442 	u32 daddr;
443 	u8  tos;
444 	int err;
445 
446 	/* This check is ONLY to check for arithmetic overflow
447 	   on integer(!) len. Not more! Real check will be made
448 	   in ip_build_xmit --ANK
449 
450 	   BTW socket.c -> af_*.c -> ... make multiple
451 	   invalid conversions size_t -> int. We MUST repair it f.e.
452 	   by replacing all of them with size_t and revise all
453 	   the places sort of len += sizeof(struct iphdr)
454 	   If len was ULONG_MAX-10 it would be cathastrophe  --ANK
455 	 */
456 
457 	if (len < 0 || len > 0xFFFF)
458 		return -EMSGSIZE;
459 
460 	/*
461 	 *	Check the flags.
462 	 */
463 
464 	if (msg->msg_flags&MSG_OOB)	/* Mirror BSD error message compatibility */
465 		return -EOPNOTSUPP;
466 
467 	/*
468 	 *	Get and verify the address.
469 	 */
470 
471 	if (msg->msg_name) {
472 		struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
473 		if (msg->msg_namelen < sizeof(*usin))
474 			return -EINVAL;
475 		if (usin->sin_family != AF_INET) {
476 			if (usin->sin_family != AF_UNSPEC)
477 				return -EINVAL;
478 		}
479 
480 		ufh.daddr = usin->sin_addr.s_addr;
481 		ufh.uh.dest = usin->sin_port;
482 		if (ufh.uh.dest == 0)
483 			return -EINVAL;
484 	} else {
485 		if (sk->state != TCP_ESTABLISHED)
486 			return -EDESTADDRREQ;
487 		ufh.daddr = sk->daddr;
488 		ufh.uh.dest = sk->dport;
489 		/* Open fast path for connected socket.
490 		   Route will not be used, if at least one option is set.
491 		 */
492 		connected = 1;
493   	}
494 	ipc.addr = sk->saddr;
495 	ufh.uh.source = sk->sport;
496 
497 	ipc.opt = NULL;
498 	ipc.oif = sk->bound_dev_if;
499 	if (msg->msg_controllen) {
500 		err = ip_cmsg_send(msg, &ipc);
501 		if (err)
502 			return err;
503 		if (ipc.opt)
504 			free = 1;
505 		connected = 0;
506 	}
507 	if (!ipc.opt)
508 		ipc.opt = sk->protinfo.af_inet.opt;
509 
510 	ufh.saddr = ipc.addr;
511 	ipc.addr = daddr = ufh.daddr;
512 
513 	if (ipc.opt && ipc.opt->srr) {
514 		if (!daddr)
515 			return -EINVAL;
516 		daddr = ipc.opt->faddr;
517 		connected = 0;
518 	}
519 	tos = RT_TOS(sk->protinfo.af_inet.tos);
520 	if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
521 	    (ipc.opt && ipc.opt->is_strictroute)) {
522 		tos |= RTO_ONLINK;
523 		connected = 0;
524 	}
525 
526 	if (MULTICAST(daddr)) {
527 		if (!ipc.oif)
528 			ipc.oif = sk->protinfo.af_inet.mc_index;
529 		if (!ufh.saddr)
530 			ufh.saddr = sk->protinfo.af_inet.mc_addr;
531 		connected = 0;
532 	}
533 
534 	if (connected)
535 		rt = (struct rtable*)sk_dst_check(sk, 0);
536 
537 	if (rt == NULL) {
538 		err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
539 		if (err)
540 			goto out;
541 
542 		err = -EACCES;
543 		if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast)
544 			goto out;
545 		if (connected)
546 			sk_dst_set(sk, dst_clone(&rt->u.dst));
547 	}
548 
549 	if (msg->msg_flags&MSG_CONFIRM)
550 		goto do_confirm;
551 back_from_confirm:
552 
553 	ufh.saddr = rt->rt_src;
554 	if (!ipc.addr)
555 		ufh.daddr = ipc.addr = rt->rt_dst;
556 	ufh.uh.len = htons(ulen);
557 	ufh.uh.check = 0;
558 	ufh.iov = msg->msg_iov;
559 	ufh.wcheck = 0;
560 
561 	/* RFC1122: OK.  Provides the checksumming facility (MUST) as per */
562 	/* 4.1.3.4. It's configurable by the application via setsockopt() */
563 	/* (MAY) and it defaults to on (MUST). */
564 
565 	err = ip_build_xmit(sk,
566 			    (sk->no_check == UDP_CSUM_NOXMIT ?
567 			     udp_getfrag_nosum :
568 			     udp_getfrag),
569 			    &ufh, ulen, &ipc, rt, msg->msg_flags);
570 
571 out:
572 	ip_rt_put(rt);
573 	if (free)
574 		kfree(ipc.opt);
575 	if (!err) {
576 		UDP_INC_STATS_USER(UdpOutDatagrams);
577 		return len;
578 	}
579 	return err;
580 
581 do_confirm:
582 	dst_confirm(&rt->u.dst);
583 	if (!(msg->msg_flags&MSG_PROBE) || len)
584 		goto back_from_confirm;
585 	err = 0;
586 	goto out;
587 }
588 
589 /*
590  *	IOCTL requests applicable to the UDP protocol
591  */
592 
udp_ioctl(struct sock * sk,int cmd,unsigned long arg)593 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
594 {
595 	switch(cmd)
596 	{
597 		case SIOCOUTQ:
598 		{
599 			int amount = atomic_read(&sk->wmem_alloc);
600 			return put_user(amount, (int *)arg);
601 		}
602 
603 		case SIOCINQ:
604 		{
605 			struct sk_buff *skb;
606 			unsigned long amount;
607 
608 			amount = 0;
609 			spin_lock_irq(&sk->receive_queue.lock);
610 			skb = skb_peek(&sk->receive_queue);
611 			if (skb != NULL) {
612 				/*
613 				 * We will only return the amount
614 				 * of this packet since that is all
615 				 * that will be read.
616 				 */
617 				amount = skb->len - sizeof(struct udphdr);
618 			}
619 			spin_unlock_irq(&sk->receive_queue.lock);
620 			return put_user(amount, (int *)arg);
621 		}
622 
623 		default:
624 			return -ENOIOCTLCMD;
625 	}
626 	return(0);
627 }
628 
__udp_checksum_complete(struct sk_buff * skb)629 static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
630 {
631 	return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
632 }
633 
udp_checksum_complete(struct sk_buff * skb)634 static __inline__ int udp_checksum_complete(struct sk_buff *skb)
635 {
636 	return skb->ip_summed != CHECKSUM_UNNECESSARY &&
637 		__udp_checksum_complete(skb);
638 }
639 
640 
641 /**
642  * 	udp_poll - wait for a UDP event.
643  *	@file - file struct
644  *	@sock - socket
645  *	@wait - poll table
646  *
647  *	This is same as datagram poll, except for the special case of
648  *	blocking sockets. If application is using a blocking fd
649  *	and a packet with checksum error is in the queue;
650  *	then it could get return from select indicating data available
651  *	but then block when reading it. Add special case code
652  *	to work around these arguably broken applications.
653  */
udp_poll(struct file * file,struct socket * sock,poll_table * wait)654 unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
655 {
656 	unsigned int mask = datagram_poll(file, sock, wait);
657 	struct sock *sk = sock->sk;
658 
659 	/* Check for false positives due to checksum errors */
660 	if ( (mask & POLLRDNORM) &&
661 	     !(file->f_flags & O_NONBLOCK) &&
662 	     !(sk->shutdown & RCV_SHUTDOWN)){
663 		struct sk_buff_head *rcvq = &sk->receive_queue;
664 		struct sk_buff *skb;
665 
666 		spin_lock_irq(&rcvq->lock);
667 		while ((skb = skb_peek(rcvq)) != NULL) {
668 			if (udp_checksum_complete(skb)) {
669 				UDP_INC_STATS_BH(UdpInErrors);
670 				IP_INC_STATS_BH(IpInDiscards);
671 				__skb_unlink(skb, rcvq);
672 				kfree_skb(skb);
673 			} else {
674 				skb->ip_summed = CHECKSUM_UNNECESSARY;
675 				break;
676 			}
677 		}
678 		spin_unlock_irq(&rcvq->lock);
679 
680 		/* nothing to see, move along */
681 		if (skb == NULL)
682 			mask &= ~(POLLIN | POLLRDNORM);
683 	}
684 
685 	return mask;
686 
687 }
688 
689 /*
690  * 	This should be easy, if there is something there we
691  * 	return it, otherwise we block.
692  */
693 
udp_recvmsg(struct sock * sk,struct msghdr * msg,int len,int noblock,int flags,int * addr_len)694 int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
695 		int noblock, int flags, int *addr_len)
696 {
697   	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
698   	struct sk_buff *skb;
699   	int copied, err;
700 
701 	/*
702 	 *	Check any passed addresses
703 	 */
704 	if (addr_len)
705 		*addr_len=sizeof(*sin);
706 
707 	if (flags & MSG_ERRQUEUE)
708 		return ip_recv_error(sk, msg, len);
709 
710 try_again:
711 	skb = skb_recv_datagram(sk, flags, noblock, &err);
712 	if (!skb)
713 		goto out;
714 
715   	copied = skb->len - sizeof(struct udphdr);
716 	if (copied > len) {
717 		copied = len;
718 		msg->msg_flags |= MSG_TRUNC;
719 	}
720 
721 	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
722 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
723 					      copied);
724 	} else if (msg->msg_flags&MSG_TRUNC) {
725 		if (__udp_checksum_complete(skb))
726 			goto csum_copy_err;
727 		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
728 					      copied);
729 	} else {
730 		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
731 
732 		if (err == -EINVAL)
733 			goto csum_copy_err;
734 	}
735 
736 	if (err)
737 		goto out_free;
738 
739 	sock_recv_timestamp(msg, sk, skb);
740 
741 	/* Copy the address. */
742 	if (sin)
743 	{
744 		sin->sin_family = AF_INET;
745 		sin->sin_port = skb->h.uh->source;
746 		sin->sin_addr.s_addr = skb->nh.iph->saddr;
747 		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
748   	}
749 	if (sk->protinfo.af_inet.cmsg_flags)
750 		ip_cmsg_recv(msg, skb);
751 
752 	err = copied;
753 	if (flags & MSG_TRUNC)
754 		err = skb->len - sizeof(struct udphdr);
755 
756 out_free:
757   	skb_free_datagram(sk, skb);
758 out:
759   	return err;
760 
761 csum_copy_err:
762 	UDP_INC_STATS_BH(UdpInErrors);
763 
764 	/* Clear queue. */
765 	if (flags&MSG_PEEK) {
766 		int clear = 0;
767 		spin_lock_irq(&sk->receive_queue.lock);
768 		if (skb == skb_peek(&sk->receive_queue)) {
769 			__skb_unlink(skb, &sk->receive_queue);
770 			clear = 1;
771 		}
772 		spin_unlock_irq(&sk->receive_queue.lock);
773 		if (clear)
774 			kfree_skb(skb);
775 	}
776 
777 	skb_free_datagram(sk, skb);
778 
779 	if (noblock)
780 		return -EAGAIN;
781 	goto try_again;
782 }
783 
udp_connect(struct sock * sk,struct sockaddr * uaddr,int addr_len)784 int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
785 {
786 	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
787 	struct rtable *rt;
788 	u32 saddr;
789 	int oif;
790 	int err;
791 
792 
793 	if (addr_len < sizeof(*usin))
794 	  	return -EINVAL;
795 
796 	if (usin->sin_family != AF_INET)
797 	  	return -EAFNOSUPPORT;
798 
799 	sk_dst_reset(sk);
800 
801 	oif = sk->bound_dev_if;
802 	saddr = sk->saddr;
803 	if (MULTICAST(usin->sin_addr.s_addr)) {
804 		if (!oif)
805 			oif = sk->protinfo.af_inet.mc_index;
806 		if (!saddr)
807 			saddr = sk->protinfo.af_inet.mc_addr;
808 	}
809 	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
810 			       RT_CONN_FLAGS(sk), oif);
811 	if (err)
812 		return err;
813 	if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
814 		ip_rt_put(rt);
815 		return -EACCES;
816 	}
817   	if(!sk->saddr)
818 	  	sk->saddr = rt->rt_src;		/* Update source address */
819 	if(!sk->rcv_saddr)
820 		sk->rcv_saddr = rt->rt_src;
821 	sk->daddr = rt->rt_dst;
822 	sk->dport = usin->sin_port;
823 	sk->state = TCP_ESTABLISHED;
824 	sk->protinfo.af_inet.id = jiffies;
825 
826 	sk_dst_set(sk, &rt->u.dst);
827 	return(0);
828 }
829 
udp_disconnect(struct sock * sk,int flags)830 int udp_disconnect(struct sock *sk, int flags)
831 {
832 	/*
833 	 *	1003.1g - break association.
834 	 */
835 
836 	sk->state = TCP_CLOSE;
837 	sk->daddr = 0;
838 	sk->dport = 0;
839 	sk->bound_dev_if = 0;
840 	if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) {
841 		sk->rcv_saddr = 0;
842 		sk->saddr = 0;
843 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
844 		memset(&sk->net_pinfo.af_inet6.saddr, 0, 16);
845 		memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16);
846 #endif
847 	}
848 	if (!(sk->userlocks&SOCK_BINDPORT_LOCK)) {
849 		sk->prot->unhash(sk);
850 		sk->sport = 0;
851 	}
852 	sk_dst_reset(sk);
853 	return 0;
854 }
855 
/*
 *	Close hook: hands the socket to inet_sock_release().
 *	@timeout is not used.
 */
static void udp_close(struct sock *sk, long timeout)
{
	inet_sock_release(sk);
}
860 
udp_queue_rcv_skb(struct sock * sk,struct sk_buff * skb)861 static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
862 {
863 	/*
864 	 *	Charge it to the socket, dropping if the queue is full.
865 	 */
866 
867 #if defined(CONFIG_FILTER)
868 	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
869 		if (__udp_checksum_complete(skb)) {
870 			UDP_INC_STATS_BH(UdpInErrors);
871 			IP_INC_STATS_BH(IpInDiscards);
872 			ip_statistics[smp_processor_id()*2].IpInDelivers--;
873 			kfree_skb(skb);
874 			return -1;
875 		}
876 		skb->ip_summed = CHECKSUM_UNNECESSARY;
877 	}
878 #endif
879 
880 	if (sock_queue_rcv_skb(sk,skb)<0) {
881 		UDP_INC_STATS_BH(UdpInErrors);
882 		IP_INC_STATS_BH(IpInDiscards);
883 		ip_statistics[smp_processor_id()*2].IpInDelivers--;
884 		kfree_skb(skb);
885 		return -1;
886 	}
887 	UDP_INC_STATS_BH(UdpInDatagrams);
888 	return 0;
889 }
890 
891 /*
892  *	Multicasts and broadcasts go to each listener.
893  *
894  *	Note: called only from the BH handler context,
895  *	so we don't need to lock the hashes.
896  */
udp_v4_mcast_deliver(struct sk_buff * skb,struct udphdr * uh,u32 saddr,u32 daddr)897 static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
898 				 u32 saddr, u32 daddr)
899 {
900 	struct sock *sk;
901 	int dif;
902 
903 	read_lock(&udp_hash_lock);
904 	sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
905 	dif = skb->dev->ifindex;
906 	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
907 	if (sk) {
908 		struct sock *sknext = NULL;
909 
910 		do {
911 			struct sk_buff *skb1 = skb;
912 
913 			sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr,
914 						   uh->source, saddr, dif);
915 			if(sknext)
916 				skb1 = skb_clone(skb, GFP_ATOMIC);
917 
918 			if(skb1)
919 				udp_queue_rcv_skb(sk, skb1);
920 			sk = sknext;
921 		} while(sknext);
922 	} else
923 		kfree_skb(skb);
924 	read_unlock(&udp_hash_lock);
925 	return 0;
926 }
927 
928 /* Initialize UDP checksum. If exited with zero value (success),
929  * CHECKSUM_UNNECESSARY means, that no more checks are required.
930  * Otherwise, csum completion requires chacksumming packet body,
931  * including udp header and folding it to skb->csum.
932  */
udp_checksum_init(struct sk_buff * skb,struct udphdr * uh,unsigned short ulen,u32 saddr,u32 daddr)933 static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
934 			     unsigned short ulen, u32 saddr, u32 daddr)
935 {
936 	if (uh->check == 0) {
937 		skb->ip_summed = CHECKSUM_UNNECESSARY;
938 	} else if (skb->ip_summed == CHECKSUM_HW) {
939 		skb->ip_summed = CHECKSUM_UNNECESSARY;
940 		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
941 			return 0;
942 		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
943 		skb->ip_summed = CHECKSUM_NONE;
944 	}
945 	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
946 		skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
947 	/* Probably, we should checksum udp header (it should be in cache
948 	 * in any case) and data in tiny packets (< rx copybreak).
949 	 */
950 	return 0;
951 }
952 
953 /*
954  *	All we need to do is get the socket, and then do a checksum.
955  */
956 
udp_rcv(struct sk_buff * skb)957 int udp_rcv(struct sk_buff *skb)
958 {
959   	struct sock *sk;
960   	struct udphdr *uh;
961 	unsigned short ulen;
962 	struct rtable *rt = (struct rtable*)skb->dst;
963 	u32 saddr = skb->nh.iph->saddr;
964 	u32 daddr = skb->nh.iph->daddr;
965 	int len = skb->len;
966 
967   	IP_INC_STATS_BH(IpInDelivers);
968 
969 	/*
970 	 *	Validate the packet and the UDP length.
971 	 */
972 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
973 		goto no_header;
974 
975   	uh = skb->h.uh;
976 
977 	ulen = ntohs(uh->len);
978 
979 	if (ulen > len || ulen < sizeof(*uh))
980 		goto short_packet;
981 
982 	if (pskb_trim(skb, ulen))
983 		goto short_packet;
984 
985 	if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
986 		goto csum_error;
987 
988 	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
989 		return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
990 
991 	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
992 
993 	if (sk != NULL) {
994 		udp_queue_rcv_skb(sk, skb);
995 		sock_put(sk);
996 		return 0;
997 	}
998 
999 	/* No socket. Drop packet silently, if checksum is wrong */
1000 	if (udp_checksum_complete(skb))
1001 		goto csum_error;
1002 
1003 	UDP_INC_STATS_BH(UdpNoPorts);
1004 	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1005 
1006 	/*
1007 	 * Hmm.  We got an UDP packet to a port to which we
1008 	 * don't wanna listen.  Ignore it.
1009 	 */
1010 	kfree_skb(skb);
1011 	return(0);
1012 
1013 short_packet:
1014 	NETDEBUG(if (net_ratelimit())
1015 		 printk(KERN_DEBUG "UDP: short packet: %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
1016 			NIPQUAD(saddr),
1017 			ntohs(uh->source),
1018 			ulen,
1019 			len,
1020 			NIPQUAD(daddr),
1021 			ntohs(uh->dest)));
1022 no_header:
1023 	UDP_INC_STATS_BH(UdpInErrors);
1024 	kfree_skb(skb);
1025 	return(0);
1026 
1027 csum_error:
1028 	/*
1029 	 * RFC1122: OK.  Discards the bad packet silently (as far as
1030 	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
1031 	 */
1032 	NETDEBUG(if (net_ratelimit())
1033 		 printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
1034 			NIPQUAD(saddr),
1035 			ntohs(uh->source),
1036 			NIPQUAD(daddr),
1037 			ntohs(uh->dest),
1038 			ulen));
1039 	UDP_INC_STATS_BH(UdpInErrors);
1040 	kfree_skb(skb);
1041 	return(0);
1042 }
1043 
get_udp_sock(struct sock * sp,char * tmpbuf,int i)1044 static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
1045 {
1046 	unsigned int dest, src;
1047 	__u16 destp, srcp;
1048 
1049 	dest  = sp->daddr;
1050 	src   = sp->rcv_saddr;
1051 	destp = ntohs(sp->dport);
1052 	srcp  = ntohs(sp->sport);
1053 	sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1054 		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
1055 		i, src, srcp, dest, destp, sp->state,
1056 		atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
1057 		0, 0L, 0,
1058 		sock_i_uid(sp), 0,
1059 		sock_i_ino(sp),
1060 		atomic_read(&sp->refcnt), sp);
1061 }
1062 
/*
 *	Produce the UDP socket table as text.
 *
 *	The output is a sequence of fixed 128-byte records (127 padded
 *	characters plus newline): a heading followed by one record per
 *	PF_INET socket in udp_hash.  Records that end at or before @offset
 *	are skipped; generation stops once @length bytes have been
 *	produced.  *@start is set to where the requested data begins
 *	inside @buffer and the number of valid bytes from there (at most
 *	@length, never negative) is returned.
 */
int udp_get_info(char *buffer, char **start, off_t offset, int length)
{
	char line[129];
	off_t pos;
	off_t begin;
	int len = 0;
	int bucket;

	if (offset < 128)
		len += sprintf(buffer, "%-127s\n",
			       "  sl  local_address rem_address   st tx_queue "
			       "rx_queue tr tm->when retrnsmt   uid  timeout inode");
	pos = 128;

	read_lock(&udp_hash_lock);
	for (bucket = 0; bucket < UDP_HTABLE_SIZE; bucket++) {
		struct sock *sk;

		for (sk = udp_hash[bucket]; sk != NULL; sk = sk->next) {
			if (sk->family == PF_INET) {
				pos += 128;
				if (pos > offset) {
					get_udp_sock(sk, line, bucket);
					len += sprintf(buffer + len, "%-127s\n", line);
					if (len >= length)
						goto done;
				}
			}
		}
	}
done:
	read_unlock(&udp_hash_lock);

	/* Translate what was generated into the window asked for. */
	begin = len - (pos - offset);
	*start = buffer + begin;
	len -= begin;
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
1102 
1103 struct proto udp_prot = {
1104  	name:		"UDP",
1105 	close:		udp_close,
1106 	connect:	udp_connect,
1107 	disconnect:	udp_disconnect,
1108 	ioctl:		udp_ioctl,
1109 	setsockopt:	ip_setsockopt,
1110 	getsockopt:	ip_getsockopt,
1111 	sendmsg:	udp_sendmsg,
1112 	recvmsg:	udp_recvmsg,
1113 	backlog_rcv:	udp_queue_rcv_skb,
1114 	hash:		udp_v4_hash,
1115 	unhash:		udp_v4_unhash,
1116 	get_port:	udp_v4_get_port,
1117 };
1118