1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * The User Datagram Protocol (UDP).
7 *
8 * Version: $Id: udp.c,v 1.100.2.4 2002/03/05 12:47:34 davem Exp $
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
13 * Alan Cox, <Alan.Cox@linux.org>
14 *
15 * Fixes:
16 * Alan Cox : verify_area() calls
17 * Alan Cox : stopped close while in use off icmp
18 * messages. Not a fix but a botch that
19 * for udp at least is 'valid'.
20 * Alan Cox : Fixed icmp handling properly
21 * Alan Cox : Correct error for oversized datagrams
22 * Alan Cox : Tidied select() semantics.
23 * Alan Cox : udp_err() fixed properly, also now
24 * select and read wake correctly on errors
25 * Alan Cox : udp_send verify_area moved to avoid mem leak
26 * Alan Cox : UDP can count its memory
27 * Alan Cox : send to an unknown connection causes
28 * an ECONNREFUSED off the icmp, but
29 * does NOT close.
30 * Alan Cox : Switched to new sk_buff handlers. No more backlog!
31 * Alan Cox : Using generic datagram code. Even smaller and the PEEK
32 * bug no longer crashes it.
33 * Fred Van Kempen : Net2e support for sk->broadcast.
34 * Alan Cox : Uses skb_free_datagram
35 * Alan Cox : Added get/set sockopt support.
36 * Alan Cox : Broadcasting without option set returns EACCES.
37 * Alan Cox : No wakeup calls. Instead we now use the callbacks.
38 * Alan Cox : Use ip_tos and ip_ttl
39 * Alan Cox : SNMP Mibs
40 * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support.
41 * Matt Dillon : UDP length checks.
42 * Alan Cox : Smarter af_inet used properly.
43 * Alan Cox : Use new kernel side addressing.
44 * Alan Cox : Incorrect return on truncated datagram receive.
45 * Arnt Gulbrandsen : New udp_send and stuff
46 * Alan Cox : Cache last socket
47 * Alan Cox : Route cache
48 * Jon Peatfield : Minor efficiency fix to sendto().
49 * Mike Shaver : RFC1122 checks.
50 * Alan Cox : Nonblocking error fix.
51 * Willy Konynenberg : Transparent proxying support.
52 * Mike McLagan : Routing by source
53 * David S. Miller : New socket lookup architecture.
54 * Last socket cache retained as it
55 * does have a high hit rate.
56 * Olaf Kirch : Don't linearise iovec on sendmsg.
57 * Andi Kleen : Some cleanups, cache destination entry
58 * for connect.
59 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
60 * Melvin Smith : Check msg_name not msg_namelen in sendto(),
61 * return ENOTCONN for unconnected sockets (POSIX)
62 * Janos Farkas : don't deliver multi/broadcasts to a different
63 * bound-to-device socket
64 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
65 * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind
66 * a single port at the same time.
67 *
68 *
69 * This program is free software; you can redistribute it and/or
70 * modify it under the terms of the GNU General Public License
71 * as published by the Free Software Foundation; either version
72 * 2 of the License, or (at your option) any later version.
73 */
74
75 #include <asm/system.h>
76 #include <asm/uaccess.h>
77 #include <asm/ioctls.h>
78 #include <linux/types.h>
79 #include <linux/fcntl.h>
80 #include <linux/socket.h>
81 #include <linux/sockios.h>
82 #include <linux/in.h>
83 #include <linux/errno.h>
84 #include <linux/timer.h>
85 #include <linux/mm.h>
86 #include <linux/config.h>
87 #include <linux/inet.h>
88 #include <linux/netdevice.h>
89 #include <net/snmp.h>
90 #include <net/ip.h>
91 #include <net/ipv6.h>
92 #include <net/protocol.h>
93 #include <linux/skbuff.h>
94 #include <net/sock.h>
95 #include <net/udp.h>
96 #include <net/icmp.h>
97 #include <net/route.h>
98 #include <net/inet_common.h>
99 #include <net/checksum.h>
100
101 /*
102 * Snmp MIB for the UDP layer
103 */
104
105 struct udp_mib udp_statistics[NR_CPUS*2];
106
107 struct sock *udp_hash[UDP_HTABLE_SIZE];
108 rwlock_t udp_hash_lock = RW_LOCK_UNLOCKED;
109
110 /* Shared by v4/v6 udp. */
111 int udp_port_rover;
112
/*
 * Pick or validate a local UDP port for 'sk'.  snum == 0 means "choose
 * one for me" from the sysctl local port range; otherwise the requested
 * port is checked for binding conflicts.  Returns 0 on success, 1 on
 * failure.  Runs with udp_hash_lock held for writing throughout.
 */
static int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
	write_lock_bh(&udp_hash_lock);
	if (snum == 0) {
		int best_size_so_far, best, result, i;

		/* Restart the rover if it wandered outside the allowed range. */
		if (udp_port_rover > sysctl_local_port_range[1] ||
		    udp_port_rover < sysctl_local_port_range[0])
			udp_port_rover = sysctl_local_port_range[0];
		best_size_so_far = 32767;
		best = result = udp_port_rover;
		/* Pass 1: probe one bucket per hash slot, remembering the
		 * shortest chain.  An empty bucket lets us take the port
		 * immediately. */
		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
			struct sock *sk;
			int size;

			sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
			if (!sk) {
				/* Wrap 'result' back into range while
				 * preserving its hash bucket. */
				if (result > sysctl_local_port_range[1])
					result = sysctl_local_port_range[0] +
						((result - sysctl_local_port_range[0]) &
						 (UDP_HTABLE_SIZE - 1));
				goto gotit;
			}
			size = 0;
			do {
				if (++size >= best_size_so_far)
					goto next;
			} while ((sk = sk->next) != NULL);
			best_size_so_far = size;
			best = result;
		next:;
		}
		/* Pass 2: walk the shortest bucket in steps of
		 * UDP_HTABLE_SIZE until a genuinely free port is found. */
		result = best;
		for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) {
			if (result > sysctl_local_port_range[1])
				result = sysctl_local_port_range[0]
					+ ((result - sysctl_local_port_range[0]) &
					   (UDP_HTABLE_SIZE - 1));
			if (!udp_lport_inuse(result))
				break;
		}
		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
			goto fail;
gotit:
		udp_port_rover = snum = result;
	} else {
		struct sock *sk2;

		/* Explicit port: refuse when another socket already holds it
		 * with an overlapping device and address binding, unless both
		 * sockets set SO_REUSEADDR. */
		for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
		     sk2 != NULL;
		     sk2 = sk2->next) {
			if (sk2->num == snum &&
			    sk2 != sk &&
			    !ipv6_only_sock(sk2) &&
			    (!sk2->bound_dev_if ||
			     !sk->bound_dev_if ||
			     sk2->bound_dev_if == sk->bound_dev_if) &&
			    (!sk2->rcv_saddr ||
			     !sk->rcv_saddr ||
			     sk2->rcv_saddr == sk->rcv_saddr) &&
			    (!sk2->reuse || !sk->reuse))
				goto fail;
		}
	}
	sk->num = snum;
	/* Not yet hashed: insert at the head of the bucket and take the
	 * references the hash table owns. */
	if (sk->pprev == NULL) {
		struct sock **skp = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
		if ((sk->next = *skp) != NULL)
			(*skp)->pprev = &sk->next;
		*skp = sk;
		sk->pprev = skp;
		sock_prot_inc_use(sk->prot);
		sock_hold(sk);
	}
	write_unlock_bh(&udp_hash_lock);
	return 0;

fail:
	write_unlock_bh(&udp_hash_lock);
	return 1;
}
194
/* UDP sockets are inserted into the hash only by udp_v4_get_port(),
 * never through the generic hash hook; reaching this is a kernel bug. */
static void udp_v4_hash(struct sock *sk)
{
	BUG();
}
199
/* Remove 'sk' from its hash chain (if currently hashed) and release the
 * use count and socket reference taken when it was inserted. */
static void udp_v4_unhash(struct sock *sk)
{
	write_lock_bh(&udp_hash_lock);
	if (sk->pprev) {
		if (sk->next)
			sk->next->pprev = sk->pprev;
		*sk->pprev = sk->next;
		sk->pprev = NULL;
		sk->num = 0;
		sock_prot_dec_use(sk->prot);
		__sock_put(sk);
	}
	write_unlock_bh(&udp_hash_lock);
}
214
215 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
216 * harder than this. -DaveM
217 */
/*
 * Best-match socket lookup for a unicast datagram.  Each candidate in
 * the destination-port bucket is scored: +1 for being an AF_INET socket
 * (vs. a v4-mapped IPv6 one), +2 for every bound attribute (local addr,
 * remote addr, remote port, device) that matches explicitly; a mismatch
 * on any bound attribute disqualifies.  9 is a perfect score and ends
 * the search early.  Caller must hold udp_hash_lock.
 */
struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
	struct sock *sk, *result = NULL;
	unsigned short hnum = ntohs(dport);
	int badness = -1;

	for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
		if(sk->num == hnum && !ipv6_only_sock(sk)) {
			int score;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
			score = sk->family == PF_INET ? 1 : 0;
#else
			score = 1;
#endif
			if(sk->rcv_saddr) {
				if(sk->rcv_saddr != daddr)
					continue;
				score+=2;
			}
			if(sk->daddr) {
				if(sk->daddr != saddr)
					continue;
				score+=2;
			}
			if(sk->dport) {
				if(sk->dport != sport)
					continue;
				score+=2;
			}
			if(sk->bound_dev_if) {
				if(sk->bound_dev_if != dif)
					continue;
				score+=2;
			}
			if(score == 9) {
				result = sk;
				break;
			} else if(score > badness) {
				result = sk;
				badness = score;
			}
		}
	}
	return result;
}
263
/*
 * Locked wrapper around udp_v4_lookup_longway(): finds the best-match
 * socket and takes a reference on it before dropping the hash lock.
 * The caller owns that reference and must sock_put() it.
 */
__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
{
	struct sock *result;

	read_lock(&udp_hash_lock);
	result = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
	if (result != NULL)
		sock_hold(result);
	read_unlock(&udp_hash_lock);

	return result;
}
275
276 extern int ip_mc_sf_allow(struct sock *sk, u32 local, u32 rmt, int dif);
277
/*
 * Advance from 'sk' along the hash chain to the next socket eligible to
 * receive this multicast/broadcast datagram, or NULL when there is none.
 * A socket qualifies when every attribute it has bound (remote address,
 * remote port, local address, device) is compatible with the packet and
 * the multicast source filter admits it.
 */
static inline struct sock *udp_v4_mcast_next(struct sock *sk,
					     u16 loc_port, u32 loc_addr,
					     u16 rmt_port, u32 rmt_addr,
					     int dif)
{
	unsigned short hnum = ntohs(loc_port);
	struct sock *s;

	for (s = sk; s != NULL; s = s->next) {
		if (s->num != hnum)
			continue;
		if (s->daddr && s->daddr != rmt_addr)
			continue;
		if (s->dport != 0 && s->dport != rmt_port)
			continue;
		if (s->rcv_saddr && s->rcv_saddr != loc_addr)
			continue;
		if (ipv6_only_sock(s))
			continue;
		if (s->bound_dev_if && s->bound_dev_if != dif)
			continue;
		if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
			continue;
		break;
	}
	return s;
}
299
300 /*
301 * This routine is called by the ICMP module when it gets some
302 * sort of error condition. If err < 0 then the socket should
303 * be closed and the error returned to the user. If err > 0
304 * it's just the icmp type << 8 | icmp code.
305 * Header points to the ip header of the error packet. We move
306 * on past this. Then (as it used to claim before adjustment)
307 * header points to the first 8 bytes of the udp header. We need
308 * to find the appropriate port.
309 */
310
/*
 * ICMP error handler for UDP (see the comment block above): map the ICMP
 * type/code to an errno, find the owning socket and report the error to
 * it according to its IP_RECVERR setting.
 */
void udp_err(struct sk_buff *skb, u32 info)
{
	struct iphdr *iph = (struct iphdr*)skb->data;
	struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct sock *sk;
	int harderr;
	int err;

	/* The ICMP payload echoes OUR original packet, so its source is our
	 * destination and vice versa — hence the swapped lookup arguments. */
	sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
	if (sk == NULL) {
		ICMP_INC_STATS_BH(IcmpInErrors);
		return;	/* No socket for error */
	}

	err = 0;
	harderr = 0;

	switch (type) {
	default:
	case ICMP_TIME_EXCEEDED:	/* unknown types fall through here */
		err = EHOSTUNREACH;
		break;
	case ICMP_SOURCE_QUENCH:
		goto out;	/* nothing to report to the application */
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
			if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT) {
				err = EMSGSIZE;
				harderr = 1;
				break;
			}
			goto out;
		}
		err = EHOSTUNREACH;
		if (code <= NR_ICMP_UNREACH) {
			harderr = icmp_err_convert[code].fatal;
			err = icmp_err_convert[code].errno;
		}
		break;
	}

	/*
	 *	RFC1122: OK.  Passes ICMP errors back to application, as per
	 *	4.1.3.3.
	 */
	if (!sk->protinfo.af_inet.recverr) {
		/* Without IP_RECVERR only hard errors on connected sockets
		 * are surfaced. */
		if (!harderr || sk->state != TCP_ESTABLISHED)
			goto out;
	} else {
		/* Queue the full error report for MSG_ERRQUEUE readers. */
		ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
	}
	sk->err = err;
	sk->error_report(sk);
out:
	sock_put(sk);	/* drop the reference taken by udp_v4_lookup() */
}
373
udp_check(struct udphdr * uh,int len,unsigned long saddr,unsigned long daddr,unsigned long base)374 static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
375 {
376 return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
377 }
378
/* Scratch state threaded through ip_build_xmit()'s getfrag callbacks
 * while a datagram is copied from user space. */
struct udpfakehdr
{
	struct udphdr uh;	/* UDP header being built for this packet */
	u32 saddr;		/* source address for the pseudo-header */
	u32 daddr;		/* destination address for the pseudo-header */
	struct iovec *iov;	/* user payload still to be copied */
	u32 wcheck;		/* running partial checksum of the payload */
};
387
388 /*
389 * Copy and checksum a UDP packet from user space into a buffer.
390 */
391
/*
 * Copy and checksum one fragment of a UDP packet from user space into
 * 'to'.  The offset-0 fragment carries the UDP header: when it is
 * handled, ufh->wcheck already holds the payload sum accumulated by the
 * other fragments, so the header checksum can be finalized here.
 */
static int udp_getfrag(const void *p, char * to, unsigned int offset,
		       unsigned int fraglen, struct sk_buff *skb)
{
	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
	if (offset==0) {
		/* Copy the payload portion of the first fragment, folding it
		 * into the running checksum. */
		if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
						   fraglen-sizeof(struct udphdr), &ufh->wcheck))
			return -EFAULT;
		/* Add the UDP header itself, then the pseudo-header. */
 		ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
				   ufh->wcheck);
		ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr,
					  ntohs(ufh->uh.len),
					  IPPROTO_UDP, ufh->wcheck);
		/* A computed checksum of 0 must be sent as all-ones: 0 on the
		 * wire means "no checksum" (RFC 768). */
		if (ufh->uh.check == 0)
			ufh->uh.check = -1;
		memcpy(to, ufh, sizeof(struct udphdr));
		return 0;
	}
	/* Non-first fragment: plain payload, offset by the header size. */
	if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
					   fraglen, &ufh->wcheck))
		return -EFAULT;
	return 0;
}
415
416 /*
417 * Copy a UDP packet from user space into a buffer without checksumming.
418 */
419
udp_getfrag_nosum(const void * p,char * to,unsigned int offset,unsigned int fraglen,struct sk_buff * skb)420 static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset,
421 unsigned int fraglen, struct sk_buff *skb)
422 {
423 struct udpfakehdr *ufh = (struct udpfakehdr *)p;
424
425 if (offset==0) {
426 memcpy(to, ufh, sizeof(struct udphdr));
427 return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
428 fraglen-sizeof(struct udphdr));
429 }
430 return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
431 fraglen);
432 }
433
/*
 * Send one UDP datagram.  Resolves the destination (explicit msg_name or
 * the connected peer), applies control-message overrides and IP options,
 * routes the packet and hands it to ip_build_xmit() with the appropriate
 * checksumming getfrag callback.  Returns bytes sent or negative errno.
 */
int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
	int ulen = len + sizeof(struct udphdr);
	struct ipcm_cookie ipc;
	struct udpfakehdr ufh;
	struct rtable *rt = NULL;
	int free = 0;		/* ipc.opt was allocated and must be kfreed */
	int connected = 0;	/* fast path: cached route may be reused */
	u32 daddr;
	u8 tos;
	int err;

	/* This check is ONLY to check for arithmetic overflow
	   on integer(!) len. Not more! Real check will be made
	   in ip_build_xmit --ANK

	   BTW socket.c -> af_*.c -> ... make multiple
	   invalid conversions size_t -> int. We MUST repair it f.e.
	   by replacing all of them with size_t and revise all
	   the places sort of len += sizeof(struct iphdr)
	   If len was ULONG_MAX-10 it would be catastrophe  --ANK
	 */

	if (len < 0 || len > 0xFFFF)
		return -EMSGSIZE;

	/*
	 *	Check the flags.
	 */

	if (msg->msg_flags&MSG_OOB)	/* Mirror BSD error message compatibility */
		return -EOPNOTSUPP;

	/*
	 *	Get and verify the address.
	 */

	if (msg->msg_name) {
		struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
		if (msg->msg_namelen < sizeof(*usin))
			return -EINVAL;
		if (usin->sin_family != AF_INET) {
			if (usin->sin_family != AF_UNSPEC)
				return -EINVAL;
		}

		ufh.daddr = usin->sin_addr.s_addr;
		ufh.uh.dest = usin->sin_port;
		if (ufh.uh.dest == 0)
			return -EINVAL;
	} else {
		if (sk->state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		ufh.daddr = sk->daddr;
		ufh.uh.dest = sk->dport;
		/* Open fast path for connected socket.
		   Route will not be used, if at least one option is set.
		 */
		connected = 1;
	}
	ipc.addr = sk->saddr;
	ufh.uh.source = sk->sport;

	ipc.opt = NULL;
	ipc.oif = sk->bound_dev_if;
	if (msg->msg_controllen) {
		/* Control messages may override source address, output
		 * device or IP options — any of them kills the fast path. */
		err = ip_cmsg_send(msg, &ipc);
		if (err)
			return err;
		if (ipc.opt)
			free = 1;
		connected = 0;
	}
	if (!ipc.opt)
		ipc.opt = sk->protinfo.af_inet.opt;

	ufh.saddr = ipc.addr;
	ipc.addr = daddr = ufh.daddr;

	if (ipc.opt && ipc.opt->srr) {
		/* Source routing: route to the first hop, not the final
		 * destination. */
		if (!daddr)
			return -EINVAL;
		daddr = ipc.opt->faddr;
		connected = 0;
	}
	tos = RT_TOS(sk->protinfo.af_inet.tos);
	if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) ||
	    (ipc.opt && ipc.opt->is_strictroute)) {
		tos |= RTO_ONLINK;	/* restrict to directly connected hosts */
		connected = 0;
	}

	if (MULTICAST(daddr)) {
		/* Fall back to the configured multicast device/source. */
		if (!ipc.oif)
			ipc.oif = sk->protinfo.af_inet.mc_index;
		if (!ufh.saddr)
			ufh.saddr = sk->protinfo.af_inet.mc_addr;
		connected = 0;
	}

	if (connected)
		rt = (struct rtable*)sk_dst_check(sk, 0);

	if (rt == NULL) {
		err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
		if (err)
			goto out;

		err = -EACCES;
		if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast)
			goto out;	/* broadcast requires SO_BROADCAST */
		if (connected)
			sk_dst_set(sk, dst_clone(&rt->u.dst));
	}

	if (msg->msg_flags&MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	ufh.saddr = rt->rt_src;
	if (!ipc.addr)
		ufh.daddr = ipc.addr = rt->rt_dst;
	ufh.uh.len = htons(ulen);
	ufh.uh.check = 0;
	ufh.iov = msg->msg_iov;
	ufh.wcheck = 0;

	/* RFC1122: OK.  Provides the checksumming facility (MUST) as per */
	/* 4.1.3.4. It's configurable by the application via setsockopt() */
	/* (MAY) and it defaults to on (MUST). */

	err = ip_build_xmit(sk,
			    (sk->no_check == UDP_CSUM_NOXMIT ?
			     udp_getfrag_nosum :
			     udp_getfrag),
			    &ufh, ulen, &ipc, rt, msg->msg_flags);

out:
	ip_rt_put(rt);
	if (free)
		kfree(ipc.opt);
	if (!err) {
		UDP_INC_STATS_USER(UdpOutDatagrams);
		return len;
	}
	return err;

do_confirm:
	/* MSG_CONFIRM: refresh neighbour reachability; with MSG_PROBE and
	 * no data, confirm only and send nothing. */
	dst_confirm(&rt->u.dst);
	if (!(msg->msg_flags&MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}
588
589 /*
590 * IOCTL requests applicable to the UDP protocol
591 */
592
udp_ioctl(struct sock * sk,int cmd,unsigned long arg)593 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
594 {
595 switch(cmd)
596 {
597 case SIOCOUTQ:
598 {
599 int amount = atomic_read(&sk->wmem_alloc);
600 return put_user(amount, (int *)arg);
601 }
602
603 case SIOCINQ:
604 {
605 struct sk_buff *skb;
606 unsigned long amount;
607
608 amount = 0;
609 spin_lock_irq(&sk->receive_queue.lock);
610 skb = skb_peek(&sk->receive_queue);
611 if (skb != NULL) {
612 /*
613 * We will only return the amount
614 * of this packet since that is all
615 * that will be read.
616 */
617 amount = skb->len - sizeof(struct udphdr);
618 }
619 spin_unlock_irq(&sk->receive_queue.lock);
620 return put_user(amount, (int *)arg);
621 }
622
623 default:
624 return -ENOIOCTLCMD;
625 }
626 return(0);
627 }
628
__udp_checksum_complete(struct sk_buff * skb)629 static __inline__ int __udp_checksum_complete(struct sk_buff *skb)
630 {
631 return (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
632 }
633
udp_checksum_complete(struct sk_buff * skb)634 static __inline__ int udp_checksum_complete(struct sk_buff *skb)
635 {
636 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
637 __udp_checksum_complete(skb);
638 }
639
640
641 /**
642 * udp_poll - wait for a UDP event.
643 * @file - file struct
644 * @sock - socket
645 * @wait - poll table
646 *
647 * This is same as datagram poll, except for the special case of
648 * blocking sockets. If application is using a blocking fd
649 * and a packet with checksum error is in the queue;
650 * then it could get return from select indicating data available
651 * but then block when reading it. Add special case code
652 * to work around these arguably broken applications.
653 */
unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	unsigned int mask = datagram_poll(file, sock, wait);
	struct sock *sk = sock->sk;

	/* Check for false positives due to checksum errors */
	if ( (mask & POLLRDNORM) &&
	     !(file->f_flags & O_NONBLOCK) &&
	     !(sk->shutdown & RCV_SHUTDOWN)){
		struct sk_buff_head *rcvq = &sk->receive_queue;
		struct sk_buff *skb;

		/* Purge datagrams with bad checksums from the head of the
		 * queue so we never report readable for data that recvmsg()
		 * would have to discard. */
		spin_lock_irq(&rcvq->lock);
		while ((skb = skb_peek(rcvq)) != NULL) {
			if (udp_checksum_complete(skb)) {
				UDP_INC_STATS_BH(UdpInErrors);
				IP_INC_STATS_BH(IpInDiscards);
				__skb_unlink(skb, rcvq);
				kfree_skb(skb);
			} else {
				/* verified once; skip the recheck on read */
				skb->ip_summed = CHECKSUM_UNNECESSARY;
				break;
			}
		}
		spin_unlock_irq(&rcvq->lock);

		/* nothing to see, move along */
		if (skb == NULL)
			mask &= ~(POLLIN | POLLRDNORM);
	}

	return mask;

}
688
689 /*
690 * This should be easy, if there is something there we
691 * return it, otherwise we block.
692 */
693
/*
 * Receive one datagram (blocking unless noblock/MSG_DONTWAIT).  Returns
 * the number of payload bytes copied — or the full datagram length when
 * MSG_TRUNC is passed — or a negative errno.  Datagrams that fail the
 * checksum are dropped and the wait is retried.
 */
int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
		int noblock, int flags, int *addr_len)
{
	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
	struct sk_buff *skb;
	int copied, err;

	/*
	 *	Check any passed addresses
	 */
	if (addr_len)
		*addr_len=sizeof(*sin);

	if (flags & MSG_ERRQUEUE)
		return ip_recv_error(sk, msg, len);

try_again:
	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (!skb)
		goto out;

	copied = skb->len - sizeof(struct udphdr);
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	/* Verify the checksum while copying when possible.  A truncated
	 * copy cannot do that, so verify the whole datagram up front. */
	if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
					      copied);
	} else if (msg->msg_flags&MSG_TRUNC) {
		if (__udp_checksum_complete(skb))
			goto csum_copy_err;
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
					      copied);
	} else {
		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);

		/* -EINVAL from the csum-and-copy means checksum failure */
		if (err == -EINVAL)
			goto csum_copy_err;
	}

	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	/* Copy the address. */
	if (sin)
	{
		sin->sin_family = AF_INET;
		sin->sin_port = skb->h.uh->source;
		sin->sin_addr.s_addr = skb->nh.iph->saddr;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
	}
	if (sk->protinfo.af_inet.cmsg_flags)
		ip_cmsg_recv(msg, skb);

	err = copied;
	if (flags & MSG_TRUNC)
		err = skb->len - sizeof(struct udphdr);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;

csum_copy_err:
	UDP_INC_STATS_BH(UdpInErrors);

	/* Clear queue. */
	if (flags&MSG_PEEK) {
		/* A peeked datagram stays on the queue; unlink and free it
		 * if it is still at the head so we don't spin on it. */
		int clear = 0;
		spin_lock_irq(&sk->receive_queue.lock);
		if (skb == skb_peek(&sk->receive_queue)) {
			__skb_unlink(skb, &sk->receive_queue);
			clear = 1;
		}
		spin_unlock_irq(&sk->receive_queue.lock);
		if (clear)
			kfree_skb(skb);
	}

	skb_free_datagram(sk, skb);

	if (noblock)
		return -EAGAIN;
	goto try_again;
}
783
/*
 * "Connect" a UDP socket: fix the peer address/port, resolve and cache a
 * route, and fill in our source address.  Returns 0 or negative errno.
 */
int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
	struct rtable *rt;
	u32 saddr;
	int oif;
	int err;


	if (addr_len < sizeof(*usin))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	sk_dst_reset(sk);	/* any previously cached route is now stale */

	oif = sk->bound_dev_if;
	saddr = sk->saddr;
	if (MULTICAST(usin->sin_addr.s_addr)) {
		/* Multicast peer: fall back to the configured multicast
		 * device/source address. */
		if (!oif)
			oif = sk->protinfo.af_inet.mc_index;
		if (!saddr)
			saddr = sk->protinfo.af_inet.mc_addr;
	}
	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
			       RT_CONN_FLAGS(sk), oif);
	if (err)
		return err;
	if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
		/* broadcast destination requires SO_BROADCAST */
		ip_rt_put(rt);
		return -EACCES;
	}
	if(!sk->saddr)
		sk->saddr = rt->rt_src;		/* Update source address */
	if(!sk->rcv_saddr)
		sk->rcv_saddr = rt->rt_src;
	sk->daddr = rt->rt_dst;
	sk->dport = usin->sin_port;
	sk->state = TCP_ESTABLISHED;
	sk->protinfo.af_inet.id = jiffies;	/* seed the IP ID counter */

	sk_dst_set(sk, &rt->u.dst);
	return(0);
}
829
/* Undo udp_connect(): clear the peer and, unless the user explicitly
 * locked them, the local address binding and port. */
int udp_disconnect(struct sock *sk, int flags)
{
	/*
	 *	1003.1g - break association.
	 */

	sk->state = TCP_CLOSE;
	sk->daddr = 0;
	sk->dport = 0;
	sk->bound_dev_if = 0;
	if (!(sk->userlocks&SOCK_BINDADDR_LOCK)) {
		sk->rcv_saddr = 0;
		sk->saddr = 0;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		/* also clear the IPv6 side of a dual-stack socket */
		memset(&sk->net_pinfo.af_inet6.saddr, 0, 16);
		memset(&sk->net_pinfo.af_inet6.rcv_saddr, 0, 16);
#endif
	}
	if (!(sk->userlocks&SOCK_BINDPORT_LOCK)) {
		sk->prot->unhash(sk);
		sk->sport = 0;
	}
	sk_dst_reset(sk);	/* drop the cached route */
	return 0;
}
855
/* UDP keeps no protocol state of its own on close; the generic inet
 * release does all the work.  'timeout' is unused. */
static void udp_close(struct sock *sk, long timeout)
{
	inet_sock_release(sk);
}
860
/* Deliver one datagram to a socket's receive queue; returns 0 on
 * success, -1 when the packet had to be dropped (queue full or bad
 * checksum under a socket filter). */
static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
	/*
	 *	Charge it to the socket, dropping if the queue is full.
	 */

#if defined(CONFIG_FILTER)
	/* A socket filter must inspect verified data, so complete any
	 * deferred checksum before the filter runs in sock_queue_rcv_skb. */
	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
		if (__udp_checksum_complete(skb)) {
			UDP_INC_STATS_BH(UdpInErrors);
			IP_INC_STATS_BH(IpInDiscards);
			/* undo the IpInDelivers bump made in udp_rcv() */
			ip_statistics[smp_processor_id()*2].IpInDelivers--;
			kfree_skb(skb);
			return -1;
		}
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
#endif

	if (sock_queue_rcv_skb(sk,skb)<0) {
		UDP_INC_STATS_BH(UdpInErrors);
		IP_INC_STATS_BH(IpInDiscards);
		/* undo the IpInDelivers bump made in udp_rcv() */
		ip_statistics[smp_processor_id()*2].IpInDelivers--;
		kfree_skb(skb);
		return -1;
	}
	UDP_INC_STATS_BH(UdpInDatagrams);
	return 0;
}
890
891 /*
892 * Multicasts and broadcasts go to each listener.
893 *
894 * Note: called only from the BH handler context,
895 * so we don't need to lock the hashes.
896 */
static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
				 u32 saddr, u32 daddr)
{
	struct sock *sk;
	int dif;

	read_lock(&udp_hash_lock);
	sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
	dif = skb->dev->ifindex;
	sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
	if (sk) {
		struct sock *sknext = NULL;

		do {
			struct sk_buff *skb1 = skb;

			/* Look ahead for a further listener: clone only when
			 * one exists, so the last recipient consumes the
			 * original skb without an extra copy. */
			sknext = udp_v4_mcast_next(sk->next, uh->dest, daddr,
						   uh->source, saddr, dif);
			if(sknext)
				skb1 = skb_clone(skb, GFP_ATOMIC);

			if(skb1)	/* clone may fail under memory pressure */
				udp_queue_rcv_skb(sk, skb1);
			sk = sknext;
		} while(sknext);
	} else
		kfree_skb(skb);	/* no listeners at all */
	read_unlock(&udp_hash_lock);
	return 0;
}
927
928 /* Initialize UDP checksum. If exited with zero value (success),
929 * CHECKSUM_UNNECESSARY means, that no more checks are required.
 * Otherwise, csum completion requires checksumming packet body,
931 * including udp header and folding it to skb->csum.
932 */
static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
			     unsigned short ulen, u32 saddr, u32 daddr)
{
	if (uh->check == 0) {
		/* Sender transmitted no checksum — legal for UDP over IPv4. */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else if (skb->ip_summed == CHECKSUM_HW) {
		/* Hardware summed the packet body: verify it against the
		 * pseudo-header now; fall back to software on mismatch. */
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
			return 0;
		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
		skb->ip_summed = CHECKSUM_NONE;
	}
	if (skb->ip_summed != CHECKSUM_UNNECESSARY)
		/* Seed skb->csum with the pseudo-header sum; the body will be
		 * folded in later by __udp_checksum_complete(). */
		skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
	/* Probably, we should checksum udp header (it should be in cache
	 * in any case) and data in tiny packets (< rx copybreak).
	 */
	return 0;
}
952
953 /*
954 * All we need to do is get the socket, and then do a checksum.
955 */
956
/* Main IPv4 UDP receive entry point: validate the header and length,
 * set up checksum state, then demultiplex to multicast/broadcast
 * delivery or a single socket.  Always consumes the skb; returns 0. */
int udp_rcv(struct sk_buff *skb)
{
	struct sock *sk;
	struct udphdr *uh;
	unsigned short ulen;
	struct rtable *rt = (struct rtable*)skb->dst;
	u32 saddr = skb->nh.iph->saddr;
	u32 daddr = skb->nh.iph->daddr;
	int len = skb->len;

	IP_INC_STATS_BH(IpInDelivers);

	/*
	 *	Validate the packet and the UDP length.
	 */
	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
		goto no_header;

	uh = skb->h.uh;

	ulen = ntohs(uh->len);

	if (ulen > len || ulen < sizeof(*uh))
		goto short_packet;

	/* Trim off anything beyond the UDP length (e.g. link padding). */
	if (pskb_trim(skb, ulen))
		goto short_packet;

	if (udp_checksum_init(skb, uh, ulen, saddr, daddr) < 0)
		goto csum_error;

	if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
		return udp_v4_mcast_deliver(skb, uh, saddr, daddr);

	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);

	if (sk != NULL) {
		udp_queue_rcv_skb(sk, skb);
		sock_put(sk);	/* release ref from udp_v4_lookup() */
		return 0;
	}

	/* No socket. Drop packet silently, if checksum is wrong */
	if (udp_checksum_complete(skb))
		goto csum_error;

	UDP_INC_STATS_BH(UdpNoPorts);
	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);

	/*
	 * Hmm.  We got an UDP packet to a port to which we
	 * don't wanna listen.  Ignore it.
	 */
	kfree_skb(skb);
	return(0);

short_packet:
	NETDEBUG(if (net_ratelimit())
		 printk(KERN_DEBUG "UDP: short packet: %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
			NIPQUAD(saddr),
			ntohs(uh->source),
			ulen,
			len,
			NIPQUAD(daddr),
			ntohs(uh->dest)));
no_header:
	UDP_INC_STATS_BH(UdpInErrors);
	kfree_skb(skb);
	return(0);

csum_error:
	/*
	 * RFC1122: OK.  Discards the bad packet silently (as far as
	 * the network is concerned, anyway) as per 4.1.3.4 (MUST).
	 */
	NETDEBUG(if (net_ratelimit())
		 printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
			NIPQUAD(saddr),
			ntohs(uh->source),
			NIPQUAD(daddr),
			ntohs(uh->dest),
			ulen));
	UDP_INC_STATS_BH(UdpInErrors);
	kfree_skb(skb);
	return(0);
}
1043
get_udp_sock(struct sock * sp,char * tmpbuf,int i)1044 static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
1045 {
1046 unsigned int dest, src;
1047 __u16 destp, srcp;
1048
1049 dest = sp->daddr;
1050 src = sp->rcv_saddr;
1051 destp = ntohs(sp->dport);
1052 srcp = ntohs(sp->sport);
1053 sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
1054 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
1055 i, src, srcp, dest, destp, sp->state,
1056 atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
1057 0, 0L, 0,
1058 sock_i_uid(sp), 0,
1059 sock_i_ino(sp),
1060 atomic_read(&sp->refcnt), sp);
1061 }
1062
/* /proc/net/udp read handler.  Every record (header included) is padded
 * to exactly 128 bytes so the 'offset'/'pos' arithmetic below can skip
 * whole records from earlier reads. */
int udp_get_info(char *buffer, char **start, off_t offset, int length)
{
	int len = 0, num = 0, i;
	off_t pos = 0;
	off_t begin;
	char tmpbuf[129];

	if (offset < 128)	/* the header line is the first record */
		len += sprintf(buffer, "%-127s\n",
			       "  sl  local_address rem_address   st tx_queue "
			       "rx_queue tr tm->when retrnsmt   uid  timeout inode");
	pos = 128;
	read_lock(&udp_hash_lock);
	for (i = 0; i < UDP_HTABLE_SIZE; i++) {
		struct sock *sk;

		for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
			if (sk->family != PF_INET)
				continue;
			pos += 128;
			if (pos <= offset)	/* already delivered earlier */
				continue;
			get_udp_sock(sk, tmpbuf, i);
			len += sprintf(buffer+len, "%-127s\n", tmpbuf);
			if(len >= length)
				goto out;
		}
	}
out:
	read_unlock(&udp_hash_lock);
	/* Discard the partial record at the front and clamp to 'length'. */
	begin = len - (pos - offset);
	*start = buffer + begin;
	len -= begin;
	if(len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
1102
/* UDP's protocol method table, plugged into the generic AF_INET socket
 * layer.  setsockopt/getsockopt fall straight through to the IP level
 * since UDP has no options of its own here. */
struct proto udp_prot = {
	name:		"UDP",
	close:		udp_close,
	connect:	udp_connect,
	disconnect:	udp_disconnect,
	ioctl:		udp_ioctl,
	setsockopt:	ip_setsockopt,
	getsockopt:	ip_getsockopt,
	sendmsg:	udp_sendmsg,
	recvmsg:	udp_recvmsg,
	backlog_rcv:	udp_queue_rcv_skb,
	hash:		udp_v4_hash,
	unhash:		udp_v4_unhash,
	get_port:	udp_v4_get_port,
};
1118