/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all protocols. Possibly a generic IP version on top
 *	of these would make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and NetROM layer all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was shared before but buried in udp.c so I moved it.
 *
 *	Authors:	Alan Cox <alan@redhat.com>. (datagram_poll() from old udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy() understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET. IPX can no longer use the SO_TYPE hack but
 *					AX.25 now works right, and SPX is feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol crash.
 *		Florian La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff	:	Unconnected accept() fix.
 *
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>

#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/checksum.h>


/*
 *	Is a socket 'connection oriented' ?
 */

static inline int connection_based(struct sock *sk)
{
	return (sk->type==SOCK_SEQPACKET || sk->type==SOCK_STREAM);
}


/*
 * Wait for a packet..
 */

static int wait_for_packet(struct sock * sk, int *err, long *timeo_p)
{
	int error;

	DECLARE_WAITQUEUE(wait, current);

	__set_current_state(TASK_INTERRUPTIBLE);
	add_wait_queue_exclusive(sk->sleep, &wait);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (!skb_queue_empty(&sk->receive_queue))
		goto ready;

	/* Socket shut down? */
	if (sk->shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected. If so we report the problem */
	error = -ENOTCONN;
	if(connection_based(sk) && !(sk->state==TCP_ESTABLISHED || sk->state==TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	*timeo_p = schedule_timeout(*timeo_p);

ready:
	current->state = TASK_RUNNING;
	remove_wait_queue(sk->sleep, &wait);
	return 0;

interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
out:
	current->state = TASK_RUNNING;
	remove_wait_queue(sk->sleep, &wait);
	return error;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}

/*
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups and possible
 *	races. This replaces identical code in packet, raw and udp, as well as the IPX,
 *	AX.25 and Appletalk layers. It also finally fixes the long standing peek and read
 *	race for datagram sockets. If you alter this routine remember it must be
 *	re-entrant.
 *
 *	Historically this function locked the socket when an skb was returned, and the
 *	caller had to unlock it again (usually by calling skb_free_datagram). That is
 *	no longer the case:
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	*  8) Great win.)
 *	*			                    --ANK (980729)
 *
 *	The order of the tests when we find no data waiting is specified
 *	quite explicitly by POSIX 1003.1g; don't change it without having
 *	the standard around, please.
 */

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err)
{
	int error;
	struct sk_buff *skb;
	long timeo;

	/* Caller is allowed not to check sk->err before skb_recv_datagram() */
	error = sock_error(sk);
	if (error)
		goto no_packet;

	timeo = sock_rcvtimeo(sk, noblock);

	do {
		/* Again, only user level code calls this function, so nothing
		   interrupt level will suddenly eat the receive_queue.

		   Look at current nfs client by the way...
		   However, this function was correct in any case. 8)
		 */
		if (flags & MSG_PEEK)
		{
			unsigned long cpu_flags;

			spin_lock_irqsave(&sk->receive_queue.lock, cpu_flags);
			skb = skb_peek(&sk->receive_queue);
			if(skb!=NULL)
				atomic_inc(&skb->users);
			spin_unlock_irqrestore(&sk->receive_queue.lock, cpu_flags);
		} else
			skb = skb_dequeue(&sk->receive_queue);

		if (skb)
			return skb;

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (wait_for_packet(sk, err, &timeo) == 0);

	return NULL;

no_packet:
	*err = error;
	return NULL;
}

void skb_free_datagram(struct sock * sk, struct sk_buff *skb)
{
	kfree_skb(skb);
}
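
/*
 * Example (illustrative sketch only, not part of this file): the calling
 * sequence a datagram protocol's recvmsg() typically uses, in the style of
 * udp_recvmsg(). The function name, the zero header offset and the omission
 * of address and checksum handling are assumptions made for brevity.
 */
#if 0
static int example_recvmsg(struct sock *sk, struct msghdr *msg, int len,
			   int noblock, int flags, int *addr_len)
{
	struct sk_buff *skb;
	int copied, err;

	/* Wait for (or peek at) a queued datagram, honouring the timeout. */
	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		return err;

	/* Datagram semantics: truncate to the caller's buffer if needed. */
	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	/* Copy linear, paged and chained data into the user iovec. */
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	/* Drop our reference; for MSG_PEEK the skb stays on the queue. */
	skb_free_datagram(sk, skb);
	return err ? err : copied;
}
#endif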

/*
 *	Copy a datagram to a linear buffer.
 */

int skb_copy_datagram(const struct sk_buff *skb, int offset, char *to, int size)
{
	struct iovec iov = { to, size };

	return skb_copy_datagram_iovec(skb, offset, &iov, size);
}

/*
 *	Copy a datagram to an iovec.
 *	Note: the iovec is modified during the copy.
 */
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, struct iovec *to,
			    int len)
{
	int i, copy;
	int start = skb->len - skb->data_len;

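	/* The datagram's bytes may live in three places: the linear header
	   area (the first 'start' bytes), the paged fragments, and any skbs
	   chained on frag_list. 'offset' indexes the logical byte stream,
	   while 'start'/'end' bracket where each piece falls within it. */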
	/* Copy header. */
	if ((copy = start-offset) > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovec(to, skb->data + offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			int err;
			u8  *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovec(to, vaddr + frag->page_offset +
					     offset-start, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				if (copy > len)
					copy = len;
				if (skb_copy_datagram_iovec(list, offset-start, to, copy))
					goto fault;
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return 0;

fault:
	return -EFAULT;
}
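
/*
 * Example (illustrative sketch, not part of this file): because
 * memcpy_toiovec() advances iov_base and iov_len as it copies, a single
 * iovec can be consumed by successive calls. The function name and the
 * 8-byte split are assumptions made purely for illustration.
 */
#if 0
static int example_copy_in_two_pieces(const struct sk_buff *skb, struct iovec *iov)
{
	int err;

	/* Fill the first 8 bytes of the user buffer from the datagram... */
	err = skb_copy_datagram_iovec(skb, 0, iov, 8);
	if (err)
		return err;

	/* ...then continue right after them: the iovec was modified by the
	   first copy, so no manual pointer arithmetic is needed here. */
	return skb_copy_datagram_iovec(skb, 8, iov, skb->len - 8);
}
#endif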

int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump)
{
	int i, copy;
	int start = skb->len - skb->data_len;
	int pos = 0;
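	/* 'pos' counts the bytes already checksummed: the Internet checksum
	   works on 16-bit words, so csum_block_add() uses this offset to
	   fold in partial sums that begin at an odd byte position. */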

	/* Copy header. */
	if ((copy = start-offset) > 0) {
		int err = 0;
		if (copy > len)
			copy = len;
		*csump = csum_and_copy_to_user(skb->data+offset, to, copy, *csump, &err);
		if (err)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
		int end;

		BUG_TRAP(start <= offset+len);

		end = start + skb_shinfo(skb)->frags[i].size;
		if ((copy = end-offset) > 0) {
			unsigned int csum2;
			int err = 0;
			u8  *vaddr;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			struct page *page = frag->page;

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			csum2 = csum_and_copy_to_user(vaddr + frag->page_offset +
						      offset-start, to, copy, 0, &err);
			kunmap(page);
			if (err)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list;

		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
			int end;

			BUG_TRAP(start <= offset+len);

			end = start + list->len;
			if ((copy = end-offset) > 0) {
				unsigned int csum2 = 0;
				if (copy > len)
					copy = len;
				if (skb_copy_and_csum_datagram(list, offset-start, to, copy, &csum2))
					goto fault;
				*csump = csum_block_add(*csump, csum2, pos);
				if ((len -= copy) == 0)
					return 0;
				offset += copy;
				to += copy;
				pos += copy;
			}
			start = end;
		}
	}
	if (len == 0)
		return 0;

fault:
	return -EFAULT;
}

/* Copy and checksum skb to user iovec. Caller _must_ check that
   the skb will fit into this iovec.

   Returns: 0       - success.
            -EINVAL - checksum failure.
            -EFAULT - fault during copy. Beware, in this case the iovec
                      can be modified!
 */

int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov)
{
	unsigned int csum;
	int chunk = skb->len - hlen;

	/* Skip filled elements. Pretty silly, look at memcpy_toiovec, though 8) */
	while (iov->iov_len == 0)
		iov++;

	if (iov->iov_len < chunk) {
		if ((unsigned short)csum_fold(skb_checksum(skb, 0, chunk+hlen, skb->csum)))
			goto csum_error;
		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base, chunk, &csum))
			goto fault;
		if ((unsigned short)csum_fold(csum))
			goto csum_error;
		iov->iov_len -= chunk;
		iov->iov_base += chunk;
	}
	return 0;

csum_error:
	return -EINVAL;

fault:
	return -EFAULT;
}
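
/*
 * Example (illustrative sketch, not part of this file): how a protocol
 * typically chooses between the plain and the checksumming copy, in the
 * style of udp_recvmsg(). The 8-byte header length, the function name and
 * the assumption that the iovec can hold skb->len - 8 bytes are all
 * illustrative only.
 */
#if 0
static int example_copy_checked(struct sk_buff *skb, struct msghdr *msg, int copied)
{
	/* Hardware (or an earlier pass) already verified the checksum:
	   a plain copy is sufficient. */
	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
		return skb_copy_datagram_iovec(skb, 8, msg->msg_iov, copied);

	/* Otherwise verify while copying; -EINVAL means the checksum was
	   bad and the caller should drop the packet. */
	return skb_copy_and_csum_datagram_iovec(skb, 8, msg->msg_iov);
}
#endif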



/*
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets, providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you _don't_ use this routine for your protocol,
 *	and you use a different write policy from sock_writeable(),
 *	then please supply your own write_space callback.
 */

unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	poll_wait(file, sk->sleep, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->err || !skb_queue_empty(&sk->error_queue))
		mask |= POLLERR;
	if (sk->shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->receive_queue) || (sk->shutdown&RCV_SHUTDOWN))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->state==TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);

	return mask;
}
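
/*
 * Example (illustrative sketch, not part of this file): a protocol whose
 * receive queue only ever holds ready-to-read datagrams simply points its
 * proto_ops at datagram_poll. The ops name and the PF_INET family are
 * placeholder assumptions; the remaining members are omitted.
 */
#if 0
static struct proto_ops example_dgram_ops = {
	family:		PF_INET,
	poll:		datagram_poll,	/* shared datagram poll from this file */
	/* release, bind, connect, sendmsg, recvmsg, ... omitted */
};
#endif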