1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan.cox@linux.org>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Version:	$Id: af_unix.c,v 1.126.2.5 2002/03/05 12:47:34 davem Exp $
12  *
13  * Fixes:
14  *		Linus Torvalds	:	Assorted bug cures.
15  *		Niibe Yutaka	:	async I/O support.
16  *		Carsten Paeth	:	PF_UNIX check, address fixes.
17  *		Alan Cox	:	Limit size of allocated blocks.
18  *		Alan Cox	:	Fixed the stupid socketpair bug.
19  *		Alan Cox	:	BSD compatibility fine tuning.
20  *		Alan Cox	:	Fixed a bug in connect when interrupted.
21  *		Alan Cox	:	Sorted out a proper draft version of
22  *					file descriptor passing hacked up from
23  *					Mike Shaver's work.
24  *		Marty Leisner	:	Fixes to fd passing
25  *		Nick Nevin	:	recvmsg bugfix.
26  *		Alan Cox	:	Started proper garbage collector
27  *		Heiko EiBfeldt	:	Missing verify_area check
28  *		Alan Cox	:	Started POSIXisms
29  *		Andreas Schwab	:	Replace inode by dentry for proper
30  *					reference counting
31  *		Kirk Petersen	:	Made this a module
32  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
33  *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
42  *					Security fix that limits the max
43  *					number of socks to 2*max_files and
44  *					the number of skb queueable in the
45  *					dgram receiver.
46  *		Artur Skawina   :	Hash function optimizations
47  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
48  *	      Malcolm Beattie   :	Set peercred for socketpair
49  *	     Michal Ostrowski   :       Module initialization cleanup.
50  *
51  *
52  * Known differences from reference BSD that was tested:
53  *
54  *	[TO FIX]
55  *	ECONNREFUSED is not returned from one end of a connected() socket to the
56  *		other the moment one end closes.
57  *	fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark
58  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
59  *	[NOT TO FIX]
60  *	accept() returns a path name even if the connecting socket has closed
61  *		in the meantime (BSD loses the path and gives up).
62  *	accept() returns 0 length path for an unbound connector. BSD returns 16
63  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
64  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
65  *	BSD af_unix apparently has connect forgetting to block properly.
66  *		(need to check this with the POSIX spec in detail)
67  *
68  * Differences from 2.0.0-11-... (ANK)
69  *	Bug fixes and improvements.
70  *		- client shutdown killed server socket.
71  *		- removed all useless cli/sti pairs.
72  *
73  *	Semantic changes/extensions.
74  *		- generic control message passing.
75  *		- SCM_CREDENTIALS control message.
76  *		- "Abstract" (not FS based) socket bindings.
77  *		  Abstract names are sequences of bytes (not zero terminated)
78  *		  started by 0, so that this name space does not intersect
79  *		  with BSD names.
80  */
81 
82 #include <linux/module.h>
83 #include <linux/config.h>
84 #include <linux/kernel.h>
85 #include <linux/major.h>
86 #include <linux/signal.h>
87 #include <linux/sched.h>
88 #include <linux/errno.h>
89 #include <linux/string.h>
90 #include <linux/stat.h>
91 #include <linux/socket.h>
92 #include <linux/un.h>
93 #include <linux/fcntl.h>
94 #include <linux/termios.h>
95 #include <linux/sockios.h>
96 #include <linux/net.h>
97 #include <linux/in.h>
98 #include <linux/fs.h>
99 #include <linux/slab.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <net/sock.h>
104 #include <linux/tcp.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/smp_lock.h>
111 #include <linux/rtnetlink.h>
112 
113 #include <asm/checksum.h>
114 
/* Initial max_ack_backlog that unix_create1() gives every new socket. */
int sysctl_unix_max_dgram_qlen = 10;

/*
 * Hash chains of AF_UNIX sockets.  The extra slot at index UNIX_HASH_SIZE
 * is the chain of sockets that have not been bound yet
 * (see unix_sockets_unbound).
 */
unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
/* Taken around every walk or modification of unix_socket_table chains. */
rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
/* Count of allocated socks: raised in unix_create1(), dropped in the destructor. */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* Head of the chain that unix_create1() inserts new sockets into. */
#define unix_sockets_unbound	(unix_socket_table[UNIX_HASH_SIZE])

/*
 * Non-zero when the socket's address hash differs from UNIX_HASH_SIZE,
 * the value unix_bind() stores for filesystem names.  The socket must
 * already have an address.
 */
#define UNIX_ABSTRACT(sk)	((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE)
124 
125 /*
126  *  SMP locking strategy:
127  *    hash table is protected with rwlock unix_table_lock
128  *    each socket state is protected by separate rwlock.
129  */
130 
/*
 * Fold a checksum into a hash table index: mix the high-order bits down
 * into the low-order ones, then mask to the table size.
 */
static inline unsigned unix_hash_fold(unsigned hash)
{
	unsigned folded = hash ^ (hash >> 16);

	folded ^= folded >> 8;
	return folded & (UNIX_HASH_SIZE - 1);
}
137 
/* The sock this one is paired with (NULL when it has no peer); usable as an lvalue. */
#define unix_peer(sk) ((sk)->pair)
139 
/* Non-zero when osk's peer pointer refers back to sk. */
static inline int unix_our_peer(unix_socket *sk, unix_socket *osk)
{
	unix_socket *their_peer = unix_peer(osk);

	return their_peer == sk;
}
144 
/* sk may send to osk when osk has no peer at all, or when its peer is sk. */
static inline int unix_may_send(unix_socket *sk, unix_socket *osk)
{
	if (unix_peer(osk) == NULL)
		return 1;

	return unix_our_peer(sk, osk);
}
149 
/*
 * Return the peer of s with an extra reference taken, or NULL when s has
 * no peer.  The peer pointer is sampled under the state read lock of s;
 * the caller must sock_put() a non-NULL result.
 */
static inline unix_socket * unix_peer_get(unix_socket *s)
{
	unix_socket *held;

	unix_state_rlock(s);
	held = unix_peer(s);
	if (held != NULL)
		sock_hold(held);
	unix_state_runlock(s);

	return held;
}
161 
unix_release_addr(struct unix_address * addr)162 extern inline void unix_release_addr(struct unix_address *addr)
163 {
164 	if (atomic_dec_and_test(&addr->refcnt))
165 		kfree(addr);
166 }
167 
/*
 *	Check a unix socket name and work out its effective length:
 *		- it must be longer than the family field and no longer
 *		  than struct sockaddr_un, with family AF_UNIX.
 *		- if the path starts with a non-zero byte it is a filesystem
 *		  name: it is NUL terminated in place and the returned length
 *		  is sizeof(short) + strlen(path) + 1.
 *		- if the path starts with zero it is an abstract name: the
 *		  length is returned unchanged.
 *
 *	*hashp receives the folded checksum of an abstract name, and 0 for a
 *	filesystem name, so callers never read an indeterminate value.
 *
 *	Returns the effective length, or -EINVAL for a malformed name.
 */

static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	if (sunaddr->sun_path[0]) {
		/*
		 * Terminate at offset len.  With len == sizeof(*sunaddr)
		 * this writes one byte past the structure.
		 * NOTE(review): presumably the caller's address buffer is
		 * larger than struct sockaddr_un - confirm.
		 */
		((char *)sunaddr)[len] = 0;
		/* Filesystem names are hashed by inode, not by name. */
		*hashp = 0;
		return strlen(sunaddr->sun_path) + 1 + sizeof(short);
	}

	*hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
	return len;
}
190 
/*
 * Unlink sk from whichever hash chain it is on and drop the reference the
 * chain held.  Does nothing when sk is on no chain.  The callers in this
 * file hold unix_table_lock for writing.
 */
static void __unix_remove_socket(unix_socket *sk)
{
	unix_socket **head = sk->protinfo.af_unix.list;
	unix_socket *after, *before;

	if (head == NULL)
		return;

	after = sk->next;
	before = sk->prev;

	if (after)
		after->prev = before;
	if (before)
		before->next = after;
	if (*head == sk)
		*head = after;

	sk->protinfo.af_unix.list = NULL;
	sk->prev = NULL;
	sk->next = NULL;
	__sock_put(sk);
}
207 
/*
 * Push sk onto the front of the chain headed by *list and take a reference
 * on behalf of the chain.  sk must not be on any chain yet.  The callers in
 * this file hold unix_table_lock for writing.
 */
static void __unix_insert_socket(unix_socket **list, unix_socket *sk)
{
	unix_socket *old_head;

	BUG_TRAP(sk->protinfo.af_unix.list==NULL);

	old_head = *list;

	sk->protinfo.af_unix.list = list;
	sk->next = old_head;
	sk->prev = NULL;
	if (old_head != NULL)
		old_head->prev = sk;
	*list = sk;

	sock_hold(sk);
}
220 
/* Locked wrapper: unhash sk while holding unix_table_lock for writing. */
static inline void unix_remove_socket(unix_socket *sk)
{
	write_lock(&unix_table_lock);

	__unix_remove_socket(sk);

	write_unlock(&unix_table_lock);
}
227 
/* Locked wrapper: hash sk onto *list while holding unix_table_lock for writing. */
static inline void unix_insert_socket(unix_socket **list, unix_socket *sk)
{
	write_lock(&unix_table_lock);

	__unix_insert_socket(list, sk);

	write_unlock(&unix_table_lock);
}
234 
__unix_find_socket_byname(struct sockaddr_un * sunname,int len,int type,unsigned hash)235 static unix_socket *__unix_find_socket_byname(struct sockaddr_un *sunname,
236 					      int len, int type, unsigned hash)
237 {
238 	unix_socket *s;
239 
240 	for (s=unix_socket_table[hash^type]; s; s=s->next) {
241 		if(s->protinfo.af_unix.addr->len==len &&
242 		   memcmp(s->protinfo.af_unix.addr->name, sunname, len) == 0)
243 			return s;
244 	}
245 	return NULL;
246 }
247 
248 static inline unix_socket *
unix_find_socket_byname(struct sockaddr_un * sunname,int len,int type,unsigned hash)249 unix_find_socket_byname(struct sockaddr_un *sunname,
250 			int len, int type, unsigned hash)
251 {
252 	unix_socket *s;
253 
254 	read_lock(&unix_table_lock);
255 	s = __unix_find_socket_byname(sunname, len, type, hash);
256 	if (s)
257 		sock_hold(s);
258 	read_unlock(&unix_table_lock);
259 	return s;
260 }
261 
unix_find_socket_byinode(struct inode * i)262 static unix_socket *unix_find_socket_byinode(struct inode *i)
263 {
264 	unix_socket *s;
265 
266 	read_lock(&unix_table_lock);
267 	for (s=unix_socket_table[i->i_ino & (UNIX_HASH_SIZE-1)]; s; s=s->next)
268 	{
269 		struct dentry *dentry = s->protinfo.af_unix.dentry;
270 
271 		if(dentry && dentry->d_inode == i)
272 		{
273 			sock_hold(s);
274 			break;
275 		}
276 	}
277 	read_unlock(&unix_table_lock);
278 	return s;
279 }
280 
unix_writable(struct sock * sk)281 static inline int unix_writable(struct sock *sk)
282 {
283 	return ((atomic_read(&sk->wmem_alloc)<<2) <= sk->sndbuf);
284 }
285 
unix_write_space(struct sock * sk)286 static void unix_write_space(struct sock *sk)
287 {
288 	read_lock(&sk->callback_lock);
289 	if (unix_writable(sk)) {
290 		if (sk->sleep && waitqueue_active(sk->sleep))
291 			wake_up_interruptible(sk->sleep);
292 		sk_wake_async(sk, 2, POLL_OUT);
293 	}
294 	read_unlock(&sk->callback_lock);
295 }
296 
297 /* When dgram socket disconnects (or changes its peer), we clear its receive
298  * queue of packets arrived from previous peer. First, it allows to do
299  * flow control based only on wmem_alloc; second, sk connected to peer
300  * may receive messages only from that peer. */
unix_dgram_disconnected(struct sock * sk,struct sock * other)301 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
302 {
303 	if (skb_queue_len(&sk->receive_queue)) {
304 		skb_queue_purge(&sk->receive_queue);
305 		wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
306 
307 		/* If one link of bidirectional dgram pipe is disconnected,
308 		 * we signal error. Messages are lost. Do not make this,
309 		 * when peer was not connected to us.
310 		 */
311 		if (!other->dead && unix_peer(other) == sk) {
312 			other->err = ECONNRESET;
313 			other->error_report(other);
314 		}
315 	}
316 }
317 
unix_sock_destructor(struct sock * sk)318 static void unix_sock_destructor(struct sock *sk)
319 {
320 	skb_queue_purge(&sk->receive_queue);
321 
322 	BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0);
323 	BUG_TRAP(sk->protinfo.af_unix.list==NULL);
324 	BUG_TRAP(sk->socket==NULL);
325 	if (sk->dead==0) {
326 		printk("Attempt to release alive unix socket: %p\n", sk);
327 		return;
328 	}
329 
330 	if (sk->protinfo.af_unix.addr)
331 		unix_release_addr(sk->protinfo.af_unix.addr);
332 
333 	atomic_dec(&unix_nr_socks);
334 #ifdef UNIX_REFCNT_DEBUG
335 	printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
336 #endif
337 	MOD_DEC_USE_COUNT;
338 }
339 
/*
 * Tear down one AF_UNIX sock: unhash it, orphan it, notify a stream peer,
 * flush its receive queue, release its filesystem references and drop one
 * reference on the sock itself.
 *
 * @sk:      sock to release.
 * @embrion: non-zero when sk is a queued, not yet accepted connection being
 *           discarded together with its listener (see the recursive call
 *           below); the stream peer is then always given ECONNRESET.
 *
 * Always returns 0.
 */
static int unix_release_sock (unix_socket *sk, int embrion)
{
	struct dentry *dentry;
	struct vfsmount *mnt;
	unix_socket *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state; dentry, mnt and the old state are saved for use below */
	unix_state_wlock(sk);
	sock_orphan(sk);
	sk->shutdown = SHUTDOWN_MASK;
	dentry = sk->protinfo.af_unix.dentry;
	sk->protinfo.af_unix.dentry=NULL;
	mnt = sk->protinfo.af_unix.mnt;
	sk->protinfo.af_unix.mnt=NULL;
	state = sk->state;
	sk->state = TCP_CLOSE;
	unix_state_wunlock(sk);

	/* Wake everybody sleeping on this sock's peer_wait queue */
	wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);

	skpair=unix_peer(sk);

	if (skpair!=NULL) {
		if (sk->type==SOCK_STREAM) {
			unix_state_wlock(skpair);
			skpair->shutdown=SHUTDOWN_MASK;	/* No more writes*/
			/* Unread data (or a dropped embryo) is a reset for the peer */
			if (!skb_queue_empty(&sk->receive_queue) || embrion)
				skpair->err = ECONNRESET;
			unix_state_wunlock(skpair);
			skpair->state_change(skpair);
			read_lock(&skpair->callback_lock);
			sk_wake_async(skpair,1,POLL_HUP);
			read_unlock(&skpair->callback_lock);
			yield(); /* let the other side wake up */
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
	{
		/* On a listener each queued skb stands for a pending
		 * connection; release that sock as an embryo. */
		if (state==TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		kfree_skb(skb);
	}

	/* Drop the filesystem references saved above */
	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (atomic_read(&unix_tot_inflight))
		unix_gc();		/* Garbage collect fds */

	return 0;
}
418 
unix_listen(struct socket * sock,int backlog)419 static int unix_listen(struct socket *sock, int backlog)
420 {
421 	int err;
422 	struct sock *sk = sock->sk;
423 
424 	err = -EOPNOTSUPP;
425 	if (sock->type!=SOCK_STREAM)
426 		goto out;			/* Only stream sockets accept */
427 	err = -EINVAL;
428 	if (!sk->protinfo.af_unix.addr)
429 		goto out;			/* No listens on an unbound socket */
430 	unix_state_wlock(sk);
431 	if (sk->state != TCP_CLOSE && sk->state != TCP_LISTEN)
432 		goto out_unlock;
433 	if (backlog > sk->max_ack_backlog)
434 		wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
435 	sk->max_ack_backlog=backlog;
436 	sk->state=TCP_LISTEN;
437 	/* set credentials so connect can copy them */
438 	sk->peercred.pid = current->pid;
439 	sk->peercred.uid = current->euid;
440 	sk->peercred.gid = current->egid;
441 	err = 0;
442 
443 out_unlock:
444 	unix_state_wunlock(sk);
445 out:
446 	return err;
447 }
448 
449 extern struct proto_ops unix_stream_ops;
450 extern struct proto_ops unix_dgram_ops;
451 
unix_create1(struct socket * sock)452 static struct sock * unix_create1(struct socket *sock)
453 {
454 	struct sock *sk;
455 
456 	if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
457 		return NULL;
458 
459 	MOD_INC_USE_COUNT;
460 	sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1);
461 	if (!sk) {
462 		MOD_DEC_USE_COUNT;
463 		return NULL;
464 	}
465 
466 	atomic_inc(&unix_nr_socks);
467 
468 	sock_init_data(sock,sk);
469 
470 	sk->write_space		=	unix_write_space;
471 
472 	sk->max_ack_backlog = sysctl_unix_max_dgram_qlen;
473 	sk->destruct = unix_sock_destructor;
474 	sk->protinfo.af_unix.dentry=NULL;
475 	sk->protinfo.af_unix.mnt=NULL;
476 	sk->protinfo.af_unix.lock = RW_LOCK_UNLOCKED;
477 	atomic_set(&sk->protinfo.af_unix.inflight, sock ? 0 : -1);
478 	init_MUTEX(&sk->protinfo.af_unix.readsem);/* single task reading lock */
479 	init_waitqueue_head(&sk->protinfo.af_unix.peer_wait);
480 	sk->protinfo.af_unix.list=NULL;
481 	unix_insert_socket(&unix_sockets_unbound, sk);
482 
483 	return sk;
484 }
485 
unix_create(struct socket * sock,int protocol)486 static int unix_create(struct socket *sock, int protocol)
487 {
488 	if (protocol && protocol != PF_UNIX)
489 		return -EPROTONOSUPPORT;
490 
491 	sock->state = SS_UNCONNECTED;
492 
493 	switch (sock->type) {
494 	case SOCK_STREAM:
495 		sock->ops = &unix_stream_ops;
496 		break;
497 		/*
498 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
499 		 *	nothing uses it.
500 		 */
501 	case SOCK_RAW:
502 		sock->type=SOCK_DGRAM;
503 	case SOCK_DGRAM:
504 		sock->ops = &unix_dgram_ops;
505 		break;
506 	default:
507 		return -ESOCKTNOSUPPORT;
508 	}
509 
510 	return unix_create1(sock) ? 0 : -ENOMEM;
511 }
512 
unix_release(struct socket * sock)513 static int unix_release(struct socket *sock)
514 {
515 	unix_socket *sk = sock->sk;
516 
517 	if (!sk)
518 		return 0;
519 
520 	sock->sk = NULL;
521 
522 	return unix_release_sock (sk, 0);
523 }
524 
unix_autobind(struct socket * sock)525 static int unix_autobind(struct socket *sock)
526 {
527 	struct sock *sk = sock->sk;
528 	static u32 ordernum = 1;
529 	struct unix_address * addr;
530 	int err;
531 
532 	down(&sk->protinfo.af_unix.readsem);
533 
534 	err = 0;
535 	if (sk->protinfo.af_unix.addr)
536 		goto out;
537 
538 	err = -ENOMEM;
539 	addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
540 	if (!addr)
541 		goto out;
542 
543 	memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
544 	addr->name->sun_family = AF_UNIX;
545 	atomic_set(&addr->refcnt, 1);
546 
547 retry:
548 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
549 	addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
550 
551 	write_lock(&unix_table_lock);
552 	ordernum = (ordernum+1)&0xFFFFF;
553 
554 	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
555 				      addr->hash)) {
556 		write_unlock(&unix_table_lock);
557 		/* Sanity yield. It is unusual case, but yet... */
558 		if (!(ordernum&0xFF))
559 			yield();
560 		goto retry;
561 	}
562 	addr->hash ^= sk->type;
563 
564 	__unix_remove_socket(sk);
565 	sk->protinfo.af_unix.addr = addr;
566 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
567 	write_unlock(&unix_table_lock);
568 	err = 0;
569 
570 out:
571 	up(&sk->protinfo.af_unix.readsem);
572 	return err;
573 }
574 
unix_find_other(struct sockaddr_un * sunname,int len,int type,unsigned hash,int * error)575 static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len,
576 				    int type, unsigned hash, int *error)
577 {
578 	unix_socket *u;
579 	struct nameidata nd;
580 	int err = 0;
581 
582 	if (sunname->sun_path[0]) {
583 		if (path_init(sunname->sun_path,
584 			      LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd))
585 			err = path_walk(sunname->sun_path, &nd);
586 		if (err)
587 			goto fail;
588 		err = permission(nd.dentry->d_inode,MAY_WRITE);
589 		if (err)
590 			goto put_fail;
591 
592 		err = -ECONNREFUSED;
593 		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
594 			goto put_fail;
595 		u=unix_find_socket_byinode(nd.dentry->d_inode);
596 		if (!u)
597 			goto put_fail;
598 
599 		if (u->type == type)
600 			UPDATE_ATIME(nd.dentry->d_inode);
601 
602 		path_release(&nd);
603 
604 		err=-EPROTOTYPE;
605 		if (u->type != type) {
606 			sock_put(u);
607 			goto fail;
608 		}
609 	} else {
610 		err = -ECONNREFUSED;
611 		u=unix_find_socket_byname(sunname, len, type, hash);
612 		if (u) {
613 			struct dentry *dentry;
614 			dentry = u->protinfo.af_unix.dentry;
615 			if (dentry)
616 				UPDATE_ATIME(dentry->d_inode);
617 		} else
618 			goto fail;
619 	}
620 	return u;
621 
622 put_fail:
623 	path_release(&nd);
624 fail:
625 	*error=err;
626 	return NULL;
627 }
628 
629 
/*
 * bind() for AF_UNIX.
 *
 * An address consisting of the family only is handed to unix_autobind().
 * A filesystem name gets a socket inode created with vfs_mknod() and the
 * socket is hashed by that inode's number; an abstract name is hashed by
 * the folded checksum of the name combined with the socket type.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a bad address or
 * an already bound socket, -ENOMEM, -EADDRINUSE when the name is taken
 * (a VFS -EEXIST is mapped to it), -ENOENT for a trailing slash on a
 * non-existent name, or whatever the path lookup / vfs_mknod() returned.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct dentry * dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	unix_socket **list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Family only, no path at all: choose a name automatically */
	if (addr_len==sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* readsem serialises binds on the same socket */
	down(&sk->protinfo.af_unix.readsem);

	/* Already bound */
	err = -EINVAL;
	if (sk->protinfo.af_unix.addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	/* For filesystem names this value is replaced further down */
	addr->hash = hash^sk->type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		if (path_init(sunaddr->sun_path, LOOKUP_PARENT, &nd))
			err = path_walk(sunaddr->sun_path, &nd);
		if (err)
			goto out_mknod_parent;
		/*
		 * Yucky last component or no last component at all?
		 * (foo/., foo/.., /////)
		 */
		err = -EEXIST;
		if (nd.last_type != LAST_NORM)
			goto out_mknod;
		/*
		 * Lock the directory.
		 */
		down(&nd.dentry->d_inode->i_sem);
		/*
		 * Do the final lookup.
		 */
		dentry = lookup_hash(&nd.last, nd.dentry);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;
		err = -ENOENT;
		/*
		 * Special case - lookup gave negative, but... we had foo/bar/
		 * From the vfs_mknod() POV we just have a negative dentry -
		 * all is fine. Let's be bastards - you had / on the end, you've
		 * been asking for (non-existent) directory. -ENOENT for you.
		 */
		if (nd.last.name[nd.last.len] && !dentry->d_inode)
			goto out_mknod_dput;
		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK | (sock->inode->i_mode & ~current->fs->umask);
		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
		if (err)
			goto out_mknod_dput;
		up(&nd.dentry->d_inode->i_sem);
		/* From here on nd refers to the new socket inode, not its parent */
		dput(nd.dentry);
		nd.dentry = dentry;

		/* Marks the address as a filesystem name (see UNIX_ABSTRACT) */
		addr->hash = UNIX_HASH_SIZE;
	}

	write_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		/* Abstract name: it must not be in use yet */
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(sunaddr, addr_len,
					      sk->type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		/* Filesystem name: chain is picked by inode number; the sock
		 * takes over the dentry and mount references held in nd */
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		sk->protinfo.af_unix.dentry = nd.dentry;
		sk->protinfo.af_unix.mnt = nd.mnt;
	}

	/* Move the socket from its current chain to the chosen one */
	err = 0;
	__unix_remove_socket(sk);
	sk->protinfo.af_unix.addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	write_unlock(&unix_table_lock);
out_up:
	up(&sk->protinfo.af_unix.readsem);
out:
	return err;

	/* Unwind for failures while creating the filesystem node */
out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	up(&nd.dentry->d_inode->i_sem);
out_mknod:
	path_release(&nd);
out_mknod_parent:
	if (err==-EEXIST)
		err=-EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}
764 
unix_dgram_connect(struct socket * sock,struct sockaddr * addr,int alen,int flags)765 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
766 			      int alen, int flags)
767 {
768 	struct sock *sk = sock->sk;
769 	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
770 	struct sock *other;
771 	unsigned hash;
772 	int err;
773 
774 	if (addr->sa_family != AF_UNSPEC) {
775 		err = unix_mkname(sunaddr, alen, &hash);
776 		if (err < 0)
777 			goto out;
778 		alen = err;
779 
780 		if (sock->passcred && !sk->protinfo.af_unix.addr &&
781 		    (err = unix_autobind(sock)) != 0)
782 			goto out;
783 
784 		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
785 		if (!other)
786 			goto out;
787 
788 		unix_state_wlock(sk);
789 
790 		err = -EPERM;
791 		if (!unix_may_send(sk, other))
792 			goto out_unlock;
793 	} else {
794 		/*
795 		 *	1003.1g breaking connected state with AF_UNSPEC
796 		 */
797 		other = NULL;
798 		unix_state_wlock(sk);
799 	}
800 
801 	/*
802 	 * If it was connected, reconnect.
803 	 */
804 	if (unix_peer(sk)) {
805 		struct sock *old_peer = unix_peer(sk);
806 		unix_peer(sk)=other;
807 		unix_state_wunlock(sk);
808 
809 		if (other != old_peer)
810 			unix_dgram_disconnected(sk, old_peer);
811 		sock_put(old_peer);
812 	} else {
813 		unix_peer(sk)=other;
814 		unix_state_wunlock(sk);
815 	}
816  	return 0;
817 
818 out_unlock:
819 	unix_state_wunlock(sk);
820 	sock_put(other);
821 out:
822 	return err;
823 }
824 
/*
 * Sleep on other's peer_wait queue while its receive queue is over the
 * backlog limit.
 *
 * Called with other's state read lock held; the lock is dropped here
 * before sleeping.  The task is queued before the condition is tested.
 * It sleeps only when other is alive, not shut down for receive and
 * still over its limit.
 *
 * Returns the remaining timeout (unchanged when no sleep was needed).
 */
static long unix_wait_for_peer(unix_socket *other, long timeo)
{
	DECLARE_WAITQUEUE(wait, current);
	int must_sleep = 0;

	__set_current_state(TASK_INTERRUPTIBLE);
	add_wait_queue_exclusive(&other->protinfo.af_unix.peer_wait, &wait);

	if (!other->dead && !(other->shutdown & RCV_SHUTDOWN))
		must_sleep = skb_queue_len(&other->receive_queue) >
			     other->max_ack_backlog;

	unix_state_runlock(other);

	if (must_sleep)
		timeo = schedule_timeout(timeo);

	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&other->protinfo.af_unix.peer_wait, &wait);

	return timeo;
}
846 
/*
 * connect() for stream sockets.
 *
 * A new sock (the future accepted end) and a one byte skb are allocated
 * up front.  The listener is then looked up and checked under its state
 * read lock; if its queue is over the backlog the caller sleeps (or gets
 * -EAGAIN when the send timeout is zero) and the lookup is restarted.
 * Finally sk and the new sock are paired and the skb, which refers to
 * the new sock, is queued on the listener.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -ECONNREFUSED,
 * -EAGAIN, -EISCONN, -EINVAL, an interrupted-sleep error, or the error
 * from name checking, autobind or lookup).
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct sock *newsk = NULL;
	unix_socket *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* A passcred socket without an address gets one automatically */
	if (sock->passcred && !sk->protinfo.af_unix.addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other=unix_find_other(sunaddr, addr_len, sk->type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (other->dead) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->state != TCP_LISTEN)
		goto out_unlock;

	if (other->shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops the read lock on other before sleeping */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
        }

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_wlock(sk);

	/* State changed while we were taking the lock: start over */
	if (sk->state != st) {
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	/* The way is open! Fastly set all the necessary fields... */

	/* newsk's peer pointer holds a reference on sk */
	sock_hold(sk);
	unix_peer(newsk)=sk;
	newsk->state=TCP_ESTABLISHED;
	newsk->type=SOCK_STREAM;
	newsk->peercred.pid = current->pid;
	newsk->peercred.uid = current->euid;
	newsk->peercred.gid = current->egid;
	newsk->sleep = &newsk->protinfo.af_unix.peer_wait;

	/* copy address information from listening to new sock*/
	if (other->protinfo.af_unix.addr)
	{
		atomic_inc(&other->protinfo.af_unix.addr->refcnt);
		newsk->protinfo.af_unix.addr=other->protinfo.af_unix.addr;
	}
	if (other->protinfo.af_unix.dentry) {
		newsk->protinfo.af_unix.dentry=dget(other->protinfo.af_unix.dentry);
		newsk->protinfo.af_unix.mnt=mntget(other->protinfo.af_unix.mnt);
	}

	/* Set credentials: those recorded on the listener by unix_listen() */
	sk->peercred = other->peercred;

	/* sk's peer pointer holds a reference on newsk */
	sock_hold(newsk);
	unix_peer(sk)=newsk;
	sock->state=SS_CONNECTED;
	sk->state=TCP_ESTABLISHED;

	unix_state_wunlock(sk);

	/* queue the skb on the listening sock to announce the connection */
	spin_lock(&other->receive_queue.lock);
	__skb_queue_tail(&other->receive_queue,skb);
	/* Undo artificially decreased inflight after embrion
	 * is installed to listening socket. */
	atomic_inc(&newsk->protinfo.af_unix.inflight);
	spin_unlock(&other->receive_queue.lock);
	unix_state_runlock(other);
	other->data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	/* Release whatever was acquired before the failure */
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1017 
unix_socketpair(struct socket * socka,struct socket * sockb)1018 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1019 {
1020 	struct sock *ska=socka->sk, *skb = sockb->sk;
1021 
1022 	/* Join our sockets back to back */
1023 	sock_hold(ska);
1024 	sock_hold(skb);
1025 	unix_peer(ska)=skb;
1026 	unix_peer(skb)=ska;
1027 	ska->peercred.pid = skb->peercred.pid = current->pid;
1028 	ska->peercred.uid = skb->peercred.uid = current->euid;
1029 	ska->peercred.gid = skb->peercred.gid = current->egid;
1030 
1031 	if (ska->type != SOCK_DGRAM)
1032 	{
1033 		ska->state=TCP_ESTABLISHED;
1034 		skb->state=TCP_ESTABLISHED;
1035 		socka->state=SS_CONNECTED;
1036 		sockb->state=SS_CONNECTED;
1037 	}
1038 	return 0;
1039 }
1040 
unix_accept(struct socket * sock,struct socket * newsock,int flags)1041 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1042 {
1043 	unix_socket *sk = sock->sk;
1044 	unix_socket *tsk;
1045 	struct sk_buff *skb;
1046 	int err;
1047 
1048 	err = -EOPNOTSUPP;
1049 	if (sock->type!=SOCK_STREAM)
1050 		goto out;
1051 
1052 	err = -EINVAL;
1053 	if (sk->state!=TCP_LISTEN)
1054 		goto out;
1055 
1056 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1057 	 * so that no locks are necessary.
1058 	 */
1059 
1060 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1061 	if (!skb) {
1062 		/* This means receive shutdown. */
1063 		if (err == 0)
1064 			err = -EINVAL;
1065 		goto out;
1066 	}
1067 
1068 	tsk = skb->sk;
1069 	skb_free_datagram(sk, skb);
1070 	wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
1071 
1072 	/* attach accepted sock to socket */
1073 	unix_state_wlock(tsk);
1074 	newsock->state = SS_CONNECTED;
1075 	sock_graft(tsk, newsock);
1076 	unix_state_wunlock(tsk);
1077 	return 0;
1078 
1079 out:
1080 	return err;
1081 }
1082 
1083 
unix_getname(struct socket * sock,struct sockaddr * uaddr,int * uaddr_len,int peer)1084 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1085 {
1086 	struct sock *sk = sock->sk;
1087 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1088 	int err = 0;
1089 
1090 	if (peer) {
1091 		sk = unix_peer_get(sk);
1092 
1093 		err = -ENOTCONN;
1094 		if (!sk)
1095 			goto out;
1096 		err = 0;
1097 	} else {
1098 		sock_hold(sk);
1099 	}
1100 
1101 	unix_state_rlock(sk);
1102 	if (!sk->protinfo.af_unix.addr)	{
1103 		sunaddr->sun_family = AF_UNIX;
1104 		sunaddr->sun_path[0] = 0;
1105 		*uaddr_len = sizeof(short);
1106 	} else {
1107 		struct unix_address *addr = sk->protinfo.af_unix.addr;
1108 
1109 		*uaddr_len = addr->len;
1110 		memcpy(sunaddr, addr->name, *uaddr_len);
1111 	}
1112 	unix_state_runlock(sk);
1113 	sock_put(sk);
1114 out:
1115 	return err;
1116 }
1117 
unix_detach_fds(struct scm_cookie * scm,struct sk_buff * skb)1118 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1119 {
1120 	int i;
1121 
1122 	scm->fp = UNIXCB(skb).fp;
1123 	skb->destructor = sock_wfree;
1124 	UNIXCB(skb).fp = NULL;
1125 
1126 	for (i=scm->fp->count-1; i>=0; i--)
1127 		unix_notinflight(scm->fp->fp[i]);
1128 }
1129 
unix_destruct_fds(struct sk_buff * skb)1130 static void unix_destruct_fds(struct sk_buff *skb)
1131 {
1132 	struct scm_cookie scm;
1133 	memset(&scm, 0, sizeof(scm));
1134 	unix_detach_fds(&scm, skb);
1135 
1136 	/* Alas, it calls VFS */
1137 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1138 	scm_destroy(&scm);
1139 	sock_wfree(skb);
1140 }
1141 
unix_attach_fds(struct scm_cookie * scm,struct sk_buff * skb)1142 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1143 {
1144 	int i;
1145 	for (i=scm->fp->count-1; i>=0; i--)
1146 		unix_inflight(scm->fp->fp[i]);
1147 	UNIXCB(skb).fp = scm->fp;
1148 	skb->destructor = unix_destruct_fds;
1149 	scm->fp = NULL;
1150 }
1151 
1152 /*
1153  *	Send AF_UNIX data.
1154  */
1155 
/*
 *	Send one datagram.
 *
 *	The receiver is looked up from msg->msg_name when msg->msg_namelen is
 *	non-zero, otherwise it is the socket's current peer (-ENOTCONN if
 *	there is none).  MSG_OOB is refused with -EOPNOTSUPP and a len larger
 *	than sk->sndbuf - 32 with -EMSGSIZE.
 *
 *	Returns len once the skb has been queued on the receiver's
 *	receive_queue, or a negative errno.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len,
			      struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_un *sunaddr=msg->msg_name;
	unix_socket *other = NULL;	/* receiver; released with sock_put() at "out" */
	int namelen = 0; /* initialised only to keep GCC quiet */
	int err;
	unsigned hash;
	struct sk_buff *skb;
	long timeo;

	wait_for_unix_gc();

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		/* Explicit destination: a non-negative result is the name length. */
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No destination given: use the connected peer. */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* With passcred set, an unbound sender is autobound first. */
	if (sock->passcred && !sk->protinfo.af_unix.addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if ((unsigned)len > sk->sndbuf - 32)
		goto out;

	skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
	if (skb==NULL)
		goto out;

	/* Stamp the sender's credentials and attach any passed files. */
	memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
	if (scm->fp)
		unix_attach_fds(scm, skb);

	skb->h.raw = skb->data;
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		/*
		 * No receiver in hand.  Without an explicit address there is
		 * nothing to look up (we get here after a peer that was not,
		 * or no longer, our connected peer turned out to be dead).
		 */
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(sunaddr, namelen, sk->type, hash, &err);
		if (other==NULL)
			goto out_free;
	}

	unix_state_rlock(other);
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (other->dead) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		/* Drop the lock and the reference we held for this attempt. */
		unix_state_runlock(other);
		sock_put(other);

		err = 0;
		unix_state_wlock(sk);
		if (unix_peer(sk) == other) {
			/*
			 * The dead sock is still our connected peer: disconnect
			 * from it and fail with -ECONNREFUSED.  The second
			 * sock_put() presumably drops the reference that the
			 * peer pointer held - NOTE(review): confirm.
			 */
			unix_peer(sk)=NULL;
			unix_state_wunlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_wunlock(sk);
		}

		/* Not our peer (any more): retry, which repeats the lookup. */
		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->shutdown&RCV_SHUTDOWN)
		goto out_unlock;

	/*
	 * Receiver is not connected back to us and its queue is longer than
	 * max_ack_backlog: fail with -EAGAIN if there is no time left to
	 * wait, otherwise wait and try again.
	 */
	if (unix_peer(other) != sk &&
	    skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
		if (!timeo) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/*
		 * NOTE(review): both exits below skip unix_state_runlock(),
		 * so unix_wait_for_peer() presumably returns with other's
		 * state lock already released - confirm in its definition.
		 */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out_free;

		goto restart;
	}

	/* Deliver: queue the skb, unlock, then notify the receiver. */
	skb_queue_tail(&other->receive_queue, skb);
	unix_state_runlock(other);
	other->data_ready(other, len);
	sock_put(other);
	return len;

out_unlock:
	unix_state_runlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	return err;
}
1288 
1289 
/*
 *	Send data on a stream socket.
 *
 *	The data is cut into skbs that are queued one by one on the peer's
 *	receive_queue.  A destination address is never accepted
 *	(-EISCONN when established, -EOPNOTSUPP otherwise); MSG_OOB gives
 *	-EOPNOTSUPP and a missing peer -ENOTCONN.
 *
 *	Returns the number of bytes queued if that is non-zero, otherwise the
 *	error.  On a broken pipe with nothing sent, SIGPIPE is raised unless
 *	MSG_NOSIGNAL was given.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
			       struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	unix_socket *other = NULL;	/* peer; released with sock_put() on every exit */
	struct sockaddr_un *sunaddr=msg->msg_name;
	int err,size;
	struct sk_buff *skb;
	int sent=0;

	wait_for_unix_gc();

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP);
		goto out_err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out_err;
	}

	if (sk->shutdown&SEND_SHUTDOWN)
		goto pipe_err;

	while(sent < len)
	{
		/*
		 *	Optimisation for the fact that under 0.01% of X messages typically
		 *	need breaking up.
		 */

		size=len-sent;

		/* Keep two messages in the pipe so it schedules better */
		if (size > sk->sndbuf/2 - 64)
			size = sk->sndbuf/2 - 64;

		if (size > SKB_MAX_ALLOC)
			size = SKB_MAX_ALLOC;

		/*
		 *	Grab a buffer
		 */

		skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);

		if (skb==NULL)
			goto out_err;

		/*
		 *	If you pass two values to the sock_alloc_send_skb
		 *	it tries to grab the large buffer with GFP_NOFS
		 *	(which can fail easily), and if it fails grab the
		 *	fallback size buffer which is under a page and will
		 *	succeed. [Alan]
		 */
		/* Never copy more than the skb we actually got can hold. */
		size = min_t(int, size, skb_tailroom(skb));

		/*
		 * Every skb carries the sender's credentials.  Passed files
		 * go with the first skb only: unix_attach_fds() clears
		 * scm->fp.
		 */
		memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
		if (scm->fp)
			unix_attach_fds(scm, skb);

		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_rlock(other);

		/* Peer gone or no longer receiving: broken pipe. */
		if (other->dead || (other->shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		skb_queue_tail(&other->receive_queue, skb);
		unix_state_runlock(other);
		other->data_ready(other, size);
		sent+=size;
	}
	sock_put(other);
	return sent;

pipe_err_free:
	unix_state_runlock(other);
	kfree_skb(skb);
pipe_err:
	/* Signal only if nothing at all was delivered. */
	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE,current,0);
	err = -EPIPE;
out_err:
        if (other)
		sock_put(other);
	/* A partial transfer is reported as success; the error is dropped. */
	return sent ? : err;
}
1388 
unix_copy_addr(struct msghdr * msg,struct sock * sk)1389 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1390 {
1391 	msg->msg_namelen = 0;
1392 	if (sk->protinfo.af_unix.addr) {
1393 		msg->msg_namelen=sk->protinfo.af_unix.addr->len;
1394 		memcpy(msg->msg_name,
1395 		       sk->protinfo.af_unix.addr->name,
1396 		       sk->protinfo.af_unix.addr->len);
1397 	}
1398 }
1399 
unix_dgram_recvmsg(struct socket * sock,struct msghdr * msg,int size,int flags,struct scm_cookie * scm)1400 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size,
1401 			      int flags, struct scm_cookie *scm)
1402 {
1403 	struct sock *sk = sock->sk;
1404 	int noblock = flags & MSG_DONTWAIT;
1405 	struct sk_buff *skb;
1406 	int err;
1407 
1408 	err = -EOPNOTSUPP;
1409 	if (flags&MSG_OOB)
1410 		goto out;
1411 
1412 	msg->msg_namelen = 0;
1413 
1414 	down(&sk->protinfo.af_unix.readsem);
1415 
1416 	skb = skb_recv_datagram(sk, flags, noblock, &err);
1417 	if (!skb)
1418 		goto out_unlock;
1419 
1420 	wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
1421 
1422 	if (msg->msg_name)
1423 		unix_copy_addr(msg, skb->sk);
1424 
1425 	if (size > skb->len)
1426 		size = skb->len;
1427 	else if (size < skb->len)
1428 		msg->msg_flags |= MSG_TRUNC;
1429 
1430 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1431 	if (err)
1432 		goto out_free;
1433 
1434 	scm->creds = *UNIXCREDS(skb);
1435 
1436 	if (!(flags & MSG_PEEK))
1437 	{
1438 		if (UNIXCB(skb).fp)
1439 			unix_detach_fds(scm, skb);
1440 	}
1441 	else
1442 	{
1443 		/* It is questionable: on PEEK we could:
1444 		   - do not return fds - good, but too simple 8)
1445 		   - return fds, and do not return them on read (old strategy,
1446 		     apparently wrong)
1447 		   - clone fds (I choosed it for now, it is the most universal
1448 		     solution)
1449 
1450 	           POSIX 1003.1g does not actually define this clearly
1451 	           at all. POSIX 1003.1g doesn't define a lot of things
1452 	           clearly however!
1453 
1454 		*/
1455 		if (UNIXCB(skb).fp)
1456 			scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1457 	}
1458 	err = size;
1459 
1460 out_free:
1461 	skb_free_datagram(sk,skb);
1462 out_unlock:
1463 	up(&sk->protinfo.af_unix.readsem);
1464 out:
1465 	return err;
1466 }
1467 
/*
 *	Sleep until data has arrived, but check for races.
 *
 *	Waits on sk->sleep until the receive queue is non-empty, sk->err is
 *	set, the socket is shut down for receiving, a signal is pending or
 *	the timeout runs out.  Returns the remaining timeout.
 */

static long unix_stream_data_wait(unix_socket * sk, long timeo)
{
	DECLARE_WAITQUEUE(wait, current);

	unix_state_rlock(sk);

	add_wait_queue(sk->sleep, &wait);

	for (;;) {
		/*
		 * Set the task state before testing the wake-up conditions
		 * so that a wake-up arriving in between is not lost.
		 */
		set_current_state(TASK_INTERRUPTIBLE);

		if (skb_queue_len(&sk->receive_queue) ||
		    sk->err ||
		    (sk->shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		/* The state lock is dropped only for the duration of the sleep. */
		set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
		unix_state_runlock(sk);
		timeo = schedule_timeout(timeo);
		unix_state_rlock(sk);
		clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
	}

	__set_current_state(TASK_RUNNING);
	remove_wait_queue(sk->sleep, &wait);
	unix_state_runlock(sk);
	return timeo;
}
1502 
1503 
1504 
/*
 *	Receive data from a stream socket.
 *
 *	Dequeues skbs from the receive queue and copies them into
 *	msg->msg_iov until size bytes are copied, the queue runs dry with at
 *	least the low-water target satisfied, or a stop condition is hit
 *	(different sender credentials, passed files, MSG_PEEK, error).
 *	Partially consumed or unconsumed skbs are put back at the head of
 *	the queue.
 *
 *	Returns the number of bytes copied if that is non-zero, otherwise the
 *	error: -EINVAL when not TCP_ESTABLISHED, -EOPNOTSUPP for MSG_OOB,
 *	-EAGAIN when no time is left to wait, the pending socket error, the
 *	sock_intr_errno() value when interrupted, or -EFAULT when the very
 *	first copy to user space fails.
 */
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size,
			       int flags, struct scm_cookie *scm)
{
	struct sock *sk = sock->sk;
	struct sockaddr_un *sunaddr=msg->msg_name;	/* reset to NULL once the address is copied */
	int copied = 0;
	int check_creds = 0;	/* set once scm->creds holds the first skb's credentials */
	int target;
	int err = 0;
	long timeo;

	err = -EINVAL;
	if (sk->state != TCP_ESTABLISHED)
		goto out;

	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);

	msg->msg_namelen = 0;

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */

	down(&sk->protinfo.af_unix.readsem);

	do
	{
		int chunk;
		struct sk_buff *skb;

		skb=skb_dequeue(&sk->receive_queue);
		if (skb==NULL)
		{
			/* Queue is empty: stop if enough has been read already. */
			if (copied >= target)
				break;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			if ((err = sock_error(sk)) != 0)
				break;
			if (sk->shutdown & RCV_SHUTDOWN)
				break;
			err = -EAGAIN;
			if (!timeo)
				break;
			/* readsem is not held while sleeping. */
			up(&sk->protinfo.af_unix.readsem);

			timeo = unix_stream_data_wait(sk, timeo);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				/* readsem was released above, so skip the up(). */
				goto out;
			}
			down(&sk->protinfo.af_unix.readsem);
			continue;
		}

		if (check_creds) {
			/* Never glue messages from different writers */
			if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) {
				skb_queue_head(&sk->receive_queue, skb);
				break;
			}
		} else {
			/* Copy credentials */
			scm->creds = *UNIXCREDS(skb);
			check_creds = 1;
		}

		/* Copy address just once */
		if (sunaddr)
		{
			unix_copy_addr(msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, skb->len, size);
		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
			/* Copy failed: requeue the skb; report -EFAULT only if nothing was read. */
			skb_queue_head(&sk->receive_queue, skb);
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK))
		{
			skb_pull(skb, chunk);

			if (UNIXCB(skb).fp)
				unix_detach_fds(scm, skb);

			/* put the skb back if we didn't use it up.. */
			if (skb->len)
			{
				skb_queue_head(&sk->receive_queue, skb);
				break;
			}

			kfree_skb(skb);

			/* Stop after an skb that carried passed files. */
			if (scm->fp)
				break;
		}
		else
		{
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm->fp = scm_fp_dup(UNIXCB(skb).fp);

			/* put message back and return */
			skb_queue_head(&sk->receive_queue, skb);
			break;
		}
	} while (size);

	up(&sk->protinfo.af_unix.readsem);
out:
	/* Any non-zero copied value (byte count or -EFAULT) wins over err. */
	return copied ? : err;
}
1635 
unix_shutdown(struct socket * sock,int mode)1636 static int unix_shutdown(struct socket *sock, int mode)
1637 {
1638 	struct sock *sk = sock->sk;
1639 	unix_socket *other;
1640 
1641 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1642 
1643 	if (mode) {
1644 		unix_state_wlock(sk);
1645 		sk->shutdown |= mode;
1646 		other=unix_peer(sk);
1647 		if (other)
1648 			sock_hold(other);
1649 		unix_state_wunlock(sk);
1650 		sk->state_change(sk);
1651 
1652 		if (other && sk->type == SOCK_STREAM) {
1653 			int peer_mode = 0;
1654 
1655 			if (mode&RCV_SHUTDOWN)
1656 				peer_mode |= SEND_SHUTDOWN;
1657 			if (mode&SEND_SHUTDOWN)
1658 				peer_mode |= RCV_SHUTDOWN;
1659 			unix_state_wlock(other);
1660 			other->shutdown |= peer_mode;
1661 			unix_state_wunlock(other);
1662 			other->state_change(other);
1663 			read_lock(&other->callback_lock);
1664 			if (peer_mode == SHUTDOWN_MASK)
1665 				sk_wake_async(other,1,POLL_HUP);
1666 			else if (peer_mode & RCV_SHUTDOWN)
1667 				sk_wake_async(other,1,POLL_IN);
1668 			read_unlock(&other->callback_lock);
1669 		}
1670 		if (other)
1671 			sock_put(other);
1672 	}
1673 	return 0;
1674 }
1675 
unix_ioctl(struct socket * sock,unsigned int cmd,unsigned long arg)1676 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1677 {
1678 	struct sock *sk = sock->sk;
1679 	long amount=0;
1680 	int err;
1681 
1682 	switch(cmd)
1683 	{
1684 		case SIOCOUTQ:
1685 			amount = atomic_read(&sk->wmem_alloc);
1686 			err = put_user(amount, (int *)arg);
1687 			break;
1688 		case SIOCINQ:
1689 		{
1690 			struct sk_buff *skb;
1691 			if (sk->state==TCP_LISTEN) {
1692 				err = -EINVAL;
1693 				break;
1694 			}
1695 
1696 			spin_lock(&sk->receive_queue.lock);
1697 			if (sk->type == SOCK_STREAM) {
1698 				skb_queue_walk(&sk->receive_queue, skb)
1699 					amount += skb->len;
1700 			} else {
1701 				if((skb=skb_peek(&sk->receive_queue))!=NULL)
1702 					amount=skb->len;
1703 			}
1704 			spin_unlock(&sk->receive_queue.lock);
1705 			err = put_user(amount, (int *)arg);
1706 			break;
1707 		}
1708 
1709 		default:
1710 			err = dev_ioctl(cmd, (void *)arg);
1711 			break;
1712 	}
1713 	return err;
1714 }
1715 
/*
 *	poll() handler: registers on sk->sleep and returns the event mask
 *	built from the current socket state.
 *
 *	POLLERR    - sk->err is set
 *	POLLHUP    - both directions shut down, or a SOCK_STREAM socket in
 *	             TCP_CLOSE
 *	POLLIN...  - receive queue non-empty or receive side shut down
 *	POLLOUT... - unix_writable() says so
 */
static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;

	poll_wait(file, sk->sleep, wait);

	/* Exceptional conditions first. */
	if (sk->err)
		mask |= POLLERR;

	if (sk->shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* Readable: data queued, or reads will no longer block. */
	if ((sk->shutdown & RCV_SHUTDOWN) ? 1 : !skb_queue_empty(&sk->receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets also report termination and startup. */
	if (sk->type == SOCK_STREAM && sk->state == TCP_CLOSE)
		mask |= POLLHUP;

	/*
	 * Writable is reported even when the other side has shut the
	 * connection down; this prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;

	return mask;
}
1747 
1748 
1749 #ifdef CONFIG_PROC_FS
/*
 *	/proc read handler listing every unix socket, one per line.
 *
 *	Writes a header line and then, for each socket, its pointer,
 *	reference count, a constant 0 protocol, flags, type, state, inode
 *	number and (if it has an address) its path.  The text is produced
 *	into buffer; offset/length select the window the caller wants.
 *
 *	On return *start points at the requested window inside buffer, *eof
 *	is set only if the whole table was walked, and the return value is
 *	the number of valid bytes at *start (clamped to 0..length).
 *
 *	NOTE(review): nothing in this function bounds len against the size
 *	of buffer; it presumably relies on the caller's buffer being larger
 *	than length by at least one full line - confirm against the caller.
 */
static int unix_read_proc(char *buffer, char **start, off_t offset,
			  int length, int *eof, void *data)
{
	off_t pos=0;	/* position in the whole listing after the current line */
	off_t begin=0;	/* position in the whole listing that buffer[0] stands for */
	int len=0;	/* bytes currently held in buffer */
	int i;
	unix_socket *s;

	len+= sprintf(buffer,"Num       RefCount Protocol Flags    Type St "
	    "Inode Path\n");

	read_lock(&unix_table_lock);
	forall_unix_sockets (i,s)
	{
		unix_state_rlock(s);

		len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5lu",
			s,
			atomic_read(&s->refcnt),
			0,
			s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->type,
			s->socket ?
			(s->state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (s->protinfo.af_unix.addr)
		{
			/* Separator, then the raw sun_path bytes right after it. */
			buffer[len++] = ' ';
			memcpy(buffer+len, s->protinfo.af_unix.addr->name->sun_path,
			       s->protinfo.af_unix.addr->len-sizeof(short));
			/*
			 * Non-abstract: step len back by one so that, after the
			 * addition below, the last copied byte is left out
			 * (presumably the terminating NUL counted in
			 * addr->len - confirm).
			 * Abstract: replace the first copied byte with '@'
			 * (presumably the leading NUL of an abstract name).
			 */
			if (!UNIX_ABSTRACT(s))
				len--;
			else
				buffer[len] = '@';
			len += s->protinfo.af_unix.addr->len - sizeof(short);
		}
		unix_state_runlock(s);

		buffer[len++]='\n';

		pos = begin + len;
		if(pos<offset)
		{
			/* Everything so far lies before the window: discard it. */
			len=0;
			begin=pos;
		}
		/* Past the end of the window: stop without setting *eof. */
		if(pos>offset+length)
			goto done;
	}
	*eof = 1;
done:
	read_unlock(&unix_table_lock);
	/* Skip the part of buffer that precedes the requested offset. */
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if (len < 0)
		len = 0;
	return len;
}
1813 #endif
1814 
1815 struct proto_ops unix_stream_ops = {
1816 	family:		PF_UNIX,
1817 
1818 	release:	unix_release,
1819 	bind:		unix_bind,
1820 	connect:	unix_stream_connect,
1821 	socketpair:	unix_socketpair,
1822 	accept:		unix_accept,
1823 	getname:	unix_getname,
1824 	poll:		unix_poll,
1825 	ioctl:		unix_ioctl,
1826 	listen:		unix_listen,
1827 	shutdown:	unix_shutdown,
1828 	setsockopt:	sock_no_setsockopt,
1829 	getsockopt:	sock_no_getsockopt,
1830 	sendmsg:	unix_stream_sendmsg,
1831 	recvmsg:	unix_stream_recvmsg,
1832 	mmap:		sock_no_mmap,
1833 	sendpage:	sock_no_sendpage,
1834 };
1835 
1836 struct proto_ops unix_dgram_ops = {
1837 	family:		PF_UNIX,
1838 
1839 	release:	unix_release,
1840 	bind:		unix_bind,
1841 	connect:	unix_dgram_connect,
1842 	socketpair:	unix_socketpair,
1843 	accept:		sock_no_accept,
1844 	getname:	unix_getname,
1845 	poll:		datagram_poll,
1846 	ioctl:		unix_ioctl,
1847 	listen:		sock_no_listen,
1848 	shutdown:	unix_shutdown,
1849 	setsockopt:	sock_no_setsockopt,
1850 	getsockopt:	sock_no_getsockopt,
1851 	sendmsg:	unix_dgram_sendmsg,
1852 	recvmsg:	unix_dgram_recvmsg,
1853 	mmap:		sock_no_mmap,
1854 	sendpage:	sock_no_sendpage,
1855 };
1856 
1857 struct net_proto_family unix_family_ops = {
1858 	family:		PF_UNIX,
1859 	create:		unix_create
1860 };
1861 
/*
 *	Sysctl hooks.  With CONFIG_SYSCTL the two routines are only declared
 *	here and defined elsewhere; without it they are empty inline stubs,
 *	so the init/exit code can call them unconditionally.
 */
#ifdef CONFIG_SYSCTL
extern void unix_sysctl_register(void);
extern void unix_sysctl_unregister(void);
#else
static inline void unix_sysctl_register(void) {}
static inline void unix_sysctl_unregister(void) {}
#endif
1869 
/* Start-up message printed once by af_unix_init(); lives in init data. */
static char banner[] __initdata = KERN_INFO "NET4: Unix domain sockets 1.0/SMP for Linux NET4.0.\n";
1871 
af_unix_init(void)1872 static int __init af_unix_init(void)
1873 {
1874 	struct sk_buff *dummy_skb;
1875 
1876 	printk(banner);
1877 	if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb))
1878 	{
1879 		printk(KERN_CRIT "unix_proto_init: panic\n");
1880 		return -1;
1881 	}
1882 	sock_register(&unix_family_ops);
1883 #ifdef CONFIG_PROC_FS
1884 	create_proc_read_entry("net/unix", 0, 0, unix_read_proc, NULL);
1885 #endif
1886 	unix_sysctl_register();
1887 	return 0;
1888 }
1889 
/*
 *	Module unload: unregisters the PF_UNIX family, the sysctls and the
 *	"net/unix" proc entry.
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	unix_sysctl_unregister();
	/*
	 * NOTE(review): called unconditionally although the entry is only
	 * created under CONFIG_PROC_FS; presumably remove_proc_entry() is a
	 * no-op stub without it - confirm.
	 */
	remove_proc_entry("net/unix", 0);
}
1896 
/* Register the init/exit entry points and declare the module licence. */
module_init(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
1901