1 /*
2  *  linux/fs/pipe.c
3  *
4  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
5  */
6 
7 #include <linux/mm.h>
8 #include <linux/file.h>
9 #include <linux/poll.h>
10 #include <linux/slab.h>
11 #include <linux/module.h>
12 #include <linux/init.h>
13 
14 #include <asm/uaccess.h>
15 #include <asm/ioctls.h>
16 
/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 */
25 
26 /* Drop the inode semaphore and wait for a pipe event, atomically */
pipe_wait(struct inode * inode)27 void pipe_wait(struct inode * inode)
28 {
29 	DECLARE_WAITQUEUE(wait, current);
30 	current->state = TASK_INTERRUPTIBLE;
31 	add_wait_queue(PIPE_WAIT(*inode), &wait);
32 	up(PIPE_SEM(*inode));
33 	schedule();
34 	remove_wait_queue(PIPE_WAIT(*inode), &wait);
35 	current->state = TASK_RUNNING;
36 	down(PIPE_SEM(*inode));
37 }
38 
39 static ssize_t
pipe_read(struct file * filp,char * buf,size_t count,loff_t * ppos)40 pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
41 {
42 	struct inode *inode = filp->f_dentry->d_inode;
43 	ssize_t size, read, ret;
44 
45 	/* Seeks are not allowed on pipes.  */
46 	ret = -ESPIPE;
47 	read = 0;
48 	if (ppos != &filp->f_pos)
49 		goto out_nolock;
50 
51 	/* Always return 0 on null read.  */
52 	ret = 0;
53 	if (count == 0)
54 		goto out_nolock;
55 
56 	/* Get the pipe semaphore */
57 	ret = -ERESTARTSYS;
58 	if (down_interruptible(PIPE_SEM(*inode)))
59 		goto out_nolock;
60 
61 	if (PIPE_EMPTY(*inode)) {
62 do_more_read:
63 		ret = 0;
64 		if (!PIPE_WRITERS(*inode))
65 			goto out;
66 
67 		ret = -EAGAIN;
68 		if (filp->f_flags & O_NONBLOCK)
69 			goto out;
70 
71 		for (;;) {
72 			PIPE_WAITING_READERS(*inode)++;
73 			pipe_wait(inode);
74 			PIPE_WAITING_READERS(*inode)--;
75 			ret = -ERESTARTSYS;
76 			if (signal_pending(current))
77 				goto out;
78 			ret = 0;
79 			if (!PIPE_EMPTY(*inode))
80 				break;
81 			if (!PIPE_WRITERS(*inode))
82 				goto out;
83 		}
84 	}
85 
86 	/* Read what data is available.  */
87 	ret = -EFAULT;
88 	while (count > 0 && (size = PIPE_LEN(*inode))) {
89 		char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
90 		ssize_t chars = PIPE_MAX_RCHUNK(*inode);
91 
92 		if (chars > count)
93 			chars = count;
94 		if (chars > size)
95 			chars = size;
96 
97 		if (copy_to_user(buf, pipebuf, chars))
98 			goto out;
99 
100 		read += chars;
101 		PIPE_START(*inode) += chars;
102 		PIPE_START(*inode) &= (PIPE_SIZE - 1);
103 		PIPE_LEN(*inode) -= chars;
104 		count -= chars;
105 		buf += chars;
106 	}
107 
108 	/* Cache behaviour optimization */
109 	if (!PIPE_LEN(*inode))
110 		PIPE_START(*inode) = 0;
111 
112 	if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) {
113 		/*
114 		 * We know that we are going to sleep: signal
115 		 * writers synchronously that there is more
116 		 * room.
117 		 */
118 		wake_up_interruptible_sync(PIPE_WAIT(*inode));
119 		if (!PIPE_EMPTY(*inode))
120 			BUG();
121 		goto do_more_read;
122 	}
123 	/* Signal writers asynchronously that there is more room.  */
124 	wake_up_interruptible(PIPE_WAIT(*inode));
125 
126 	ret = read;
127 out:
128 	up(PIPE_SEM(*inode));
129 out_nolock:
130 	if (read)
131 		ret = read;
132 
133 	UPDATE_ATIME(inode);
134 	return ret;
135 }
136 
137 static ssize_t
pipe_write(struct file * filp,const char * buf,size_t count,loff_t * ppos)138 pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
139 {
140 	struct inode *inode = filp->f_dentry->d_inode;
141 	ssize_t free, written, ret;
142 
143 	/* Seeks are not allowed on pipes.  */
144 	ret = -ESPIPE;
145 	written = 0;
146 	if (ppos != &filp->f_pos)
147 		goto out_nolock;
148 
149 	/* Null write succeeds.  */
150 	ret = 0;
151 	if (count == 0)
152 		goto out_nolock;
153 
154 	ret = -ERESTARTSYS;
155 	if (down_interruptible(PIPE_SEM(*inode)))
156 		goto out_nolock;
157 
158 	/* No readers yields SIGPIPE.  */
159 	if (!PIPE_READERS(*inode))
160 		goto sigpipe;
161 
162 	/* If count <= PIPE_BUF, we have to make it atomic.  */
163 	free = (count <= PIPE_BUF ? count : 1);
164 
165 	/* Wait, or check for, available space.  */
166 	if (filp->f_flags & O_NONBLOCK) {
167 		ret = -EAGAIN;
168 		if (PIPE_FREE(*inode) < free)
169 			goto out;
170 	} else {
171 		while (PIPE_FREE(*inode) < free) {
172 			PIPE_WAITING_WRITERS(*inode)++;
173 			pipe_wait(inode);
174 			PIPE_WAITING_WRITERS(*inode)--;
175 			ret = -ERESTARTSYS;
176 			if (signal_pending(current))
177 				goto out;
178 
179 			if (!PIPE_READERS(*inode))
180 				goto sigpipe;
181 		}
182 	}
183 
184 	/* Copy into available space.  */
185 	ret = -EFAULT;
186 	while (count > 0) {
187 		int space;
188 		char *pipebuf = PIPE_BASE(*inode) + PIPE_END(*inode);
189 		ssize_t chars = PIPE_MAX_WCHUNK(*inode);
190 
191 		if ((space = PIPE_FREE(*inode)) != 0) {
192 			if (chars > count)
193 				chars = count;
194 			if (chars > space)
195 				chars = space;
196 
197 			if (copy_from_user(pipebuf, buf, chars))
198 				goto out;
199 
200 			written += chars;
201 			PIPE_LEN(*inode) += chars;
202 			count -= chars;
203 			buf += chars;
204 			space = PIPE_FREE(*inode);
205 			continue;
206 		}
207 
208 		ret = written;
209 		if (filp->f_flags & O_NONBLOCK)
210 			break;
211 
212 		do {
213 			/*
214 			 * Synchronous wake-up: it knows that this process
215 			 * is going to give up this CPU, so it doesn't have
216 			 * to do idle reschedules.
217 			 */
218 			wake_up_interruptible_sync(PIPE_WAIT(*inode));
219 			PIPE_WAITING_WRITERS(*inode)++;
220 			pipe_wait(inode);
221 			PIPE_WAITING_WRITERS(*inode)--;
222 			if (signal_pending(current))
223 				goto out;
224 			if (!PIPE_READERS(*inode))
225 				goto sigpipe;
226 		} while (!PIPE_FREE(*inode));
227 		ret = -EFAULT;
228 	}
229 
230 	/* Signal readers asynchronously that there is more data.  */
231 	wake_up_interruptible(PIPE_WAIT(*inode));
232 
233 	update_mctime(inode);
234 
235 out:
236 	up(PIPE_SEM(*inode));
237 out_nolock:
238 	if (written)
239 		ret = written;
240 	return ret;
241 
242 sigpipe:
243 	if (written)
244 		goto out;
245 	up(PIPE_SEM(*inode));
246 	send_sig(SIGPIPE, current, 0);
247 	return -EPIPE;
248 }
249 
250 static ssize_t
bad_pipe_r(struct file * filp,char * buf,size_t count,loff_t * ppos)251 bad_pipe_r(struct file *filp, char *buf, size_t count, loff_t *ppos)
252 {
253 	return -EBADF;
254 }
255 
256 static ssize_t
bad_pipe_w(struct file * filp,const char * buf,size_t count,loff_t * ppos)257 bad_pipe_w(struct file *filp, const char *buf, size_t count, loff_t *ppos)
258 {
259 	return -EBADF;
260 }
261 
262 static int
pipe_ioctl(struct inode * pino,struct file * filp,unsigned int cmd,unsigned long arg)263 pipe_ioctl(struct inode *pino, struct file *filp,
264 	   unsigned int cmd, unsigned long arg)
265 {
266 	switch (cmd) {
267 		case FIONREAD:
268 			return put_user(PIPE_LEN(*pino), (int *)arg);
269 		default:
270 			return -EINVAL;
271 	}
272 }
273 
274 /* No kernel lock held - fine */
275 static unsigned int
pipe_poll(struct file * filp,poll_table * wait)276 pipe_poll(struct file *filp, poll_table *wait)
277 {
278 	unsigned int mask;
279 	struct inode *inode = filp->f_dentry->d_inode;
280 
281 	poll_wait(filp, PIPE_WAIT(*inode), wait);
282 
283 	/* Reading only -- no need for acquiring the semaphore.  */
284 	mask = POLLIN | POLLRDNORM;
285 	if (PIPE_EMPTY(*inode))
286 		mask = POLLOUT | POLLWRNORM;
287 	if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
288 		mask |= POLLHUP;
289 	if (!PIPE_READERS(*inode))
290 		mask |= POLLERR;
291 
292 	return mask;
293 }
294 
295 /* FIXME: most Unices do not set POLLERR for fifos */
296 #define fifo_poll pipe_poll
297 
298 static int
pipe_release(struct inode * inode,int decr,int decw)299 pipe_release(struct inode *inode, int decr, int decw)
300 {
301 	down(PIPE_SEM(*inode));
302 	PIPE_READERS(*inode) -= decr;
303 	PIPE_WRITERS(*inode) -= decw;
304 	if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
305 		struct pipe_inode_info *info = inode->i_pipe;
306 		inode->i_pipe = NULL;
307 		free_page((unsigned long) info->base);
308 		kfree(info);
309 	} else {
310 		wake_up_interruptible(PIPE_WAIT(*inode));
311 	}
312 	up(PIPE_SEM(*inode));
313 
314 	return 0;
315 }
316 
/* Release handler for read-only opens: drops one reader reference. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 1, 0);
}
322 
/* Release handler for write-only opens: drops one writer reference. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	return pipe_release(inode, 0, 1);
}
328 
329 static int
pipe_rdwr_release(struct inode * inode,struct file * filp)330 pipe_rdwr_release(struct inode *inode, struct file *filp)
331 {
332 	int decr, decw;
333 
334 	decr = (filp->f_mode & FMODE_READ) != 0;
335 	decw = (filp->f_mode & FMODE_WRITE) != 0;
336 	return pipe_release(inode, decr, decw);
337 }
338 
339 static int
pipe_read_open(struct inode * inode,struct file * filp)340 pipe_read_open(struct inode *inode, struct file *filp)
341 {
342 	int ret = -ENOENT;
343 
344 	down(PIPE_SEM(*inode));
345 	if (inode->i_pipe) {
346 		ret = 0;
347 		PIPE_READERS(*inode)++;
348 	}
349 	up(PIPE_SEM(*inode));
350 
351 	return ret;
352 }
353 
354 static int
pipe_write_open(struct inode * inode,struct file * filp)355 pipe_write_open(struct inode *inode, struct file *filp)
356 {
357 	int ret = -ENOENT;
358 
359 	down(PIPE_SEM(*inode));
360 	if (inode->i_pipe) {
361 		ret = 0;
362 		PIPE_WRITERS(*inode)++;
363 	}
364 	up(PIPE_SEM(*inode));
365 
366 	return ret;
367 }
368 
369 static int
pipe_rdwr_open(struct inode * inode,struct file * filp)370 pipe_rdwr_open(struct inode *inode, struct file *filp)
371 {
372 	int ret = -ENOENT;
373 
374 	down(PIPE_SEM(*inode));
375 	if (inode->i_pipe) {
376 		ret = 0;
377 		if (filp->f_mode & FMODE_READ)
378 			PIPE_READERS(*inode)++;
379 		if (filp->f_mode & FMODE_WRITE)
380 			PIPE_WRITERS(*inode)++;
381 	}
382 	up(PIPE_SEM(*inode));
383 
384 	return ret;
385 }
386 
387 /*
388  * The file_operations structs are not static because they
389  * are also used in linux/fs/fifo.c to do operations on FIFOs.
390  */
391 struct file_operations read_fifo_fops = {
392 	llseek:		no_llseek,
393 	read:		pipe_read,
394 	write:		bad_pipe_w,
395 	poll:		fifo_poll,
396 	ioctl:		pipe_ioctl,
397 	open:		pipe_read_open,
398 	release:	pipe_read_release,
399 };
400 
401 struct file_operations write_fifo_fops = {
402 	llseek:		no_llseek,
403 	read:		bad_pipe_r,
404 	write:		pipe_write,
405 	poll:		fifo_poll,
406 	ioctl:		pipe_ioctl,
407 	open:		pipe_write_open,
408 	release:	pipe_write_release,
409 };
410 
411 struct file_operations rdwr_fifo_fops = {
412 	llseek:		no_llseek,
413 	read:		pipe_read,
414 	write:		pipe_write,
415 	poll:		fifo_poll,
416 	ioctl:		pipe_ioctl,
417 	open:		pipe_rdwr_open,
418 	release:	pipe_rdwr_release,
419 };
420 
421 struct file_operations read_pipe_fops = {
422 	llseek:		no_llseek,
423 	read:		pipe_read,
424 	write:		bad_pipe_w,
425 	poll:		pipe_poll,
426 	ioctl:		pipe_ioctl,
427 	open:		pipe_read_open,
428 	release:	pipe_read_release,
429 };
430 
431 struct file_operations write_pipe_fops = {
432 	llseek:		no_llseek,
433 	read:		bad_pipe_r,
434 	write:		pipe_write,
435 	poll:		pipe_poll,
436 	ioctl:		pipe_ioctl,
437 	open:		pipe_write_open,
438 	release:	pipe_write_release,
439 };
440 
441 struct file_operations rdwr_pipe_fops = {
442 	llseek:		no_llseek,
443 	read:		pipe_read,
444 	write:		pipe_write,
445 	poll:		pipe_poll,
446 	ioctl:		pipe_ioctl,
447 	open:		pipe_rdwr_open,
448 	release:	pipe_rdwr_release,
449 };
450 
pipe_new(struct inode * inode)451 struct inode* pipe_new(struct inode* inode)
452 {
453 	unsigned long page;
454 
455 	page = __get_free_page(GFP_USER);
456 	if (!page)
457 		return NULL;
458 
459 	inode->i_pipe = kmalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
460 	if (!inode->i_pipe)
461 		goto fail_page;
462 
463 	init_waitqueue_head(PIPE_WAIT(*inode));
464 	PIPE_BASE(*inode) = (char*) page;
465 	PIPE_START(*inode) = PIPE_LEN(*inode) = 0;
466 	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 0;
467 	PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0;
468 	PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
469 
470 	return inode;
471 fail_page:
472 	free_page(page);
473 	return NULL;
474 }
475 
476 static struct vfsmount *pipe_mnt;
pipefs_delete_dentry(struct dentry * dentry)477 static int pipefs_delete_dentry(struct dentry *dentry)
478 {
479 	return 1;
480 }
481 static struct dentry_operations pipefs_dentry_operations = {
482 	d_delete:	pipefs_delete_dentry,
483 };
484 
get_pipe_inode(void)485 static struct inode * get_pipe_inode(void)
486 {
487 	struct inode *inode = new_inode(pipe_mnt->mnt_sb);
488 
489 	if (!inode)
490 		goto fail_inode;
491 
492 	if(!pipe_new(inode))
493 		goto fail_iput;
494 	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
495 	inode->i_fop = &rdwr_pipe_fops;
496 
497 	/*
498 	 * Mark the inode dirty from the very beginning,
499 	 * that way it will never be moved to the dirty
500 	 * list because "mark_inode_dirty()" will think
501 	 * that it already _is_ on the dirty list.
502 	 */
503 	inode->i_state = I_DIRTY;
504 	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
505 	inode->i_uid = current->fsuid;
506 	inode->i_gid = current->fsgid;
507 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
508 	inode->i_blksize = PAGE_SIZE;
509 	return inode;
510 
511 fail_iput:
512 	iput(inode);
513 fail_inode:
514 	return NULL;
515 }
516 
do_pipe(int * fd)517 int do_pipe(int *fd)
518 {
519 	struct qstr this;
520 	char name[32];
521 	struct dentry *dentry;
522 	struct inode * inode;
523 	struct file *f1, *f2;
524 	int error;
525 	int i,j;
526 
527 	error = -ENFILE;
528 	f1 = get_empty_filp();
529 	if (!f1)
530 		goto no_files;
531 
532 	f2 = get_empty_filp();
533 	if (!f2)
534 		goto close_f1;
535 
536 	inode = get_pipe_inode();
537 	if (!inode)
538 		goto close_f12;
539 
540 	error = get_unused_fd();
541 	if (error < 0)
542 		goto close_f12_inode;
543 	i = error;
544 
545 	error = get_unused_fd();
546 	if (error < 0)
547 		goto close_f12_inode_i;
548 	j = error;
549 
550 	error = -ENOMEM;
551 	sprintf(name, "[%lu]", inode->i_ino);
552 	this.name = name;
553 	this.len = strlen(name);
554 	this.hash = inode->i_ino; /* will go */
555 	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
556 	if (!dentry)
557 		goto close_f12_inode_i_j;
558 	dentry->d_op = &pipefs_dentry_operations;
559 	d_add(dentry, inode);
560 	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
561 	f1->f_dentry = f2->f_dentry = dget(dentry);
562 
563 	/* read file */
564 	f1->f_pos = f2->f_pos = 0;
565 	f1->f_flags = O_RDONLY;
566 	f1->f_op = &read_pipe_fops;
567 	f1->f_mode = 1;
568 	f1->f_version = 0;
569 
570 	/* write file */
571 	f2->f_flags = O_WRONLY;
572 	f2->f_op = &write_pipe_fops;
573 	f2->f_mode = 2;
574 	f2->f_version = 0;
575 
576 	fd_install(i, f1);
577 	fd_install(j, f2);
578 	fd[0] = i;
579 	fd[1] = j;
580 	return 0;
581 
582 close_f12_inode_i_j:
583 	put_unused_fd(j);
584 close_f12_inode_i:
585 	put_unused_fd(i);
586 close_f12_inode:
587 	free_page((unsigned long) PIPE_BASE(*inode));
588 	kfree(inode->i_pipe);
589 	inode->i_pipe = NULL;
590 	iput(inode);
591 close_f12:
592 	put_filp(f2);
593 close_f1:
594 	put_filp(f1);
595 no_files:
596 	return error;
597 }
598 
/*
 * pipefs should _never_ be mounted by userland - it would be too much of a
 * security hassle, with no real gain from having the whole thing mounted.
 * So we don't need any operations on the root directory. However, we do
 * need a non-trivial d_name - "pipe:" will do nicely and kills the
 * special-casing in procfs.
 */
pipefs_statfs(struct super_block * sb,struct statfs * buf)605 static int pipefs_statfs(struct super_block *sb, struct statfs *buf)
606 {
607 	buf->f_type = PIPEFS_MAGIC;
608 	buf->f_bsize = 1024;
609 	buf->f_namelen = 255;
610 	return 0;
611 }
612 
613 static struct super_operations pipefs_ops = {
614 	statfs:		pipefs_statfs,
615 };
616 
pipefs_read_super(struct super_block * sb,void * data,int silent)617 static struct super_block * pipefs_read_super(struct super_block *sb, void *data, int silent)
618 {
619 	struct inode *root = new_inode(sb);
620 	if (!root)
621 		return NULL;
622 	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
623 	root->i_uid = root->i_gid = 0;
624 	root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
625 	sb->s_blocksize = 1024;
626 	sb->s_blocksize_bits = 10;
627 	sb->s_magic = PIPEFS_MAGIC;
628 	sb->s_op	= &pipefs_ops;
629 	sb->s_root = d_alloc(NULL, &(const struct qstr) { "pipe:", 5, 0 });
630 	if (!sb->s_root) {
631 		iput(root);
632 		return NULL;
633 	}
634 	sb->s_root->d_sb = sb;
635 	sb->s_root->d_parent = sb->s_root;
636 	d_instantiate(sb->s_root, root);
637 	return sb;
638 }
639 
640 static DECLARE_FSTYPE(pipe_fs_type, "pipefs", pipefs_read_super, FS_NOMOUNT);
641 
init_pipe_fs(void)642 static int __init init_pipe_fs(void)
643 {
644 	int err = register_filesystem(&pipe_fs_type);
645 	if (!err) {
646 		pipe_mnt = kern_mount(&pipe_fs_type);
647 		err = PTR_ERR(pipe_mnt);
648 		if (IS_ERR(pipe_mnt))
649 			unregister_filesystem(&pipe_fs_type);
650 		else
651 			err = 0;
652 	}
653 	return err;
654 }
655 
exit_pipe_fs(void)656 static void __exit exit_pipe_fs(void)
657 {
658 	unregister_filesystem(&pipe_fs_type);
659 	mntput(pipe_mnt);
660 }
661 
662 module_init(init_pipe_fs)
663 module_exit(exit_pipe_fs)
664