1 /*
2  *  linux/fs/proc/base.c
3  *
4  *  Copyright (C) 1991, 1992 Linus Torvalds
5  *
6  *  proc base directory handling functions
7  *
8  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9  *  Instead of using magical inumbers to determine the kind of object
10  *  we allocate and fill in-core inodes upon lookup. They don't even
11  *  go into icache. We cache the reference to task_struct upon lookup too.
12  *  Eventually it should become a filesystem in its own. We don't use the
13  *  rest of procfs anymore.
14  */
15 
16 #include <asm/uaccess.h>
17 
18 #include <linux/config.h>
19 #include <linux/errno.h>
20 #include <linux/sched.h>
21 #include <linux/proc_fs.h>
22 #include <linux/stat.h>
23 #include <linux/init.h>
24 #include <linux/file.h>
25 #include <linux/string.h>
26 #include <linux/seq_file.h>
27 #include <linux/namespace.h>
28 
29 /*
30  * For hysterical raisins we keep the same inumbers as in the old procfs.
31  * Feel free to change the macro below - just keep the range distinct from
32  * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
33  * As soon as we'll get a separate superblock we will be able to forget
34  * about magical ranges too.
35  */
36 
/* Compose an inode number: pid shifted left by 16 bits, OR-ed with the entry id. */
#define fake_ino(pid, ino)	(((pid) << 16) | (ino))
38 
/*
 * Text generators installed as the proc_read handler of the matching
 * /proc/<pid>/ entries.  proc_info_read() calls them with the task and a
 * freshly allocated page and treats the return value as the number of
 * bytes produced (negative values are passed back as errors).
 */
int proc_pid_stat(struct task_struct *task, char *buffer);
int proc_pid_status(struct task_struct *task, char *buffer);
int proc_pid_statm(struct task_struct *task, char *buffer);
int proc_pid_cpu(struct task_struct *task, char *buffer);
43 
proc_fd_link(struct inode * inode,struct dentry ** dentry,struct vfsmount ** mnt)44 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
45 {
46 	if (inode->u.proc_i.file) {
47 		*mnt = mntget(inode->u.proc_i.file->f_vfsmnt);
48 		*dentry = dget(inode->u.proc_i.file->f_dentry);
49 		return 0;
50 	}
51 	return -ENOENT;
52 }
53 
proc_exe_link(struct inode * inode,struct dentry ** dentry,struct vfsmount ** mnt)54 static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
55 {
56 	struct mm_struct * mm;
57 	struct vm_area_struct * vma;
58 	int result = -ENOENT;
59 	struct task_struct *task = inode->u.proc_i.task;
60 
61 	task_lock(task);
62 	mm = task->mm;
63 	if (mm)
64 		atomic_inc(&mm->mm_users);
65 	task_unlock(task);
66 	if (!mm)
67 		goto out;
68 	down_read(&mm->mmap_sem);
69 	vma = mm->mmap;
70 	while (vma) {
71 		if ((vma->vm_flags & VM_EXECUTABLE) &&
72 		    vma->vm_file) {
73 			*mnt = mntget(vma->vm_file->f_vfsmnt);
74 			*dentry = dget(vma->vm_file->f_dentry);
75 			result = 0;
76 			break;
77 		}
78 		vma = vma->vm_next;
79 	}
80 	up_read(&mm->mmap_sem);
81 	mmput(mm);
82 out:
83 	return result;
84 }
85 
proc_cwd_link(struct inode * inode,struct dentry ** dentry,struct vfsmount ** mnt)86 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
87 {
88 	struct fs_struct *fs;
89 	int result = -ENOENT;
90 	task_lock(inode->u.proc_i.task);
91 	fs = inode->u.proc_i.task->fs;
92 	if(fs)
93 		atomic_inc(&fs->count);
94 	task_unlock(inode->u.proc_i.task);
95 	if (fs) {
96 		read_lock(&fs->lock);
97 		*mnt = mntget(fs->pwdmnt);
98 		*dentry = dget(fs->pwd);
99 		read_unlock(&fs->lock);
100 		result = 0;
101 		put_fs_struct(fs);
102 	}
103 	return result;
104 }
105 
proc_root_link(struct inode * inode,struct dentry ** dentry,struct vfsmount ** mnt)106 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
107 {
108 	struct fs_struct *fs;
109 	int result = -ENOENT;
110 	task_lock(inode->u.proc_i.task);
111 	fs = inode->u.proc_i.task->fs;
112 	if(fs)
113 		atomic_inc(&fs->count);
114 	task_unlock(inode->u.proc_i.task);
115 	if (fs) {
116 		read_lock(&fs->lock);
117 		*mnt = mntget(fs->rootmnt);
118 		*dentry = dget(fs->root);
119 		read_unlock(&fs->lock);
120 		result = 0;
121 		put_fs_struct(fs);
122 	}
123 	return result;
124 }
125 
/*
 * True when the caller may touch @task's memory through /proc/<pid>/mem:
 * either @task is the caller itself, or the caller is its parent
 * (p_pptr), @task has PT_PTRACED set and is currently in TASK_STOPPED.
 *
 * The argument is parenthesized on every use so that any pointer
 * expression can be passed safely; note that it is evaluated more than
 * once.
 */
#define MAY_PTRACE(task) \
	((task) == current || \
	((task)->p_pptr == current && \
	((task)->ptrace & PT_PTRACED) && (task)->state == TASK_STOPPED))
130 
may_ptrace_attach(struct task_struct * task)131 static int may_ptrace_attach(struct task_struct *task)
132 {
133 	int retval = 0;
134 
135 	task_lock(task);
136 
137 	if (((current->uid != task->euid) ||
138 	    (current->uid != task->suid) ||
139 	    (current->uid != task->uid) ||
140 	    (current->gid != task->egid) ||
141 	    (current->gid != task->sgid) ||
142 	    (!cap_issubset(task->cap_permitted, current->cap_permitted)) ||
143 	    (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
144 		goto out;
145 	rmb();
146 	if (!is_dumpable(task) && !capable(CAP_SYS_PTRACE))
147 		goto out;
148 
149 	retval = 1;
150 
151 out:
152 	task_unlock(task);
153 	return retval;
154 }
155 
proc_pid_environ(struct task_struct * task,char * buffer)156 static int proc_pid_environ(struct task_struct *task, char * buffer)
157 {
158 	struct mm_struct *mm;
159 	int res = 0;
160 
161 	if (!may_ptrace_attach(task))
162 		return -ESRCH;
163 
164 	task_lock(task);
165 	mm = task->mm;
166 	if (mm)
167 		atomic_inc(&mm->mm_users);
168 	task_unlock(task);
169 	if (mm && mm->env_start && mm->env_start < mm->env_end) {
170 		unsigned long len = mm->env_end - mm->env_start;
171 		if (len > PAGE_SIZE)
172 			len = PAGE_SIZE;
173 		res = access_process_vm(task, mm->env_start, buffer, len, 0);
174 		if (res >= 0 && !may_ptrace_attach(task))
175 			res = -ESRCH;
176 	}
177 	if (mm)
178 		mmput(mm);
179 	return res;
180 }
181 
proc_pid_cmdline(struct task_struct * task,char * buffer)182 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
183 {
184 	struct mm_struct *mm;
185 	int res = 0;
186 	task_lock(task);
187 	mm = task->mm;
188 	if (mm) {
189 		if (mm->arg_end)
190 			atomic_inc(&mm->mm_users);
191 		else
192 			mm = NULL;
193 	}
194 	task_unlock(task);
195 	if (mm && mm->arg_start && mm->arg_start < mm->arg_end) {
196 		unsigned long len = mm->arg_end - mm->arg_start;
197 		if (len > PAGE_SIZE)
198 			len = PAGE_SIZE;
199 		res = access_process_vm(task, mm->arg_start, buffer, len, 0);
200 		/* If the nul at the end of args has been overwritten, then
201 		   assume application is using setproctitle(3). */
202 		if (res > 0 && buffer[res - 1] != '\0') {
203 			len = strnlen(buffer, res);
204 			if (len < res) {
205 				res = len;
206 			} else
207 			if (mm->env_start < mm->env_end && res <= PAGE_SIZE) {
208 				len = mm->env_end - mm->env_start;
209 				if (len > PAGE_SIZE - res)
210 					len = PAGE_SIZE - res;
211 				res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
212 				res = strnlen(buffer, res);
213 			} else
214 				res = 0;
215 		}
216 	}
217 	if (mm)
218 		mmput(mm);
219 	return res;
220 }
221 
222 /************************************************************************/
223 /*                       Here the fs part begins                        */
224 /************************************************************************/
225 
226 /* permission checks */
227 
proc_check_root(struct inode * inode)228 static int proc_check_root(struct inode *inode)
229 {
230 	struct dentry *de, *base, *root;
231 	struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
232 	int res = 0;
233 
234 	if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
235 		return -ENOENT;
236 	read_lock(&current->fs->lock);
237 	our_vfsmnt = mntget(current->fs->rootmnt);
238 	base = dget(current->fs->root);
239 	read_unlock(&current->fs->lock);
240 
241 	spin_lock(&dcache_lock);
242 	de = root;
243 	mnt = vfsmnt;
244 
245 	while (vfsmnt != our_vfsmnt) {
246 		if (vfsmnt == vfsmnt->mnt_parent)
247 			goto out;
248 		de = vfsmnt->mnt_mountpoint;
249 		vfsmnt = vfsmnt->mnt_parent;
250 	}
251 
252 	if (!is_subdir(de, base))
253 		goto out;
254 	spin_unlock(&dcache_lock);
255 
256 exit:
257 	dput(base);
258 	mntput(our_vfsmnt);
259 	dput(root);
260 	mntput(mnt);
261 	return res;
262 out:
263 	spin_unlock(&dcache_lock);
264 	res = -EACCES;
265 	goto exit;
266 }
267 
proc_permission(struct inode * inode,int mask)268 static int proc_permission(struct inode *inode, int mask)
269 {
270 	if (vfs_permission(inode, mask) != 0)
271 		return -EACCES;
272 	return proc_check_root(inode);
273 }
274 
275 extern struct seq_operations proc_pid_maps_op;
maps_open(struct inode * inode,struct file * file)276 static int maps_open(struct inode *inode, struct file *file)
277 {
278 	struct task_struct *task = inode->u.proc_i.task;
279 	int ret = seq_open(file, &proc_pid_maps_op);
280 	if (!ret) {
281 		struct seq_file *m = file->private_data;
282 		m->private = task;
283 	}
284 	return ret;
285 }
286 
287 static struct file_operations proc_maps_operations = {
288 	.open		= maps_open,
289 	.read		= seq_read,
290 	.llseek		= seq_lseek,
291 	.release	= seq_release,
292 };
293 
294 extern struct seq_operations mounts_op;
mounts_open(struct inode * inode,struct file * file)295 static int mounts_open(struct inode *inode, struct file *file)
296 {
297 	struct task_struct *task = inode->u.proc_i.task;
298 	int ret = seq_open(file, &mounts_op);
299 
300 	if (!ret) {
301 		struct seq_file *m = file->private_data;
302 		struct namespace *namespace;
303 		task_lock(task);
304 		namespace = task->namespace;
305 		if (namespace)
306 			get_namespace(namespace);
307 		task_unlock(task);
308 
309 		if (namespace)
310 			m->private = namespace;
311 		else {
312 			seq_release(inode, file);
313 			ret = -EINVAL;
314 		}
315 	}
316 	return ret;
317 }
318 
mounts_release(struct inode * inode,struct file * file)319 static int mounts_release(struct inode *inode, struct file *file)
320 {
321 	struct seq_file *m = file->private_data;
322 	struct namespace *namespace = m->private;
323 	put_namespace(namespace);
324 	return seq_release(inode, file);
325 }
326 
327 static struct file_operations proc_mounts_operations = {
328 	open:		mounts_open,
329 	read:		seq_read,
330 	llseek:		seq_lseek,
331 	release:	mounts_release,
332 };
333 
334 #define PROC_BLOCK_SIZE	(3*1024)		/* 4K page size but our output routines use some slack for overruns */
335 
proc_info_read(struct file * file,char * buf,size_t count,loff_t * ppos)336 static ssize_t proc_info_read(struct file * file, char * buf,
337 			  size_t count, loff_t *ppos)
338 {
339 	struct inode * inode = file->f_dentry->d_inode;
340 	unsigned long page;
341 	ssize_t length;
342 	ssize_t end;
343 	struct task_struct *task = inode->u.proc_i.task;
344 	loff_t pos = *ppos;
345 
346 	if (count > PROC_BLOCK_SIZE)
347 		count = PROC_BLOCK_SIZE;
348 	if (!(page = __get_free_page(GFP_KERNEL)))
349 		return -ENOMEM;
350 
351 	length = inode->u.proc_i.op.proc_read(task, (char*)page);
352 
353 	if (length < 0) {
354 		free_page(page);
355 		return length;
356 	}
357 	/* Static 4kB (or whatever) block capacity */
358 	if (pos < 0 || pos >= length) {
359 		free_page(page);
360 		return 0;
361 	}
362 	if (count > length - pos)
363 		count = length - pos;
364 	end = count + pos;
365 	copy_to_user(buf, (char *) page + pos, count);
366 	*ppos = end;
367 	free_page(page);
368 	return count;
369 }
370 
371 static struct file_operations proc_info_file_operations = {
372 	read:		proc_info_read,
373 };
374 
mem_open(struct inode * inode,struct file * file)375 static int mem_open(struct inode* inode, struct file* file)
376 {
377 	file->private_data = (void*)((long)current->self_exec_id);
378 	return 0;
379 }
380 
mem_read(struct file * file,char * buf,size_t count,loff_t * ppos)381 static ssize_t mem_read(struct file * file, char * buf,
382 			size_t count, loff_t *ppos)
383 {
384 	struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
385 	char *page;
386 	unsigned long src = *ppos;
387 	int copied = 0;
388 	struct mm_struct *mm;
389 
390 	if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
391 		return -ESRCH;
392 
393 	page = (char *)__get_free_page(GFP_USER);
394 	if (!page)
395 		return -ENOMEM;
396 
397 	task_lock(task);
398 	mm = task->mm;
399 	if (mm)
400 		atomic_inc(&mm->mm_users);
401 	task_unlock(task);
402 	if (!mm){
403 		copied = 0;
404 		goto out_free;
405 	}
406 
407 	if (file->private_data != (void*)((long)current->self_exec_id) ) {
408 		mmput(mm);
409 		copied = -EIO;
410 		goto out_free;
411 	}
412 
413 	while (count > 0) {
414 		int this_len, retval;
415 
416 		this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
417 		retval = access_process_vm(task, src, page, this_len, 0);
418 		if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) {
419 			if (!copied)
420 				copied = -EIO;
421 			break;
422 		}
423 		if (copy_to_user(buf, page, retval)) {
424 			copied = -EFAULT;
425 			break;
426 		}
427 		copied += retval;
428 		src += retval;
429 		buf += retval;
430 		count -= retval;
431 	}
432 	*ppos = src;
433 	mmput(mm);
434 
435 out_free:
436 	free_page((unsigned long) page);
437 	return copied;
438 }
439 
/*
 * Writing to /proc/<pid>/mem is disabled: mem_write is defined to NULL,
 * so the implementation inside the #ifndef below is not compiled.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
static ssize_t mem_write(struct file * file, const char * buf,
			 size_t count, loff_t *ppos)
{
	struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
	unsigned long dst = *ppos;
	int copied = 0;
	char *page;

	if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
		return -ESRCH;

	page = (char *)__get_free_page(GFP_USER);
	if (!page)
		return -ENOMEM;

	/* Move the data one page at a time through the bounce page. */
	while (count > 0) {
		int chunk = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		int done;

		if (copy_from_user(page, buf, chunk)) {
			copied = -EFAULT;
			break;
		}
		done = access_process_vm(task, dst, page, chunk, 1);
		if (!done) {
			/* Report -EIO only if nothing was written so far. */
			if (!copied)
				copied = -EIO;
			break;
		}
		copied += done;
		buf += done;
		dst += done;
		count -= done;
	}
	*ppos = dst;
	free_page((unsigned long) page);
	return copied;
}
#endif
483 
/*
 * llseek() for /proc/<pid>/mem.
 *
 * @orig 0 sets the position to @offset, @orig 1 adds @offset to the
 * current position; any other value yields -EINVAL.  On success the new
 * position is returned after calling force_successful_syscall_return().
 */
static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
{
	if (orig == 0)
		file->f_pos = offset;
	else if (orig == 1)
		file->f_pos += offset;
	else
		return -EINVAL;

	force_successful_syscall_return();
	return file->f_pos;
}
499 
500 static struct file_operations proc_mem_operations = {
501 	llseek:		mem_lseek,
502 	read:		mem_read,
503 	write:		mem_write,
504 	open:		mem_open,
505 };
506 
507 static struct inode_operations proc_mem_inode_operations = {
508 	permission:	proc_permission,
509 };
510 
proc_pid_follow_link(struct dentry * dentry,struct nameidata * nd)511 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
512 {
513 	struct inode *inode = dentry->d_inode;
514 	int error = -EACCES;
515 
516 	/* We don't need a base pointer in the /proc filesystem */
517 	path_release(nd);
518 
519 	if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
520 		goto out;
521 	error = proc_check_root(inode);
522 	if (error)
523 		goto out;
524 
525 	error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
526 	nd->last_type = LAST_BIND;
527 out:
528 	return error;
529 }
530 
do_proc_readlink(struct dentry * dentry,struct vfsmount * mnt,char * buffer,int buflen)531 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
532 			    char * buffer, int buflen)
533 {
534 	struct inode * inode;
535 	char * tmp = (char*)__get_free_page(GFP_KERNEL), *path;
536 	int len;
537 
538 	if (!tmp)
539 		return -ENOMEM;
540 
541 	inode = dentry->d_inode;
542 	path = d_path(dentry, mnt, tmp, PAGE_SIZE);
543 	if (IS_ERR(path)) {
544 		free_page((unsigned long)tmp);
545 		return PTR_ERR(path);
546 	}
547 	len = tmp + PAGE_SIZE - 1 - path;
548 
549 	if (len < buflen)
550 		buflen = len;
551 	copy_to_user(buffer, path, buflen);
552 	free_page((unsigned long)tmp);
553 	return buflen;
554 }
555 
proc_pid_readlink(struct dentry * dentry,char * buffer,int buflen)556 static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen)
557 {
558 	int error = -EACCES;
559 	struct inode *inode = dentry->d_inode;
560 	struct dentry *de;
561 	struct vfsmount *mnt = NULL;
562 
563 	if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
564 		goto out;
565 	error = proc_check_root(inode);
566 	if (error)
567 		goto out;
568 
569 	error = inode->u.proc_i.op.proc_get_link(inode, &de, &mnt);
570 	if (error)
571 		goto out;
572 
573 	error = do_proc_readlink(de, mnt, buffer, buflen);
574 	dput(de);
575 	mntput(mnt);
576 out:
577 	return error;
578 }
579 
580 static struct inode_operations proc_pid_link_inode_operations = {
581 	readlink:	proc_pid_readlink,
582 	follow_link:	proc_pid_follow_link
583 };
584 
/*
 * One name inside a /proc/<pid>/ directory.  The field order matters:
 * the table below fills the entries with positional initializers.
 */
struct pid_entry {
	int type;	/* entry id, one of enum pid_directory_inos */
	int len;	/* length of name, without the terminating NUL */
	char *name;	/* file name; NULL terminates the table */
	mode_t mode;	/* file type and permission bits for the inode */
};
591 
/*
 * Entry ids of the /proc/<pid>/ objects; they form the low 16 bits of the
 * inode numbers built with fake_ino().  Values are spelled out so that
 * they cannot shift by accident when entries are added.
 */
enum pid_directory_inos {
	PROC_PID_INO		= 2,
	PROC_PID_STATUS		= 3,
	PROC_PID_MEM		= 4,
	PROC_PID_CWD		= 5,
	PROC_PID_ROOT		= 6,
	PROC_PID_EXE		= 7,
	PROC_PID_FD		= 8,
	PROC_PID_ENVIRON	= 9,
	PROC_PID_CMDLINE	= 10,
	PROC_PID_STAT		= 11,
	PROC_PID_STATM		= 12,
	PROC_PID_MAPS		= 13,
	PROC_PID_CPU		= 14,
	PROC_PID_MOUNTS		= 15,
	PROC_PID_FD_DIR		= 0x8000,	/* 0x8000-0xffff */
};
609 
610 #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
611 static struct pid_entry base_stuff[] = {
612   E(PROC_PID_FD,	"fd",		S_IFDIR|S_IRUSR|S_IXUSR),
613   E(PROC_PID_ENVIRON,	"environ",	S_IFREG|S_IRUSR),
614   E(PROC_PID_STATUS,	"status",	S_IFREG|S_IRUGO),
615   E(PROC_PID_CMDLINE,	"cmdline",	S_IFREG|S_IRUGO),
616   E(PROC_PID_STAT,	"stat",		S_IFREG|S_IRUGO),
617   E(PROC_PID_STATM,	"statm",	S_IFREG|S_IRUGO),
618 #ifdef CONFIG_SMP
619   E(PROC_PID_CPU,	"cpu",		S_IFREG|S_IRUGO),
620 #endif
621   E(PROC_PID_MAPS,	"maps",		S_IFREG|S_IRUGO),
622   E(PROC_PID_MEM,	"mem",		S_IFREG|S_IRUSR|S_IWUSR),
623   E(PROC_PID_CWD,	"cwd",		S_IFLNK|S_IRWXUGO),
624   E(PROC_PID_ROOT,	"root",		S_IFLNK|S_IRWXUGO),
625   E(PROC_PID_EXE,	"exe",		S_IFLNK|S_IRWXUGO),
626   E(PROC_PID_MOUNTS,	"mounts",	S_IFREG|S_IRUGO),
627   {0,0,NULL,0}
628 };
629 #undef E
630 
631 #define NUMBUF 10
632 
/*
 * readdir() for /proc/<pid>/fd.
 *
 * Positions 0 and 1 are "." and ".."; position n+2 corresponds to file
 * descriptor n.  Every descriptor for which fcheck_files() finds a file
 * is reported as a DT_LNK entry named by its decimal number.  Always
 * returns 0; filp->f_pos records how far the listing got.
 */
static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct task_struct *p = inode->u.proc_i.task;
	unsigned int pid = p->pid;
	unsigned int fd, ino;
	struct files_struct *files;
	char buf[NUMBUF];

	fd = filp->f_pos;
	switch (fd) {
	case 0:
		if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
			return 0;
		filp->f_pos++;
		/* fall through */
	case 1:
		ino = fake_ino(pid, PROC_PID_INO);
		if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
			return 0;
		filp->f_pos++;
		/* fall through */
	default:
		/* Take a reference on the descriptor table under the task lock. */
		task_lock(p);
		files = p->files;
		if (files)
			atomic_inc(&files->count);
		task_unlock(p);
		if (!files)
			return 0;

		read_lock(&files->file_lock);
		for (fd = filp->f_pos-2;
		     fd < files->max_fds;
		     fd++, filp->f_pos++) {
			unsigned int rest, start;
			int rc;

			if (!fcheck_files(files, fd))
				continue;

			/* file_lock is dropped around the filldir() call. */
			read_unlock(&files->file_lock);

			/* Render fd in decimal, right-aligned in buf. */
			start = NUMBUF;
			rest = fd;
			do {
				buf[--start] = '0' + (rest % 10);
				rest /= 10;
			} while (rest);

			ino = fake_ino(pid, PROC_PID_FD_DIR + fd);
			rc = filldir(dirent, buf+start, NUMBUF-start, fd+2, ino, DT_LNK);

			read_lock(&files->file_lock);
			if (rc < 0)
				break;
		}
		read_unlock(&files->file_lock);
		put_files_struct(files);
	}
	return 0;
}
695 
/*
 * readdir() for /proc/<pid>/.
 *
 * Positions 0 and 1 are "." and ".."; position n+2 is base_stuff[n].
 * Returns -ENOENT if the task's pid is 0, 0 as soon as filldir() refuses
 * an entry, and 1 once the position is at or beyond the end of the table.
 */
static int proc_base_readdir(struct file * filp,
	void * dirent, filldir_t filldir)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int pid = inode->u.proc_i.task->pid;
	struct pid_entry *p;
	int i;

	if (!pid)
		return -ENOENT;

	i = filp->f_pos;
	switch (i) {
	case 0:
		if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
			return 0;
		i++;
		filp->f_pos++;
		/* fall through */
	case 1:
		if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
			return 0;
		i++;
		filp->f_pos++;
		/* fall through */
	default:
		/* Convert the directory position into a table index. */
		i -= 2;
		if (i >= sizeof(base_stuff) / sizeof(base_stuff[0]))
			return 1;
		for (p = base_stuff + i; p->name; p++) {
			/* mode >> 12 yields the value passed as the filldir type. */
			if (filldir(dirent, p->name, p->len, filp->f_pos,
				    fake_ino(pid, p->type), p->mode >> 12) < 0)
				return 0;
			filp->f_pos++;
		}
	}
	return 1;
}
736 
737 /* building an inode */
738 
/* Return is_dumpable(task), sampled while holding the task lock. */
static int task_dumpable(struct task_struct *task)
{
	int dumpable;

	task_lock(task);
	dumpable = is_dumpable(task);
	task_unlock(task);

	return dumpable;
}
748 
749 
proc_pid_make_inode(struct super_block * sb,struct task_struct * task,int ino)750 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
751 {
752 	struct inode * inode;
753 
754 	/* We need a new inode */
755 
756 	inode = new_inode(sb);
757 	if (!inode)
758 		goto out;
759 
760 	/* Common stuff */
761 
762 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
763 	inode->i_ino = fake_ino(task->pid, ino);
764 
765 	if (!task->pid)
766 		goto out_unlock;
767 
768 	/*
769 	 * grab the reference to task.
770 	 */
771 	get_task_struct(task);
772 	inode->u.proc_i.task = task;
773 	inode->i_uid = 0;
774 	inode->i_gid = 0;
775 	if (ino == PROC_PID_INO || task_dumpable(task)) {
776 		inode->i_uid = task->euid;
777 		inode->i_gid = task->egid;
778 	}
779 
780 out:
781 	return inode;
782 
783 out_unlock:
784 	inode->u.generic_ip = NULL;
785 	iput(inode);
786 	return NULL;
787 }
788 
789 /* dentry stuff */
790 
/*
 * d_revalidate() for /proc/<pid>/fd/<n> dentries: unconditionally
 * returns 0, whatever the dentry or flags are.
 */
static int pid_fd_revalidate(struct dentry * dentry, int flags)
{
	return 0;
}
795 
796 /*
797  *	Exceptional case: normally we are not allowed to unhash a busy
798  * directory. In this case, however, we can do it - no aliasing problems
799  * due to the way we treat inodes.
800  */
pid_base_revalidate(struct dentry * dentry,int flags)801 static int pid_base_revalidate(struct dentry * dentry, int flags)
802 {
803 	if (dentry->d_inode->u.proc_i.task->pid)
804 		return 1;
805 	d_drop(dentry);
806 	return 0;
807 }
808 
/* d_delete() for all per-process dentries: unconditionally returns 1. */
static int pid_delete_dentry(struct dentry * dentry)
{
	return 1;
}
813 
814 static struct dentry_operations pid_fd_dentry_operations =
815 {
816 	d_revalidate:	pid_fd_revalidate,
817 	d_delete:	pid_delete_dentry,
818 };
819 
820 static struct dentry_operations pid_dentry_operations =
821 {
822 	d_delete:	pid_delete_dentry,
823 };
824 
825 static struct dentry_operations pid_base_dentry_operations =
826 {
827 	d_revalidate:	pid_base_revalidate,
828 	d_delete:	pid_delete_dentry,
829 };
830 
831 /* Lookups */
832 #define MAX_MULBY10	((~0U-9)/10)
833 
proc_lookupfd(struct inode * dir,struct dentry * dentry)834 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
835 {
836 	unsigned int fd, c;
837 	struct task_struct *task = dir->u.proc_i.task;
838 	struct file * file;
839 	struct files_struct * files;
840 	struct inode *inode;
841 	const char *name;
842 	int len;
843 
844 	fd = 0;
845 	len = dentry->d_name.len;
846 	name = dentry->d_name.name;
847 	if (len > 1 && *name == '0') goto out;
848 	while (len-- > 0) {
849 		c = *name - '0';
850 		name++;
851 		if (c > 9)
852 			goto out;
853 		if (fd >= MAX_MULBY10)
854 			goto out;
855 		fd *= 10;
856 		fd += c;
857 	}
858 
859 	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd);
860 	if (!inode)
861 		goto out;
862 	task_lock(task);
863 	files = task->files;
864 	if (files)
865 		atomic_inc(&files->count);
866 	task_unlock(task);
867 	if (!files)
868 		goto out_unlock;
869 	read_lock(&files->file_lock);
870 	file = inode->u.proc_i.file = fcheck_files(files, fd);
871 	if (!file)
872 		goto out_unlock2;
873 	get_file(file);
874 	read_unlock(&files->file_lock);
875 	put_files_struct(files);
876 	inode->i_op = &proc_pid_link_inode_operations;
877 	inode->i_size = 64;
878 	inode->i_mode = S_IFLNK;
879 	inode->u.proc_i.op.proc_get_link = proc_fd_link;
880 	if (file->f_mode & 1)
881 		inode->i_mode |= S_IRUSR | S_IXUSR;
882 	if (file->f_mode & 2)
883 		inode->i_mode |= S_IWUSR | S_IXUSR;
884 	dentry->d_op = &pid_fd_dentry_operations;
885 	d_add(dentry, inode);
886 	return NULL;
887 
888 out_unlock2:
889 	read_unlock(&files->file_lock);
890 	put_files_struct(files);
891 out_unlock:
892 	iput(inode);
893 out:
894 	return ERR_PTR(-ENOENT);
895 }
896 
897 static struct file_operations proc_fd_operations = {
898 	read:		generic_read_dir,
899 	readdir:	proc_readfd,
900 };
901 
902 /*
903  * proc directories can do almost nothing..
904  */
905 static struct inode_operations proc_fd_inode_operations = {
906 	lookup:		proc_lookupfd,
907 	permission:	proc_permission,
908 };
909 
proc_base_lookup(struct inode * dir,struct dentry * dentry)910 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
911 {
912 	struct inode *inode;
913 	int error;
914 	struct task_struct *task = dir->u.proc_i.task;
915 	struct pid_entry *p;
916 
917 	error = -ENOENT;
918 	inode = NULL;
919 
920 	for (p = base_stuff; p->name; p++) {
921 		if (p->len != dentry->d_name.len)
922 			continue;
923 		if (!memcmp(dentry->d_name.name, p->name, p->len))
924 			break;
925 	}
926 	if (!p->name)
927 		goto out;
928 
929 	error = -EINVAL;
930 	inode = proc_pid_make_inode(dir->i_sb, task, p->type);
931 	if (!inode)
932 		goto out;
933 
934 	inode->i_mode = p->mode;
935 	/*
936 	 * Yes, it does not scale. And it should not. Don't add
937 	 * new entries into /proc/<pid>/ without very good reasons.
938 	 */
939 	switch(p->type) {
940 		case PROC_PID_FD:
941 			inode->i_nlink = 2;
942 			inode->i_op = &proc_fd_inode_operations;
943 			inode->i_fop = &proc_fd_operations;
944 			break;
945 		case PROC_PID_EXE:
946 			inode->i_op = &proc_pid_link_inode_operations;
947 			inode->u.proc_i.op.proc_get_link = proc_exe_link;
948 			break;
949 		case PROC_PID_CWD:
950 			inode->i_op = &proc_pid_link_inode_operations;
951 			inode->u.proc_i.op.proc_get_link = proc_cwd_link;
952 			break;
953 		case PROC_PID_ROOT:
954 			inode->i_op = &proc_pid_link_inode_operations;
955 			inode->u.proc_i.op.proc_get_link = proc_root_link;
956 			break;
957 		case PROC_PID_ENVIRON:
958 			inode->i_fop = &proc_info_file_operations;
959 			inode->u.proc_i.op.proc_read = proc_pid_environ;
960 			break;
961 		case PROC_PID_STATUS:
962 			inode->i_fop = &proc_info_file_operations;
963 			inode->u.proc_i.op.proc_read = proc_pid_status;
964 			break;
965 		case PROC_PID_STAT:
966 			inode->i_fop = &proc_info_file_operations;
967 			inode->u.proc_i.op.proc_read = proc_pid_stat;
968 			break;
969 		case PROC_PID_CMDLINE:
970 			inode->i_fop = &proc_info_file_operations;
971 			inode->u.proc_i.op.proc_read = proc_pid_cmdline;
972 			break;
973 		case PROC_PID_STATM:
974 			inode->i_fop = &proc_info_file_operations;
975 			inode->u.proc_i.op.proc_read = proc_pid_statm;
976 			break;
977 		case PROC_PID_MAPS:
978 			inode->i_fop = &proc_maps_operations;
979 			break;
980 #ifdef CONFIG_SMP
981 		case PROC_PID_CPU:
982 			inode->i_fop = &proc_info_file_operations;
983 			inode->u.proc_i.op.proc_read = proc_pid_cpu;
984 			break;
985 #endif
986 		case PROC_PID_MEM:
987 			inode->i_op = &proc_mem_inode_operations;
988 			inode->i_fop = &proc_mem_operations;
989 			break;
990 		case PROC_PID_MOUNTS:
991 			inode->i_fop = &proc_mounts_operations;
992 			break;
993 		default:
994 			printk("procfs: impossible type (%d)",p->type);
995 			iput(inode);
996 			return ERR_PTR(-EINVAL);
997 	}
998 	dentry->d_op = &pid_dentry_operations;
999 	d_add(dentry, inode);
1000 	return NULL;
1001 
1002 out:
1003 	return ERR_PTR(error);
1004 }
1005 
1006 static struct file_operations proc_base_operations = {
1007 	read:		generic_read_dir,
1008 	readdir:	proc_base_readdir,
1009 };
1010 
1011 static struct inode_operations proc_base_inode_operations = {
1012 	lookup:		proc_base_lookup,
1013 };
1014 
1015 /*
1016  * /proc/self:
1017  */
proc_self_readlink(struct dentry * dentry,char * buffer,int buflen)1018 static int proc_self_readlink(struct dentry *dentry, char *buffer, int buflen)
1019 {
1020 	char tmp[30];
1021 	sprintf(tmp, "%d", current->pid);
1022 	return vfs_readlink(dentry,buffer,buflen,tmp);
1023 }
1024 
proc_self_follow_link(struct dentry * dentry,struct nameidata * nd)1025 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1026 {
1027 	char tmp[30];
1028 	sprintf(tmp, "%d", current->pid);
1029 	return vfs_follow_link(nd,tmp);
1030 }
1031 
1032 static struct inode_operations proc_self_inode_operations = {
1033 	readlink:	proc_self_readlink,
1034 	follow_link:	proc_self_follow_link,
1035 };
1036 
proc_pid_lookup(struct inode * dir,struct dentry * dentry)1037 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
1038 {
1039 	unsigned int pid, c;
1040 	struct task_struct *task;
1041 	const char *name;
1042 	struct inode *inode;
1043 	int len;
1044 
1045 	pid = 0;
1046 	name = dentry->d_name.name;
1047 	len = dentry->d_name.len;
1048 	if (len == 4 && !memcmp(name, "self", 4)) {
1049 		inode = new_inode(dir->i_sb);
1050 		if (!inode)
1051 			return ERR_PTR(-ENOMEM);
1052 		inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1053 		inode->i_ino = fake_ino(0, PROC_PID_INO);
1054 		inode->u.proc_i.file = NULL;
1055 		inode->u.proc_i.task = NULL;
1056 		inode->i_mode = S_IFLNK|S_IRWXUGO;
1057 		inode->i_uid = inode->i_gid = 0;
1058 		inode->i_size = 64;
1059 		inode->i_op = &proc_self_inode_operations;
1060 		d_add(dentry, inode);
1061 		return NULL;
1062 	}
1063 	while (len-- > 0) {
1064 		c = *name - '0';
1065 		name++;
1066 		if (c > 9)
1067 			goto out;
1068 		if (pid >= MAX_MULBY10)
1069 			goto out;
1070 		pid *= 10;
1071 		pid += c;
1072 		if (!pid)
1073 			goto out;
1074 	}
1075 
1076 	read_lock(&tasklist_lock);
1077 	task = find_task_by_pid(pid);
1078 	if (task)
1079 		get_task_struct(task);
1080 	read_unlock(&tasklist_lock);
1081 	if (!task)
1082 		goto out;
1083 
1084 	inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);
1085 
1086 	free_task_struct(task);
1087 
1088 	if (!inode)
1089 		goto out;
1090 	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1091 	inode->i_op = &proc_base_inode_operations;
1092 	inode->i_fop = &proc_base_operations;
1093 	inode->i_nlink = 3;
1094 	inode->i_flags|=S_IMMUTABLE;
1095 
1096 	dentry->d_op = &pid_base_dentry_operations;
1097 	d_add(dentry, inode);
1098 	return NULL;
1099 out:
1100 	return ERR_PTR(-ENOENT);
1101 }
1102 
proc_pid_delete_inode(struct inode * inode)1103 void proc_pid_delete_inode(struct inode *inode)
1104 {
1105 	if (inode->u.proc_i.file)
1106 		fput(inode->u.proc_i.file);
1107 	if (inode->u.proc_i.task)
1108 		free_task_struct(inode->u.proc_i.task);
1109 }
1110 
1111 #define PROC_NUMBUF 10
1112 #define PROC_MAXPIDS 20
1113 
1114 /*
1115  * Get a few pid's to return for filldir - we need to hold the
1116  * tasklist lock while doing this, and we must release it before
1117  * we actually do the filldir itself, so we use a temp buffer..
1118  */
get_pid_list(int index,unsigned int * pids)1119 static int get_pid_list(int index, unsigned int *pids)
1120 {
1121 	struct task_struct *p;
1122 	int nr_pids = 0;
1123 
1124 	index--;
1125 	read_lock(&tasklist_lock);
1126 	for_each_task(p) {
1127 		int pid = p->pid;
1128 		if (!pid)
1129 			continue;
1130 		if (--index >= 0)
1131 			continue;
1132 		pids[nr_pids] = pid;
1133 		nr_pids++;
1134 		if (nr_pids >= PROC_MAXPIDS)
1135 			break;
1136 	}
1137 	read_unlock(&tasklist_lock);
1138 	return nr_pids;
1139 }
1140 
/*
 * Emit the per-process part of the /proc root directory listing.
 *
 * Position FIRST_PROCESS_ENTRY is the "self" symlink; later positions
 * map onto the task list through get_pid_list().  At most PROC_MAXPIDS
 * pid directories are reported per call, each named by its decimal pid.
 * Always returns 0; filp->f_pos records how far the listing got.
 */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	unsigned int pid_array[PROC_MAXPIDS];
	char buf[PROC_NUMBUF];
	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
	unsigned int nr_pids, i;

	if (nr == 0) {
		ino_t self_ino = fake_ino(0, PROC_PID_INO);

		if (filldir(dirent, "self", 4, filp->f_pos, self_ino, DT_LNK) < 0)
			return 0;
		filp->f_pos++;
		nr++;
	}

	nr_pids = get_pid_list(nr, pid_array);

	for (i = 0; i < nr_pids; i++) {
		int pid = pid_array[i];
		ino_t ino = fake_ino(pid, PROC_PID_INO);
		unsigned long start = PROC_NUMBUF;
		int rest = pid;

		/* Render the pid in decimal, right-aligned in buf. */
		do {
			buf[--start] = '0' + (rest % 10);
			rest /= 10;
		} while (rest);

		if (filldir(dirent, buf+start, PROC_NUMBUF-start, filp->f_pos, ino, DT_DIR) < 0)
			break;
		filp->f_pos++;
	}
	return 0;
}
1171