/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't
 *  use the rest of procfs anymore.
 */
15
16 #include <asm/uaccess.h>
17
18 #include <linux/config.h>
19 #include <linux/errno.h>
20 #include <linux/sched.h>
21 #include <linux/proc_fs.h>
22 #include <linux/stat.h>
23 #include <linux/init.h>
24 #include <linux/file.h>
25 #include <linux/string.h>
26 #include <linux/seq_file.h>
27 #include <linux/namespace.h>
28
/*
 * For historical reasons the per-process entries keep the same inumbers
 * as in the old procfs. The encoding below may be changed freely, as long
 * as the resulting range stays distinct from the inumbers used by the rest
 * of procfs (currently 0x0000--0xffff). Once procfs gets a separate
 * superblock the magical ranges can be forgotten altogether.
 */

/* Build an inode number: pid shifted above bit 15, OR-ed with the entry number. */
#define fake_ino(pid, ino)	(((pid) << 16) | (ino))
38
/*
 * Per-task text generators defined outside this file. proc_base_lookup()
 * installs them as proc_read handlers; proc_info_read() calls them with
 * the task and a freshly allocated page and treats the return value as
 * the number of bytes produced (negative on error).
 */
int proc_pid_stat(struct task_struct *task, char *buffer);
int proc_pid_status(struct task_struct *task, char *buffer);
int proc_pid_statm(struct task_struct *task, char *buffer);
int proc_pid_cpu(struct task_struct *task, char *buffer);
43
proc_fd_link(struct inode * inode,struct dentry ** dentry,struct vfsmount ** mnt)44 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
45 {
46 if (inode->u.proc_i.file) {
47 *mnt = mntget(inode->u.proc_i.file->f_vfsmnt);
48 *dentry = dget(inode->u.proc_i.file->f_dentry);
49 return 0;
50 }
51 return -ENOENT;
52 }
53
proc_exe_link(struct inode * inode,struct dentry ** dentry,struct vfsmount ** mnt)54 static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
55 {
56 struct mm_struct * mm;
57 struct vm_area_struct * vma;
58 int result = -ENOENT;
59 struct task_struct *task = inode->u.proc_i.task;
60
61 task_lock(task);
62 mm = task->mm;
63 if (mm)
64 atomic_inc(&mm->mm_users);
65 task_unlock(task);
66 if (!mm)
67 goto out;
68 down_read(&mm->mmap_sem);
69 vma = mm->mmap;
70 while (vma) {
71 if ((vma->vm_flags & VM_EXECUTABLE) &&
72 vma->vm_file) {
73 *mnt = mntget(vma->vm_file->f_vfsmnt);
74 *dentry = dget(vma->vm_file->f_dentry);
75 result = 0;
76 break;
77 }
78 vma = vma->vm_next;
79 }
80 up_read(&mm->mmap_sem);
81 mmput(mm);
82 out:
83 return result;
84 }
85
proc_cwd_link(struct inode * inode,struct dentry ** dentry,struct vfsmount ** mnt)86 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
87 {
88 struct fs_struct *fs;
89 int result = -ENOENT;
90 task_lock(inode->u.proc_i.task);
91 fs = inode->u.proc_i.task->fs;
92 if(fs)
93 atomic_inc(&fs->count);
94 task_unlock(inode->u.proc_i.task);
95 if (fs) {
96 read_lock(&fs->lock);
97 *mnt = mntget(fs->pwdmnt);
98 *dentry = dget(fs->pwd);
99 read_unlock(&fs->lock);
100 result = 0;
101 put_fs_struct(fs);
102 }
103 return result;
104 }
105
proc_root_link(struct inode * inode,struct dentry ** dentry,struct vfsmount ** mnt)106 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
107 {
108 struct fs_struct *fs;
109 int result = -ENOENT;
110 task_lock(inode->u.proc_i.task);
111 fs = inode->u.proc_i.task->fs;
112 if(fs)
113 atomic_inc(&fs->count);
114 task_unlock(inode->u.proc_i.task);
115 if (fs) {
116 read_lock(&fs->lock);
117 *mnt = mntget(fs->rootmnt);
118 *dentry = dget(fs->root);
119 read_unlock(&fs->lock);
120 result = 0;
121 put_fs_struct(fs);
122 }
123 return result;
124 }
125
/*
 * True when the caller is the task itself, or is its parent while the
 * task is being ptraced and is stopped.
 *
 * The argument is parenthesized at every use so that an arbitrary pointer
 * expression expands correctly. It is still evaluated more than once, so
 * do not pass an expression with side effects.
 */
#define MAY_PTRACE(task) \
	((task) == current || \
	((task)->p_pptr == current && \
	((task)->ptrace & PT_PTRACED) && (task)->state == TASK_STOPPED))
130
may_ptrace_attach(struct task_struct * task)131 static int may_ptrace_attach(struct task_struct *task)
132 {
133 int retval = 0;
134
135 task_lock(task);
136
137 if (((current->uid != task->euid) ||
138 (current->uid != task->suid) ||
139 (current->uid != task->uid) ||
140 (current->gid != task->egid) ||
141 (current->gid != task->sgid) ||
142 (!cap_issubset(task->cap_permitted, current->cap_permitted)) ||
143 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
144 goto out;
145 rmb();
146 if (!is_dumpable(task) && !capable(CAP_SYS_PTRACE))
147 goto out;
148
149 retval = 1;
150
151 out:
152 task_unlock(task);
153 return retval;
154 }
155
proc_pid_environ(struct task_struct * task,char * buffer)156 static int proc_pid_environ(struct task_struct *task, char * buffer)
157 {
158 struct mm_struct *mm;
159 int res = 0;
160
161 if (!may_ptrace_attach(task))
162 return -ESRCH;
163
164 task_lock(task);
165 mm = task->mm;
166 if (mm)
167 atomic_inc(&mm->mm_users);
168 task_unlock(task);
169 if (mm && mm->env_start && mm->env_start < mm->env_end) {
170 unsigned long len = mm->env_end - mm->env_start;
171 if (len > PAGE_SIZE)
172 len = PAGE_SIZE;
173 res = access_process_vm(task, mm->env_start, buffer, len, 0);
174 if (res >= 0 && !may_ptrace_attach(task))
175 res = -ESRCH;
176 }
177 if (mm)
178 mmput(mm);
179 return res;
180 }
181
proc_pid_cmdline(struct task_struct * task,char * buffer)182 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
183 {
184 struct mm_struct *mm;
185 int res = 0;
186 task_lock(task);
187 mm = task->mm;
188 if (mm) {
189 if (mm->arg_end)
190 atomic_inc(&mm->mm_users);
191 else
192 mm = NULL;
193 }
194 task_unlock(task);
195 if (mm && mm->arg_start && mm->arg_start < mm->arg_end) {
196 unsigned long len = mm->arg_end - mm->arg_start;
197 if (len > PAGE_SIZE)
198 len = PAGE_SIZE;
199 res = access_process_vm(task, mm->arg_start, buffer, len, 0);
200 /* If the nul at the end of args has been overwritten, then
201 assume application is using setproctitle(3). */
202 if (res > 0 && buffer[res - 1] != '\0') {
203 len = strnlen(buffer, res);
204 if (len < res) {
205 res = len;
206 } else
207 if (mm->env_start < mm->env_end && res <= PAGE_SIZE) {
208 len = mm->env_end - mm->env_start;
209 if (len > PAGE_SIZE - res)
210 len = PAGE_SIZE - res;
211 res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
212 res = strnlen(buffer, res);
213 } else
214 res = 0;
215 }
216 }
217 if (mm)
218 mmput(mm);
219 return res;
220 }
221
222 /************************************************************************/
223 /* Here the fs part begins */
224 /************************************************************************/
225
226 /* permission checks */
227
proc_check_root(struct inode * inode)228 static int proc_check_root(struct inode *inode)
229 {
230 struct dentry *de, *base, *root;
231 struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
232 int res = 0;
233
234 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
235 return -ENOENT;
236 read_lock(¤t->fs->lock);
237 our_vfsmnt = mntget(current->fs->rootmnt);
238 base = dget(current->fs->root);
239 read_unlock(¤t->fs->lock);
240
241 spin_lock(&dcache_lock);
242 de = root;
243 mnt = vfsmnt;
244
245 while (vfsmnt != our_vfsmnt) {
246 if (vfsmnt == vfsmnt->mnt_parent)
247 goto out;
248 de = vfsmnt->mnt_mountpoint;
249 vfsmnt = vfsmnt->mnt_parent;
250 }
251
252 if (!is_subdir(de, base))
253 goto out;
254 spin_unlock(&dcache_lock);
255
256 exit:
257 dput(base);
258 mntput(our_vfsmnt);
259 dput(root);
260 mntput(mnt);
261 return res;
262 out:
263 spin_unlock(&dcache_lock);
264 res = -EACCES;
265 goto exit;
266 }
267
proc_permission(struct inode * inode,int mask)268 static int proc_permission(struct inode *inode, int mask)
269 {
270 if (vfs_permission(inode, mask) != 0)
271 return -EACCES;
272 return proc_check_root(inode);
273 }
274
275 extern struct seq_operations proc_pid_maps_op;
maps_open(struct inode * inode,struct file * file)276 static int maps_open(struct inode *inode, struct file *file)
277 {
278 struct task_struct *task = inode->u.proc_i.task;
279 int ret = seq_open(file, &proc_pid_maps_op);
280 if (!ret) {
281 struct seq_file *m = file->private_data;
282 m->private = task;
283 }
284 return ret;
285 }
286
287 static struct file_operations proc_maps_operations = {
288 .open = maps_open,
289 .read = seq_read,
290 .llseek = seq_lseek,
291 .release = seq_release,
292 };
293
294 extern struct seq_operations mounts_op;
mounts_open(struct inode * inode,struct file * file)295 static int mounts_open(struct inode *inode, struct file *file)
296 {
297 struct task_struct *task = inode->u.proc_i.task;
298 int ret = seq_open(file, &mounts_op);
299
300 if (!ret) {
301 struct seq_file *m = file->private_data;
302 struct namespace *namespace;
303 task_lock(task);
304 namespace = task->namespace;
305 if (namespace)
306 get_namespace(namespace);
307 task_unlock(task);
308
309 if (namespace)
310 m->private = namespace;
311 else {
312 seq_release(inode, file);
313 ret = -EINVAL;
314 }
315 }
316 return ret;
317 }
318
mounts_release(struct inode * inode,struct file * file)319 static int mounts_release(struct inode *inode, struct file *file)
320 {
321 struct seq_file *m = file->private_data;
322 struct namespace *namespace = m->private;
323 put_namespace(namespace);
324 return seq_release(inode, file);
325 }
326
327 static struct file_operations proc_mounts_operations = {
328 open: mounts_open,
329 read: seq_read,
330 llseek: seq_lseek,
331 release: mounts_release,
332 };
333
334 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
335
proc_info_read(struct file * file,char * buf,size_t count,loff_t * ppos)336 static ssize_t proc_info_read(struct file * file, char * buf,
337 size_t count, loff_t *ppos)
338 {
339 struct inode * inode = file->f_dentry->d_inode;
340 unsigned long page;
341 ssize_t length;
342 ssize_t end;
343 struct task_struct *task = inode->u.proc_i.task;
344 loff_t pos = *ppos;
345
346 if (count > PROC_BLOCK_SIZE)
347 count = PROC_BLOCK_SIZE;
348 if (!(page = __get_free_page(GFP_KERNEL)))
349 return -ENOMEM;
350
351 length = inode->u.proc_i.op.proc_read(task, (char*)page);
352
353 if (length < 0) {
354 free_page(page);
355 return length;
356 }
357 /* Static 4kB (or whatever) block capacity */
358 if (pos < 0 || pos >= length) {
359 free_page(page);
360 return 0;
361 }
362 if (count > length - pos)
363 count = length - pos;
364 end = count + pos;
365 copy_to_user(buf, (char *) page + pos, count);
366 *ppos = end;
367 free_page(page);
368 return count;
369 }
370
371 static struct file_operations proc_info_file_operations = {
372 read: proc_info_read,
373 };
374
mem_open(struct inode * inode,struct file * file)375 static int mem_open(struct inode* inode, struct file* file)
376 {
377 file->private_data = (void*)((long)current->self_exec_id);
378 return 0;
379 }
380
mem_read(struct file * file,char * buf,size_t count,loff_t * ppos)381 static ssize_t mem_read(struct file * file, char * buf,
382 size_t count, loff_t *ppos)
383 {
384 struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
385 char *page;
386 unsigned long src = *ppos;
387 int copied = 0;
388 struct mm_struct *mm;
389
390 if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
391 return -ESRCH;
392
393 page = (char *)__get_free_page(GFP_USER);
394 if (!page)
395 return -ENOMEM;
396
397 task_lock(task);
398 mm = task->mm;
399 if (mm)
400 atomic_inc(&mm->mm_users);
401 task_unlock(task);
402 if (!mm){
403 copied = 0;
404 goto out_free;
405 }
406
407 if (file->private_data != (void*)((long)current->self_exec_id) ) {
408 mmput(mm);
409 copied = -EIO;
410 goto out_free;
411 }
412
413 while (count > 0) {
414 int this_len, retval;
415
416 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
417 retval = access_process_vm(task, src, page, this_len, 0);
418 if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) {
419 if (!copied)
420 copied = -EIO;
421 break;
422 }
423 if (copy_to_user(buf, page, retval)) {
424 copied = -EFAULT;
425 break;
426 }
427 copied += retval;
428 src += retval;
429 buf += retval;
430 count -= retval;
431 }
432 *ppos = src;
433 mmput(mm);
434
435 out_free:
436 free_page((unsigned long) page);
437 return copied;
438 }
439
/*
 * With mem_write defined as NULL, proc_mem_operations gets no write hook
 * and the implementation below is compiled out.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
static ssize_t mem_write(struct file * file, const char * buf,
			 size_t count, loff_t *ppos)
{
	struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task;
	unsigned long addr = *ppos;
	char *bounce;
	int done = 0;

	if (!MAY_PTRACE(task) || !may_ptrace_attach(task))
		return -ESRCH;

	bounce = (char *)__get_free_page(GFP_USER);
	if (!bounce)
		return -ENOMEM;

	while (count > 0) {
		int chunk, put;

		chunk = (count > PAGE_SIZE) ? PAGE_SIZE : count;
		if (copy_from_user(bounce, buf, chunk)) {
			done = -EFAULT;
			break;
		}
		put = access_process_vm(task, addr, bounce, chunk, 1);
		if (!put) {
			/* Only an error if nothing was transferred so far. */
			if (!done)
				done = -EIO;
			break;
		}
		done += put;
		buf += put;
		addr += put;
		count -= put;
	}
	*ppos = addr;
	free_page((unsigned long) bounce);
	return done;
}
#endif
483
/*
 * llseek() for /proc/<pid>/mem. Only origin 0 (set the position to
 * @offset) and origin 1 (add @offset to the position) are supported;
 * anything else yields -EINVAL.
 *
 * force_successful_syscall_return() is called before returning the new
 * position.
 */
static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
{
	if (orig == 0)
		file->f_pos = offset;
	else if (orig == 1)
		file->f_pos += offset;
	else
		return -EINVAL;

	force_successful_syscall_return();
	return file->f_pos;
}
499
500 static struct file_operations proc_mem_operations = {
501 llseek: mem_lseek,
502 read: mem_read,
503 write: mem_write,
504 open: mem_open,
505 };
506
507 static struct inode_operations proc_mem_inode_operations = {
508 permission: proc_permission,
509 };
510
proc_pid_follow_link(struct dentry * dentry,struct nameidata * nd)511 static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
512 {
513 struct inode *inode = dentry->d_inode;
514 int error = -EACCES;
515
516 /* We don't need a base pointer in the /proc filesystem */
517 path_release(nd);
518
519 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
520 goto out;
521 error = proc_check_root(inode);
522 if (error)
523 goto out;
524
525 error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
526 nd->last_type = LAST_BIND;
527 out:
528 return error;
529 }
530
do_proc_readlink(struct dentry * dentry,struct vfsmount * mnt,char * buffer,int buflen)531 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
532 char * buffer, int buflen)
533 {
534 struct inode * inode;
535 char * tmp = (char*)__get_free_page(GFP_KERNEL), *path;
536 int len;
537
538 if (!tmp)
539 return -ENOMEM;
540
541 inode = dentry->d_inode;
542 path = d_path(dentry, mnt, tmp, PAGE_SIZE);
543 if (IS_ERR(path)) {
544 free_page((unsigned long)tmp);
545 return PTR_ERR(path);
546 }
547 len = tmp + PAGE_SIZE - 1 - path;
548
549 if (len < buflen)
550 buflen = len;
551 copy_to_user(buffer, path, buflen);
552 free_page((unsigned long)tmp);
553 return buflen;
554 }
555
proc_pid_readlink(struct dentry * dentry,char * buffer,int buflen)556 static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen)
557 {
558 int error = -EACCES;
559 struct inode *inode = dentry->d_inode;
560 struct dentry *de;
561 struct vfsmount *mnt = NULL;
562
563 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
564 goto out;
565 error = proc_check_root(inode);
566 if (error)
567 goto out;
568
569 error = inode->u.proc_i.op.proc_get_link(inode, &de, &mnt);
570 if (error)
571 goto out;
572
573 error = do_proc_readlink(de, mnt, buffer, buflen);
574 dput(de);
575 mntput(mnt);
576 out:
577 return error;
578 }
579
580 static struct inode_operations proc_pid_link_inode_operations = {
581 readlink: proc_pid_readlink,
582 follow_link: proc_pid_follow_link
583 };
584
/* One entry of the /proc/<pid>/ directory table (see base_stuff). */
struct pid_entry {
	int	type;	/* PROC_PID_* value; also the entry part of the fake inode number */
	int	len;	/* length of name, not counting the terminating NUL */
	char	*name;	/* entry name; NULL terminates the table */
	mode_t	mode;	/* file type and permission bits copied to the inode */
};
591
/*
 * Entry numbers used as the low part of fake inode numbers. Values are
 * spelled out so that the numbering does not silently shift when entries
 * are added or reordered.
 */
enum pid_directory_inos {
	PROC_PID_INO		= 2,
	PROC_PID_STATUS		= 3,
	PROC_PID_MEM		= 4,
	PROC_PID_CWD		= 5,
	PROC_PID_ROOT		= 6,
	PROC_PID_EXE		= 7,
	PROC_PID_FD		= 8,
	PROC_PID_ENVIRON	= 9,
	PROC_PID_CMDLINE	= 10,
	PROC_PID_STAT		= 11,
	PROC_PID_STATM		= 12,
	PROC_PID_MAPS		= 13,
	PROC_PID_CPU		= 14,
	PROC_PID_MOUNTS		= 15,
	PROC_PID_FD_DIR		= 0x8000,	/* 0x8000-0xffff */
};
609
610 #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
611 static struct pid_entry base_stuff[] = {
612 E(PROC_PID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR),
613 E(PROC_PID_ENVIRON, "environ", S_IFREG|S_IRUSR),
614 E(PROC_PID_STATUS, "status", S_IFREG|S_IRUGO),
615 E(PROC_PID_CMDLINE, "cmdline", S_IFREG|S_IRUGO),
616 E(PROC_PID_STAT, "stat", S_IFREG|S_IRUGO),
617 E(PROC_PID_STATM, "statm", S_IFREG|S_IRUGO),
618 #ifdef CONFIG_SMP
619 E(PROC_PID_CPU, "cpu", S_IFREG|S_IRUGO),
620 #endif
621 E(PROC_PID_MAPS, "maps", S_IFREG|S_IRUGO),
622 E(PROC_PID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
623 E(PROC_PID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
624 E(PROC_PID_ROOT, "root", S_IFLNK|S_IRWXUGO),
625 E(PROC_PID_EXE, "exe", S_IFLNK|S_IRWXUGO),
626 E(PROC_PID_MOUNTS, "mounts", S_IFREG|S_IRUGO),
627 {0,0,NULL,0}
628 };
629 #undef E
630
631 #define NUMBUF 10
632
proc_readfd(struct file * filp,void * dirent,filldir_t filldir)633 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
634 {
635 struct inode *inode = filp->f_dentry->d_inode;
636 struct task_struct *p = inode->u.proc_i.task;
637 unsigned int fd, pid, ino;
638 int retval;
639 char buf[NUMBUF];
640 struct files_struct * files;
641
642 retval = 0;
643 pid = p->pid;
644
645 fd = filp->f_pos;
646 switch (fd) {
647 case 0:
648 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
649 goto out;
650 filp->f_pos++;
651 case 1:
652 ino = fake_ino(pid, PROC_PID_INO);
653 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
654 goto out;
655 filp->f_pos++;
656 default:
657 task_lock(p);
658 files = p->files;
659 if (files)
660 atomic_inc(&files->count);
661 task_unlock(p);
662 if (!files)
663 goto out;
664 read_lock(&files->file_lock);
665 for (fd = filp->f_pos-2;
666 fd < files->max_fds;
667 fd++, filp->f_pos++) {
668 unsigned int i,j;
669
670 if (!fcheck_files(files, fd))
671 continue;
672 read_unlock(&files->file_lock);
673
674 j = NUMBUF;
675 i = fd;
676 do {
677 j--;
678 buf[j] = '0' + (i % 10);
679 i /= 10;
680 } while (i);
681
682 ino = fake_ino(pid, PROC_PID_FD_DIR + fd);
683 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
684 read_lock(&files->file_lock);
685 break;
686 }
687 read_lock(&files->file_lock);
688 }
689 read_unlock(&files->file_lock);
690 put_files_struct(files);
691 }
692 out:
693 return retval;
694 }
695
/*
 * readdir() for /proc/<pid>/.
 *
 * Position 0 is ".", position 1 is "..", and position n + 2 is entry n of
 * base_stuff.
 *
 * Returns -ENOENT when the task's pid is 0, 0 when filldir() refuses an
 * entry, and 1 once the remaining entries have been emitted or the
 * position lies beyond the table.
 */
static int proc_base_readdir(struct file * filp,
	void * dirent, filldir_t filldir)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pid_entry *ent;
	int pid;
	int i;

	pid = inode->u.proc_i.task->pid;
	if (!pid)
		return -ENOENT;

	i = filp->f_pos;
	switch (i) {
	case 0:
		if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
			return 0;
		i++;
		filp->f_pos++;
		/* fall through */
	case 1:
		if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
			return 0;
		i++;
		filp->f_pos++;
		/* fall through */
	default:
		i -= 2;
		if (i>=sizeof(base_stuff)/sizeof(base_stuff[0]))
			return 1;
		for (ent = base_stuff + i; ent->name; ent++) {
			/* The d_type argument is the file-type part of the mode. */
			if (filldir(dirent, ent->name, ent->len, filp->f_pos,
				    fake_ino(pid, ent->type), ent->mode >> 12) < 0)
				return 0;
			filp->f_pos++;
		}
	}
	return 1;
}
736
737 /* building an inode */
738
/* Return is_dumpable(task), sampled while holding the task lock. */
static int task_dumpable(struct task_struct *task)
{
	int dumpable;

	task_lock(task);
	dumpable = is_dumpable(task);
	task_unlock(task);

	return dumpable;
}
748
749
proc_pid_make_inode(struct super_block * sb,struct task_struct * task,int ino)750 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
751 {
752 struct inode * inode;
753
754 /* We need a new inode */
755
756 inode = new_inode(sb);
757 if (!inode)
758 goto out;
759
760 /* Common stuff */
761
762 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
763 inode->i_ino = fake_ino(task->pid, ino);
764
765 if (!task->pid)
766 goto out_unlock;
767
768 /*
769 * grab the reference to task.
770 */
771 get_task_struct(task);
772 inode->u.proc_i.task = task;
773 inode->i_uid = 0;
774 inode->i_gid = 0;
775 if (ino == PROC_PID_INO || task_dumpable(task)) {
776 inode->i_uid = task->euid;
777 inode->i_gid = task->egid;
778 }
779
780 out:
781 return inode;
782
783 out_unlock:
784 inode->u.generic_ip = NULL;
785 iput(inode);
786 return NULL;
787 }
788
789 /* dentry stuff */
790
/* ->d_revalidate() for fd/<n> dentries: unconditionally returns 0. */
static int pid_fd_revalidate(struct dentry * dentry, int flags)
{
	return 0;
}
795
796 /*
797 * Exceptional case: normally we are not allowed to unhash a busy
798 * directory. In this case, however, we can do it - no aliasing problems
799 * due to the way we treat inodes.
800 */
pid_base_revalidate(struct dentry * dentry,int flags)801 static int pid_base_revalidate(struct dentry * dentry, int flags)
802 {
803 if (dentry->d_inode->u.proc_i.task->pid)
804 return 1;
805 d_drop(dentry);
806 return 0;
807 }
808
/* ->d_delete() for all per-task dentries: unconditionally returns 1. */
static int pid_delete_dentry(struct dentry * dentry)
{
	return 1;
}
813
814 static struct dentry_operations pid_fd_dentry_operations =
815 {
816 d_revalidate: pid_fd_revalidate,
817 d_delete: pid_delete_dentry,
818 };
819
820 static struct dentry_operations pid_dentry_operations =
821 {
822 d_delete: pid_delete_dentry,
823 };
824
825 static struct dentry_operations pid_base_dentry_operations =
826 {
827 d_revalidate: pid_base_revalidate,
828 d_delete: pid_delete_dentry,
829 };
830
831 /* Lookups */
/* Any unsigned value below this can take "* 10 + digit" without wrapping. */
#define MAX_MULBY10	((~0U - 9) / 10)
833
proc_lookupfd(struct inode * dir,struct dentry * dentry)834 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
835 {
836 unsigned int fd, c;
837 struct task_struct *task = dir->u.proc_i.task;
838 struct file * file;
839 struct files_struct * files;
840 struct inode *inode;
841 const char *name;
842 int len;
843
844 fd = 0;
845 len = dentry->d_name.len;
846 name = dentry->d_name.name;
847 if (len > 1 && *name == '0') goto out;
848 while (len-- > 0) {
849 c = *name - '0';
850 name++;
851 if (c > 9)
852 goto out;
853 if (fd >= MAX_MULBY10)
854 goto out;
855 fd *= 10;
856 fd += c;
857 }
858
859 inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd);
860 if (!inode)
861 goto out;
862 task_lock(task);
863 files = task->files;
864 if (files)
865 atomic_inc(&files->count);
866 task_unlock(task);
867 if (!files)
868 goto out_unlock;
869 read_lock(&files->file_lock);
870 file = inode->u.proc_i.file = fcheck_files(files, fd);
871 if (!file)
872 goto out_unlock2;
873 get_file(file);
874 read_unlock(&files->file_lock);
875 put_files_struct(files);
876 inode->i_op = &proc_pid_link_inode_operations;
877 inode->i_size = 64;
878 inode->i_mode = S_IFLNK;
879 inode->u.proc_i.op.proc_get_link = proc_fd_link;
880 if (file->f_mode & 1)
881 inode->i_mode |= S_IRUSR | S_IXUSR;
882 if (file->f_mode & 2)
883 inode->i_mode |= S_IWUSR | S_IXUSR;
884 dentry->d_op = &pid_fd_dentry_operations;
885 d_add(dentry, inode);
886 return NULL;
887
888 out_unlock2:
889 read_unlock(&files->file_lock);
890 put_files_struct(files);
891 out_unlock:
892 iput(inode);
893 out:
894 return ERR_PTR(-ENOENT);
895 }
896
897 static struct file_operations proc_fd_operations = {
898 read: generic_read_dir,
899 readdir: proc_readfd,
900 };
901
902 /*
903 * proc directories can do almost nothing..
904 */
905 static struct inode_operations proc_fd_inode_operations = {
906 lookup: proc_lookupfd,
907 permission: proc_permission,
908 };
909
proc_base_lookup(struct inode * dir,struct dentry * dentry)910 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
911 {
912 struct inode *inode;
913 int error;
914 struct task_struct *task = dir->u.proc_i.task;
915 struct pid_entry *p;
916
917 error = -ENOENT;
918 inode = NULL;
919
920 for (p = base_stuff; p->name; p++) {
921 if (p->len != dentry->d_name.len)
922 continue;
923 if (!memcmp(dentry->d_name.name, p->name, p->len))
924 break;
925 }
926 if (!p->name)
927 goto out;
928
929 error = -EINVAL;
930 inode = proc_pid_make_inode(dir->i_sb, task, p->type);
931 if (!inode)
932 goto out;
933
934 inode->i_mode = p->mode;
935 /*
936 * Yes, it does not scale. And it should not. Don't add
937 * new entries into /proc/<pid>/ without very good reasons.
938 */
939 switch(p->type) {
940 case PROC_PID_FD:
941 inode->i_nlink = 2;
942 inode->i_op = &proc_fd_inode_operations;
943 inode->i_fop = &proc_fd_operations;
944 break;
945 case PROC_PID_EXE:
946 inode->i_op = &proc_pid_link_inode_operations;
947 inode->u.proc_i.op.proc_get_link = proc_exe_link;
948 break;
949 case PROC_PID_CWD:
950 inode->i_op = &proc_pid_link_inode_operations;
951 inode->u.proc_i.op.proc_get_link = proc_cwd_link;
952 break;
953 case PROC_PID_ROOT:
954 inode->i_op = &proc_pid_link_inode_operations;
955 inode->u.proc_i.op.proc_get_link = proc_root_link;
956 break;
957 case PROC_PID_ENVIRON:
958 inode->i_fop = &proc_info_file_operations;
959 inode->u.proc_i.op.proc_read = proc_pid_environ;
960 break;
961 case PROC_PID_STATUS:
962 inode->i_fop = &proc_info_file_operations;
963 inode->u.proc_i.op.proc_read = proc_pid_status;
964 break;
965 case PROC_PID_STAT:
966 inode->i_fop = &proc_info_file_operations;
967 inode->u.proc_i.op.proc_read = proc_pid_stat;
968 break;
969 case PROC_PID_CMDLINE:
970 inode->i_fop = &proc_info_file_operations;
971 inode->u.proc_i.op.proc_read = proc_pid_cmdline;
972 break;
973 case PROC_PID_STATM:
974 inode->i_fop = &proc_info_file_operations;
975 inode->u.proc_i.op.proc_read = proc_pid_statm;
976 break;
977 case PROC_PID_MAPS:
978 inode->i_fop = &proc_maps_operations;
979 break;
980 #ifdef CONFIG_SMP
981 case PROC_PID_CPU:
982 inode->i_fop = &proc_info_file_operations;
983 inode->u.proc_i.op.proc_read = proc_pid_cpu;
984 break;
985 #endif
986 case PROC_PID_MEM:
987 inode->i_op = &proc_mem_inode_operations;
988 inode->i_fop = &proc_mem_operations;
989 break;
990 case PROC_PID_MOUNTS:
991 inode->i_fop = &proc_mounts_operations;
992 break;
993 default:
994 printk("procfs: impossible type (%d)",p->type);
995 iput(inode);
996 return ERR_PTR(-EINVAL);
997 }
998 dentry->d_op = &pid_dentry_operations;
999 d_add(dentry, inode);
1000 return NULL;
1001
1002 out:
1003 return ERR_PTR(error);
1004 }
1005
1006 static struct file_operations proc_base_operations = {
1007 read: generic_read_dir,
1008 readdir: proc_base_readdir,
1009 };
1010
1011 static struct inode_operations proc_base_inode_operations = {
1012 lookup: proc_base_lookup,
1013 };
1014
1015 /*
1016 * /proc/self:
1017 */
proc_self_readlink(struct dentry * dentry,char * buffer,int buflen)1018 static int proc_self_readlink(struct dentry *dentry, char *buffer, int buflen)
1019 {
1020 char tmp[30];
1021 sprintf(tmp, "%d", current->pid);
1022 return vfs_readlink(dentry,buffer,buflen,tmp);
1023 }
1024
proc_self_follow_link(struct dentry * dentry,struct nameidata * nd)1025 static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1026 {
1027 char tmp[30];
1028 sprintf(tmp, "%d", current->pid);
1029 return vfs_follow_link(nd,tmp);
1030 }
1031
1032 static struct inode_operations proc_self_inode_operations = {
1033 readlink: proc_self_readlink,
1034 follow_link: proc_self_follow_link,
1035 };
1036
proc_pid_lookup(struct inode * dir,struct dentry * dentry)1037 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
1038 {
1039 unsigned int pid, c;
1040 struct task_struct *task;
1041 const char *name;
1042 struct inode *inode;
1043 int len;
1044
1045 pid = 0;
1046 name = dentry->d_name.name;
1047 len = dentry->d_name.len;
1048 if (len == 4 && !memcmp(name, "self", 4)) {
1049 inode = new_inode(dir->i_sb);
1050 if (!inode)
1051 return ERR_PTR(-ENOMEM);
1052 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1053 inode->i_ino = fake_ino(0, PROC_PID_INO);
1054 inode->u.proc_i.file = NULL;
1055 inode->u.proc_i.task = NULL;
1056 inode->i_mode = S_IFLNK|S_IRWXUGO;
1057 inode->i_uid = inode->i_gid = 0;
1058 inode->i_size = 64;
1059 inode->i_op = &proc_self_inode_operations;
1060 d_add(dentry, inode);
1061 return NULL;
1062 }
1063 while (len-- > 0) {
1064 c = *name - '0';
1065 name++;
1066 if (c > 9)
1067 goto out;
1068 if (pid >= MAX_MULBY10)
1069 goto out;
1070 pid *= 10;
1071 pid += c;
1072 if (!pid)
1073 goto out;
1074 }
1075
1076 read_lock(&tasklist_lock);
1077 task = find_task_by_pid(pid);
1078 if (task)
1079 get_task_struct(task);
1080 read_unlock(&tasklist_lock);
1081 if (!task)
1082 goto out;
1083
1084 inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO);
1085
1086 free_task_struct(task);
1087
1088 if (!inode)
1089 goto out;
1090 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1091 inode->i_op = &proc_base_inode_operations;
1092 inode->i_fop = &proc_base_operations;
1093 inode->i_nlink = 3;
1094 inode->i_flags|=S_IMMUTABLE;
1095
1096 dentry->d_op = &pid_base_dentry_operations;
1097 d_add(dentry, inode);
1098 return NULL;
1099 out:
1100 return ERR_PTR(-ENOENT);
1101 }
1102
proc_pid_delete_inode(struct inode * inode)1103 void proc_pid_delete_inode(struct inode *inode)
1104 {
1105 if (inode->u.proc_i.file)
1106 fput(inode->u.proc_i.file);
1107 if (inode->u.proc_i.task)
1108 free_task_struct(inode->u.proc_i.task);
1109 }
1110
#define PROC_NUMBUF	10	/* scratch buffer size for a pid printed in decimal */
#define PROC_MAXPIDS	20	/* pids collected per get_pid_list() call */
1113
1114 /*
1115 * Get a few pid's to return for filldir - we need to hold the
1116 * tasklist lock while doing this, and we must release it before
1117 * we actually do the filldir itself, so we use a temp buffer..
1118 */
get_pid_list(int index,unsigned int * pids)1119 static int get_pid_list(int index, unsigned int *pids)
1120 {
1121 struct task_struct *p;
1122 int nr_pids = 0;
1123
1124 index--;
1125 read_lock(&tasklist_lock);
1126 for_each_task(p) {
1127 int pid = p->pid;
1128 if (!pid)
1129 continue;
1130 if (--index >= 0)
1131 continue;
1132 pids[nr_pids] = pid;
1133 nr_pids++;
1134 if (nr_pids >= PROC_MAXPIDS)
1135 break;
1136 }
1137 read_unlock(&tasklist_lock);
1138 return nr_pids;
1139 }
1140
/*
 * readdir() for the process part of the /proc root.
 *
 * Position FIRST_PROCESS_ENTRY is "self"; later positions are the pid
 * directories, fetched in batches of up to PROC_MAXPIDS by
 * get_pid_list(). One call emits at most one batch. Always returns 0.
 */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	unsigned int pid_array[PROC_MAXPIDS];
	char digits[PROC_NUMBUF];
	unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
	unsigned int nr_pids, i;

	if (!nr) {
		ino_t ino = fake_ino(0,PROC_PID_INO);

		if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
			return 0;
		filp->f_pos++;
		nr++;
	}

	nr_pids = get_pid_list(nr, pid_array);

	for (i = 0; i < nr_pids; i++) {
		int pid = pid_array[i];
		ino_t ino = fake_ino(pid,PROC_PID_INO);
		unsigned long start = PROC_NUMBUF;

		/* Format the pid in decimal, filling the buffer from the end. */
		do {
			digits[--start] = '0' + (pid % 10);
			pid /= 10;
		} while (pid);

		if (filldir(dirent, digits+start, PROC_NUMBUF-start, filp->f_pos, ino, DT_DIR) < 0)
			break;
		filp->f_pos++;
	}
	return 0;
}
1171