1 /*
2 * Resizable virtual memory filesystem for Linux.
3 *
4 * Copyright (C) 2000 Linus Torvalds.
5 * 2000 Transmeta Corp.
6 * 2000-2001 Christoph Rohland
7 * 2000-2001 SAP AG
8 * 2002 Red Hat Inc.
9 * Copyright (C) 2002-2003 Hugh Dickins.
10 * Copyright (C) 2002-2003 VERITAS Software Corporation.
11 *
12 * This file is released under the GPL.
13 */
14
15 /*
16 * This virtual memory filesystem is heavily based on the ramfs. It
17 * extends ramfs with the ability to use swap and honor resource limits,
18 * which makes it a completely usable filesystem.
19 */
20
21 #include <linux/config.h>
22 #include <linux/module.h>
23 #include <linux/init.h>
24 #include <linux/devfs_fs_kernel.h>
25 #include <linux/fs.h>
26 #include <linux/mm.h>
27 #include <linux/file.h>
28 #include <linux/swap.h>
29 #include <linux/pagemap.h>
30 #include <linux/string.h>
31 #include <linux/locks.h>
32 #include <linux/smp_lock.h>
33
34 #include <asm/uaccess.h>
35 #include <asm/div64.h>
36
37 /* This magic number is used in glibc for posix shared memory */
38 #define TMPFS_MAGIC 0x01021994
39
40 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
41 #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
42 #define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512)
43
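/*
 * Index-space arithmetic behind SHMEM_MAX_INDEX below: SHMEM_NR_DIRECT
 * direct entries, plus ENTRIES_PER_PAGEPAGE/2 entries reachable through
 * the doubly-indirect half of the index page, plus
 * (ENTRIES_PER_PAGE/2)*ENTRIES_PER_PAGEPAGE entries through the triply-
 * indirect half; the last two terms factor to
 * (ENTRIES_PER_PAGEPAGE/2)*(ENTRIES_PER_PAGE+1).
 */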
44 #define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
45 #define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
46
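/*
 * VM_ACCT rounds a byte count up to whole pages, for the
 * vm_enough_memory() overcommit check in shmem_file_setup().
 */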
47 #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
48
49 /* info->flags needs VM_flags to handle pagein/truncate race efficiently */
50 #define SHMEM_PAGEIN VM_READ
51 #define SHMEM_TRUNCATE VM_WRITE
52
53 /* Pretend that each entry is of this size in directory's i_size */
54 #define BOGO_DIRENT_SIZE 20
55
56 #define SHMEM_SB(sb) (&sb->u.shmem_sb)
57
58 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
59 enum sgp_type {
60 SGP_READ, /* don't exceed i_size, don't allocate page */
61 SGP_CACHE, /* don't exceed i_size, may allocate page */
62 SGP_WRITE, /* may exceed i_size, may allocate page */
63 };
64
65 static int shmem_getpage(struct inode *inode, unsigned long idx,
66 struct page **pagep, enum sgp_type sgp);
67
68 static struct super_operations shmem_ops;
69 static struct address_space_operations shmem_aops;
70 static struct file_operations shmem_file_operations;
71 static struct inode_operations shmem_inode_operations;
72 static struct inode_operations shmem_dir_inode_operations;
73 static struct vm_operations_struct shmem_vm_ops;
74
75 LIST_HEAD(shmem_inodes);
76 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
77
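/*
 * Return one block to the superblock's free count and debit the inode's
 * block usage; callers use this to undo a data-page allocation.
 */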
78 static void shmem_free_block(struct inode *inode)
79 {
80 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
81 spin_lock(&sbinfo->stat_lock);
82 sbinfo->free_blocks++;
83 inode->i_blocks -= BLOCKS_PER_PAGE;
84 spin_unlock(&sbinfo->stat_lock);
85 }
86
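/*
 * Called when a page is removed from the page cache: give its block back
 * unless the page is being laundered out to swap or carries an error,
 * in which cases the block is not released here.
 */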
87 static void shmem_removepage(struct page *page)
88 {
89 if (!PageLaunder(page) && !PageError(page))
90 shmem_free_block(page->mapping->host);
91 }
92
93 /*
94 * shmem_swp_entry - find the swap vector position in the info structure
95 *
96 * @info: info structure for the inode
97 * @index: index of the page to find
98 * @page: optional page to add to the structure. Has to be preset to
99 * all zeros
100 *
101 * If no space is allocated yet, it returns NULL when page is 0;
102 * otherwise it uses the page for the needed block, setting it to 0
103 * on return to indicate that it has been used.
104 *
105 * The swap vector is organized the following way:
106 *
107 * There are SHMEM_NR_DIRECT entries directly stored in the
108 * shmem_inode_info structure. So small files do not need an additional
109 * allocation.
110 *
111 * For pages with index > SHMEM_NR_DIRECT there is the pointer
112 * i_indirect which points to a page which holds in the first half
113 * doubly indirect blocks, in the second half triple indirect blocks:
114 *
115 * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
116 * following layout (for SHMEM_NR_DIRECT == 16):
117 *
118 * i_indirect -> dir --> 16-19
119 * | +-> 20-23
120 * |
121 * +-->dir2 --> 24-27
122 * | +-> 28-31
123 * | +-> 32-35
124 * | +-> 36-39
125 * |
126 * +-->dir3 --> 40-43
127 * +-> 44-47
128 * +-> 48-51
129 * +-> 52-55
130 */
131 static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page)
132 {
133 unsigned long offset;
134 void **dir;
135
136 if (index < SHMEM_NR_DIRECT)
137 return info->i_direct+index;
138 if (!info->i_indirect) {
139 if (page) {
140 info->i_indirect = (void **) *page;
141 *page = 0;
142 }
143 return NULL; /* need another page */
144 }
145
146 index -= SHMEM_NR_DIRECT;
147 offset = index % ENTRIES_PER_PAGE;
148 index /= ENTRIES_PER_PAGE;
149 dir = info->i_indirect;
150
151 if (index >= ENTRIES_PER_PAGE/2) {
152 index -= ENTRIES_PER_PAGE/2;
153 dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
154 index %= ENTRIES_PER_PAGE;
155 if (!*dir) {
156 if (page) {
157 *dir = (void *) *page;
158 *page = 0;
159 }
160 return NULL; /* need another page */
161 }
162 dir = (void **) *dir;
163 }
164
165 dir += index;
166 if (!*dir) {
167 if (!page || !*page)
168 return NULL; /* need a page */
169 *dir = (void *) *page;
170 *page = 0;
171 }
172 return (swp_entry_t *) *dir + offset;
173 }
174
175 /*
176 * shmem_swp_alloc - get the position of the swap entry for the page.
177 * If it does not exist allocate the entry.
178 *
179 * @info: info structure for the inode
180 * @index: index of the page to find
181 * @sgp: check and recheck i_size? skip allocation?
182 */
183 static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
184 {
185 struct inode *inode = info->inode;
186 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
187 unsigned long page = 0;
188 swp_entry_t *entry;
189 static const swp_entry_t unswapped = {0};
190
191 if (sgp != SGP_WRITE &&
192 ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size)
193 return ERR_PTR(-EINVAL);
194
195 while (!(entry = shmem_swp_entry(info, index, &page))) {
196 if (sgp == SGP_READ)
197 return (swp_entry_t *) &unswapped;
198 /*
199 * Test free_blocks against 1 not 0, since we have 1 data
200 * page (and perhaps indirect index pages) yet to allocate:
201 * a waste to allocate index if we cannot allocate data.
202 */
203 spin_lock(&sbinfo->stat_lock);
204 if (sbinfo->free_blocks <= 1) {
205 spin_unlock(&sbinfo->stat_lock);
206 return ERR_PTR(-ENOSPC);
207 }
208 sbinfo->free_blocks--;
209 inode->i_blocks += BLOCKS_PER_PAGE;
210 spin_unlock(&sbinfo->stat_lock);
211
212 spin_unlock(&info->lock);
213 page = get_zeroed_page(GFP_USER);
214 spin_lock(&info->lock);
215
216 if (!page) {
217 shmem_free_block(inode);
218 return ERR_PTR(-ENOMEM);
219 }
220 if (sgp != SGP_WRITE &&
221 ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) {
222 entry = ERR_PTR(-EINVAL);
223 break;
224 }
225 if (info->next_index <= index)
226 info->next_index = index + 1;
227 }
228 if (page) {
229 /* another task gave its page, or truncated the file */
230 shmem_free_block(inode);
231 free_page(page);
232 }
233 if (info->next_index <= index && !IS_ERR(entry))
234 info->next_index = index + 1;
235 return entry;
236 }
237
238 /*
239 * shmem_free_swp - free some swap entries in a directory
240 *
241 * @dir: pointer to the directory
242 * @edir: pointer after last entry of the directory
243 */
244 static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
245 {
246 swp_entry_t *ptr;
247 int freed = 0;
248
249 for (ptr = dir; ptr < edir; ptr++) {
250 if (ptr->val) {
251 free_swap_and_cache(*ptr);
252 *ptr = (swp_entry_t){0};
253 freed++;
254 }
255 }
256 return freed;
257 }
258
259 /*
260 * shmem_truncate_direct - free the swap entries of a whole doubly
261 * indirect block
262 *
263 * @info: the info structure of the inode
264 * @dir: pointer to the pointer to the block
265 * @start: offset to start from (in pages)
266 * @len: how many pages are stored in this block
267 */
268 static inline unsigned long
269 shmem_truncate_direct(struct shmem_inode_info *info, swp_entry_t ***dir, unsigned long start, unsigned long len)
270 {
271 swp_entry_t **last, **ptr;
272 unsigned long off, freed_swp, freed = 0;
273
274 last = *dir + (len + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE;
275 off = start % ENTRIES_PER_PAGE;
276
277 for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++, off = 0) {
278 if (!*ptr)
279 continue;
280
281 if (info->swapped) {
282 freed_swp = shmem_free_swp(*ptr + off,
283 *ptr + ENTRIES_PER_PAGE);
284 info->swapped -= freed_swp;
285 freed += freed_swp;
286 }
287
288 if (!off) {
289 freed++;
290 free_page((unsigned long) *ptr);
291 *ptr = 0;
292 }
293 }
294
295 if (!start) {
296 freed++;
297 free_page((unsigned long) *dir);
298 *dir = 0;
299 }
300 return freed;
301 }
302
303 /*
304 * shmem_truncate_indirect - truncate an inode
305 *
306 * @info: the info structure of the inode
307 * @index: the index to truncate
308 *
309 * This function locates the last doubly indirect block and then
310 * calls shmem_truncate_direct to do the real work
311 */
312 static inline unsigned long
313 shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
314 {
315 swp_entry_t ***base;
316 unsigned long baseidx, start;
317 unsigned long len = info->next_index;
318 unsigned long freed;
319
320 if (len <= SHMEM_NR_DIRECT) {
321 info->next_index = index;
322 if (!info->swapped)
323 return 0;
324 freed = shmem_free_swp(info->i_direct + index,
325 info->i_direct + len);
326 info->swapped -= freed;
327 return freed;
328 }
329
330 if (len <= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT) {
331 len -= SHMEM_NR_DIRECT;
332 base = (swp_entry_t ***) &info->i_indirect;
333 baseidx = SHMEM_NR_DIRECT;
334 } else {
335 len -= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
336 BUG_ON(len > ENTRIES_PER_PAGEPAGE*ENTRIES_PER_PAGE/2);
337 baseidx = len - 1;
338 baseidx -= baseidx % ENTRIES_PER_PAGEPAGE;
339 base = (swp_entry_t ***) info->i_indirect +
340 ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGEPAGE;
341 len -= baseidx;
342 baseidx += ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
343 }
344
345 if (index > baseidx) {
346 info->next_index = index;
347 start = index - baseidx;
348 } else {
349 info->next_index = baseidx;
350 start = 0;
351 }
352 return *base? shmem_truncate_direct(info, base, start, len): 0;
353 }
354
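/*
 * Truncate the inode down to i_size: release swap entries and index
 * pages beyond the new end, re-run truncate_inode_pages if racing
 * pageins may have re-added pages, then return the freed blocks to
 * the superblock.
 */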
355 static void shmem_truncate(struct inode *inode)
356 {
357 struct shmem_inode_info *info = SHMEM_I(inode);
358 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
359 unsigned long freed = 0;
360 unsigned long index;
361
362 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
363 index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
364 if (index >= info->next_index)
365 return;
366
367 spin_lock(&info->lock);
368 while (index < info->next_index)
369 freed += shmem_truncate_indirect(info, index);
370 BUG_ON(info->swapped > info->next_index);
371
372 if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
373 /*
374 * Call truncate_inode_pages again: racing shmem_unuse_inode
375 * may have swizzled a page in from swap since vmtruncate or
376 * generic_delete_inode did it, before we lowered next_index.
377 * Also, though shmem_getpage checks i_size before adding to
378 * cache, no recheck after: so fix the narrow window there too.
379 */
380 info->flags |= SHMEM_TRUNCATE;
381 spin_unlock(&info->lock);
382 truncate_inode_pages(inode->i_mapping, inode->i_size);
383 spin_lock(&info->lock);
384 info->flags &= ~SHMEM_TRUNCATE;
385 }
386
387 spin_unlock(&info->lock);
388 spin_lock(&sbinfo->stat_lock);
389 sbinfo->free_blocks += freed;
390 inode->i_blocks -= freed*BLOCKS_PER_PAGE;
391 spin_unlock(&sbinfo->stat_lock);
392 }
393
394 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
395 {
396 struct inode *inode = dentry->d_inode;
397 struct page *page = NULL;
398 int error;
399
400 if (attr->ia_valid & ATTR_SIZE) {
401 if (attr->ia_size < inode->i_size) {
402 /*
403 * If truncating down to a partial page, then
404 * if that page is already allocated, hold it
405 * in memory until the truncation is over, so
406 * truncate_partial_page cannot miss it were
407 * it assigned to swap.
408 */
409 if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
410 (void) shmem_getpage(inode,
411 attr->ia_size>>PAGE_CACHE_SHIFT,
412 &page, SGP_READ);
413 }
414 /*
415 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
416 * detect if any pages might have been added to cache
417 * after truncate_inode_pages. But we needn't bother
418 * if it's being fully truncated to zero-length: the
419 * nrpages check is efficient enough in that case.
420 */
421 if (attr->ia_size) {
422 struct shmem_inode_info *info = SHMEM_I(inode);
423 spin_lock(&info->lock);
424 info->flags &= ~SHMEM_PAGEIN;
425 spin_unlock(&info->lock);
426 }
427 }
428 }
429
430 error = inode_change_ok(inode, attr);
431 if (!error)
432 error = inode_setattr(inode, attr);
433 if (page)
434 page_cache_release(page);
435 return error;
436 }
437
438 static void shmem_delete_inode(struct inode *inode)
439 {
440 struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
441 struct shmem_inode_info *info = SHMEM_I(inode);
442
443 if (inode->i_op->truncate == shmem_truncate) {
444 spin_lock(&shmem_ilock);
445 list_del(&info->list);
446 spin_unlock(&shmem_ilock);
447 inode->i_size = 0;
448 shmem_truncate(inode);
449 }
450 BUG_ON(inode->i_blocks);
451 spin_lock(&sbinfo->stat_lock);
452 sbinfo->free_inodes++;
453 spin_unlock(&sbinfo->stat_lock);
454 clear_inode(inode);
455 }
456
457 static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
458 {
459 swp_entry_t *ptr;
460
461 for (ptr = dir; ptr < edir; ptr++) {
462 if (ptr->val == entry.val)
463 return ptr - dir;
464 }
465 return -1;
466 }
467
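/*
 * Search this inode's swap vector for the given swap entry; if found,
 * move the page from the swap cache back into the inode's page cache
 * and clear the entry.  Returns 1 if the entry belonged to this inode,
 * 0 otherwise.
 */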
468 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
469 {
470 struct inode *inode;
471 struct address_space *mapping;
472 swp_entry_t *ptr;
473 unsigned long idx;
474 int offset;
475
476 idx = 0;
477 ptr = info->i_direct;
478 spin_lock(&info->lock);
479 offset = info->next_index;
480 if (offset > SHMEM_NR_DIRECT)
481 offset = SHMEM_NR_DIRECT;
482 offset = shmem_find_swp(entry, ptr, ptr + offset);
483 if (offset >= 0)
484 goto found;
485
486 for (idx = SHMEM_NR_DIRECT; idx < info->next_index;
487 idx += ENTRIES_PER_PAGE) {
488 ptr = shmem_swp_entry(info, idx, NULL);
489 if (!ptr)
490 continue;
491 offset = info->next_index - idx;
492 if (offset > ENTRIES_PER_PAGE)
493 offset = ENTRIES_PER_PAGE;
494 offset = shmem_find_swp(entry, ptr, ptr + offset);
495 if (offset >= 0)
496 goto found;
497 }
498 spin_unlock(&info->lock);
499 return 0;
500 found:
501 idx += offset;
502 inode = info->inode;
503 mapping = inode->i_mapping;
504 delete_from_swap_cache(page);
505 if (add_to_page_cache_unique(page,
506 mapping, idx, page_hash(mapping, idx)) == 0) {
507 info->flags |= SHMEM_PAGEIN;
508 ptr[offset].val = 0;
509 info->swapped--;
510 } else if (add_to_swap_cache(page, entry) != 0)
511 BUG();
512 spin_unlock(&info->lock);
513 SetPageUptodate(page);
514 /*
515 * Decrement swap count even when the entry is left behind:
516 * try_to_unuse will skip over mms, then reincrement count.
517 */
518 swap_free(entry);
519 return 1;
520 }
521
522 /*
523 * shmem_unuse() searches for a possibly swapped-out shmem page.
524 */
525 int shmem_unuse(swp_entry_t entry, struct page *page)
526 {
527 struct list_head *p;
528 struct shmem_inode_info *info;
529 int found = 0;
530
531 spin_lock(&shmem_ilock);
532 list_for_each(p, &shmem_inodes) {
533 info = list_entry(p, struct shmem_inode_info, list);
534
535 if (info->swapped && shmem_unuse_inode(info, entry, page)) {
536 /* move head to start search for next from here */
537 list_move_tail(&shmem_inodes, &info->list);
538 found = 1;
539 break;
540 }
541 }
542 spin_unlock(&shmem_ilock);
543 return found;
544 }
545
546 /*
547 * Move the page from the page cache to the swap cache.
548 */
549 static int shmem_writepage(struct page *page)
550 {
551 struct shmem_inode_info *info;
552 swp_entry_t *entry, swap;
553 struct address_space *mapping;
554 unsigned long index;
555 struct inode *inode;
556
557 BUG_ON(!PageLocked(page));
558 if (!PageLaunder(page))
559 goto fail;
560
561 mapping = page->mapping;
562 index = page->index;
563 inode = mapping->host;
564 info = SHMEM_I(inode);
565 if (info->flags & VM_LOCKED)
566 goto fail;
567 getswap:
568 swap = get_swap_page();
569 if (!swap.val)
570 goto fail;
571
572 spin_lock(&info->lock);
573 if (index >= info->next_index) {
574 BUG_ON(!(info->flags & SHMEM_TRUNCATE));
575 spin_unlock(&info->lock);
576 swap_free(swap);
577 goto fail;
578 }
579 entry = shmem_swp_entry(info, index, NULL);
580 BUG_ON(!entry);
581 BUG_ON(entry->val);
582
583 /* Remove it from the page cache */
584 remove_inode_page(page);
585 page_cache_release(page);
586
587 /* Add it to the swap cache */
588 if (add_to_swap_cache(page, swap) != 0) {
589 /*
590 * Raced with "speculative" read_swap_cache_async.
591 * Add page back to page cache, unref swap, try again.
592 */
593 add_to_page_cache_locked(page, mapping, index);
594 info->flags |= SHMEM_PAGEIN;
595 spin_unlock(&info->lock);
596 swap_free(swap);
597 goto getswap;
598 }
599
600 *entry = swap;
601 info->swapped++;
602 spin_unlock(&info->lock);
603 SetPageUptodate(page);
604 set_page_dirty(page);
605 UnlockPage(page);
606 return 0;
607 fail:
608 return fail_writepage(page);
609 }
610
611 /*
612 * shmem_getpage - either get the page from swap or allocate a new one
613 *
614 * If we allocate a new one we do not mark it dirty. That's up to the
615 * vm. If we swap it in we mark it dirty, since we also free the swap
616 * entry: a page cannot live in both the swap and page cache.
617 */
618 static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp)
619 {
620 struct address_space *mapping = inode->i_mapping;
621 struct shmem_inode_info *info = SHMEM_I(inode);
622 struct shmem_sb_info *sbinfo;
623 struct page *filepage = *pagep;
624 struct page *swappage;
625 swp_entry_t *entry;
626 swp_entry_t swap;
627 int error = 0;
628
629 if (idx >= SHMEM_MAX_INDEX) {
630 error = -EFBIG;
631 goto failed;
632 }
633
634 /*
635 * Normally, filepage is NULL on entry, and either found
636 * uptodate immediately, or allocated and zeroed, or read
637 * in under swappage, which is then assigned to filepage.
638 * But shmem_readpage and shmem_prepare_write pass in a locked
639 * filepage, which may be found not uptodate by other callers
640 * too, and may need to be copied from the swappage read in.
641 */
642 repeat:
643 if (!filepage)
644 filepage = find_lock_page(mapping, idx);
645 if (filepage && Page_Uptodate(filepage))
646 goto done;
647
648 spin_lock(&info->lock);
649 entry = shmem_swp_alloc(info, idx, sgp);
650 if (IS_ERR(entry)) {
651 spin_unlock(&info->lock);
652 error = PTR_ERR(entry);
653 goto failed;
654 }
655 swap = *entry;
656
657 if (swap.val) {
658 /* Look it up and read it in.. */
659 swappage = lookup_swap_cache(swap);
660 if (!swappage) {
661 spin_unlock(&info->lock);
662 swapin_readahead(swap);
663 swappage = read_swap_cache_async(swap);
664 if (!swappage) {
665 spin_lock(&info->lock);
666 entry = shmem_swp_alloc(info, idx, sgp);
667 if (IS_ERR(entry))
668 error = PTR_ERR(entry);
669 else if (entry->val == swap.val)
670 error = -ENOMEM;
671 spin_unlock(&info->lock);
672 if (error)
673 goto failed;
674 goto repeat;
675 }
676 wait_on_page(swappage);
677 page_cache_release(swappage);
678 goto repeat;
679 }
680
681 /* We have to do this with page locked to prevent races */
682 if (TryLockPage(swappage)) {
683 spin_unlock(&info->lock);
684 wait_on_page(swappage);
685 page_cache_release(swappage);
686 goto repeat;
687 }
688 if (!Page_Uptodate(swappage)) {
689 spin_unlock(&info->lock);
690 UnlockPage(swappage);
691 page_cache_release(swappage);
692 error = -EIO;
693 goto failed;
694 }
695
696 delete_from_swap_cache(swappage);
697 if (filepage) {
698 entry->val = 0;
699 info->swapped--;
700 spin_unlock(&info->lock);
701 flush_page_to_ram(swappage);
702 copy_highpage(filepage, swappage);
703 UnlockPage(swappage);
704 page_cache_release(swappage);
705 flush_dcache_page(filepage);
706 SetPageUptodate(filepage);
707 SetPageDirty(filepage);
708 swap_free(swap);
709 } else if (add_to_page_cache_unique(swappage,
710 mapping, idx, page_hash(mapping, idx)) == 0) {
711 info->flags |= SHMEM_PAGEIN;
712 entry->val = 0;
713 info->swapped--;
714 spin_unlock(&info->lock);
715 filepage = swappage;
716 SetPageUptodate(filepage);
717 SetPageDirty(filepage);
718 swap_free(swap);
719 } else {
720 if (add_to_swap_cache(swappage, swap) != 0)
721 BUG();
722 spin_unlock(&info->lock);
723 SetPageUptodate(swappage);
724 SetPageDirty(swappage);
725 UnlockPage(swappage);
726 page_cache_release(swappage);
727 goto repeat;
728 }
729 } else if (sgp == SGP_READ && !filepage) {
730 filepage = find_get_page(mapping, idx);
731 if (filepage &&
732 (!Page_Uptodate(filepage) || TryLockPage(filepage))) {
733 spin_unlock(&info->lock);
734 wait_on_page(filepage);
735 page_cache_release(filepage);
736 filepage = NULL;
737 goto repeat;
738 }
739 spin_unlock(&info->lock);
740 } else {
741 sbinfo = SHMEM_SB(inode->i_sb);
742 spin_lock(&sbinfo->stat_lock);
743 if (sbinfo->free_blocks == 0) {
744 spin_unlock(&sbinfo->stat_lock);
745 spin_unlock(&info->lock);
746 error = -ENOSPC;
747 goto failed;
748 }
749 sbinfo->free_blocks--;
750 inode->i_blocks += BLOCKS_PER_PAGE;
751 spin_unlock(&sbinfo->stat_lock);
752
753 if (!filepage) {
754 spin_unlock(&info->lock);
755 filepage = page_cache_alloc(mapping);
756 if (!filepage) {
757 shmem_free_block(inode);
758 error = -ENOMEM;
759 goto failed;
760 }
761
762 spin_lock(&info->lock);
763 entry = shmem_swp_alloc(info, idx, sgp);
764 if (IS_ERR(entry))
765 error = PTR_ERR(entry);
766 if (error || entry->val ||
767 add_to_page_cache_unique(filepage,
768 mapping, idx, page_hash(mapping, idx)) != 0) {
769 spin_unlock(&info->lock);
770 page_cache_release(filepage);
771 shmem_free_block(inode);
772 filepage = NULL;
773 if (error)
774 goto failed;
775 goto repeat;
776 }
777 info->flags |= SHMEM_PAGEIN;
778 }
779
780 spin_unlock(&info->lock);
781 clear_highpage(filepage);
782 flush_dcache_page(filepage);
783 SetPageUptodate(filepage);
784 }
785 done:
786 if (!*pagep) {
787 if (filepage)
788 UnlockPage(filepage);
789 else
790 filepage = ZERO_PAGE(0);
791 *pagep = filepage;
792 }
793 if (PageError(filepage))
794 ClearPageError(filepage);
795 return 0;
796
797 failed:
798 if (filepage) {
799 if (*pagep == filepage)
800 SetPageError(filepage);
801 else {
802 UnlockPage(filepage);
803 page_cache_release(filepage);
804 }
805 }
806 return error;
807 }
808
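/*
 * Fault handler for mmap'ed tmpfs files: translate the faulting address
 * into a page index and let shmem_getpage find or allocate the page.
 */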
809 struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused)
810 {
811 struct inode *inode = vma->vm_file->f_dentry->d_inode;
812 struct page *page = NULL;
813 unsigned long idx;
814 int error;
815
816 idx = (address - vma->vm_start) >> PAGE_SHIFT;
817 idx += vma->vm_pgoff;
818 idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
819
820 error = shmem_getpage(inode, idx, &page, SGP_CACHE);
821 if (error)
822 return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
823
824 mark_page_accessed(page);
825 flush_page_to_ram(page);
826 return page;
827 }
828
829 void shmem_lock(struct file *file, int lock)
830 {
831 struct inode *inode = file->f_dentry->d_inode;
832 struct shmem_inode_info *info = SHMEM_I(inode);
833
834 spin_lock(&info->lock);
835 if (lock)
836 info->flags |= VM_LOCKED;
837 else
838 info->flags &= ~VM_LOCKED;
839 spin_unlock(&info->lock);
840 }
841
842 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
843 {
844 struct vm_operations_struct *ops;
845 struct inode *inode = file->f_dentry->d_inode;
846
847 ops = &shmem_vm_ops;
848 if (!S_ISREG(inode->i_mode))
849 return -EACCES;
850 UPDATE_ATIME(inode);
851 vma->vm_ops = ops;
852 return 0;
853 }
854
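/*
 * Allocate a new tmpfs inode, charging it against the superblock's inode
 * limit, and wire up the operations appropriate to its file type.
 */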
855 static struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
856 {
857 struct inode *inode;
858 struct shmem_inode_info *info;
859 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
860
861 spin_lock(&sbinfo->stat_lock);
862 if (!sbinfo->free_inodes) {
863 spin_unlock(&sbinfo->stat_lock);
864 return NULL;
865 }
866 sbinfo->free_inodes--;
867 spin_unlock(&sbinfo->stat_lock);
868
869 inode = new_inode(sb);
870 if (inode) {
871 inode->i_mode = mode;
872 inode->i_uid = current->fsuid;
873 inode->i_gid = current->fsgid;
874 inode->i_blksize = PAGE_CACHE_SIZE;
875 inode->i_blocks = 0;
876 inode->i_rdev = NODEV;
877 inode->i_mapping->a_ops = &shmem_aops;
878 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
879 info = SHMEM_I(inode);
880 info->inode = inode;
881 spin_lock_init(&info->lock);
882 switch (mode & S_IFMT) {
883 default:
884 init_special_inode(inode, mode, dev);
885 break;
886 case S_IFREG:
887 inode->i_op = &shmem_inode_operations;
888 inode->i_fop = &shmem_file_operations;
889 spin_lock(&shmem_ilock);
890 list_add_tail(&info->list, &shmem_inodes);
891 spin_unlock(&shmem_ilock);
892 break;
893 case S_IFDIR:
894 inode->i_nlink++;
895 /* Some things misbehave if size == 0 on a directory */
896 inode->i_size = 2 * BOGO_DIRENT_SIZE;
897 inode->i_op = &shmem_dir_inode_operations;
898 inode->i_fop = &dcache_dir_ops;
899 break;
900 case S_IFLNK:
901 break;
902 }
903 }
904 return inode;
905 }
906
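/*
 * Adjust the block and inode limits (used at remount time, and for the
 * internal mount), refusing to shrink either limit below current usage.
 */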
907 static int shmem_set_size(struct shmem_sb_info *info,
908 unsigned long max_blocks, unsigned long max_inodes)
909 {
910 int error;
911 unsigned long blocks, inodes;
912
913 spin_lock(&info->stat_lock);
914 blocks = info->max_blocks - info->free_blocks;
915 inodes = info->max_inodes - info->free_inodes;
916 error = -EINVAL;
917 if (max_blocks < blocks)
918 goto out;
919 if (max_inodes < inodes)
920 goto out;
921 error = 0;
922 info->max_blocks = max_blocks;
923 info->free_blocks = max_blocks - blocks;
924 info->max_inodes = max_inodes;
925 info->free_inodes = max_inodes - inodes;
926 out:
927 spin_unlock(&info->stat_lock);
928 return error;
929 }
930
931 #ifdef CONFIG_TMPFS
932
933 static struct inode_operations shmem_symlink_inode_operations;
934 static struct inode_operations shmem_symlink_inline_operations;
935
936 /*
937 * tmpfs itself makes no use of generic_file_read, generic_file_mmap
938 * or generic_file_write; but shmem_readpage, shmem_prepare_write and
939 * shmem_commit_write let a tmpfs file be used below the loop driver,
940 * and shmem_readpage lets a tmpfs file be used by sendfile.
941 */
942 static int
943 shmem_readpage(struct file *file, struct page *page)
944 {
945 struct inode *inode = page->mapping->host;
946 int error = shmem_getpage(inode, page->index, &page, SGP_CACHE);
947 UnlockPage(page);
948 return error;
949 }
950
951 static int
952 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
953 {
954 struct inode *inode = page->mapping->host;
955 return shmem_getpage(inode, page->index, &page, SGP_WRITE);
956 }
957
958 static int
959 shmem_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
960 {
961 struct inode *inode = page->mapping->host;
962 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
963
964 if (pos > inode->i_size)
965 inode->i_size = pos;
966 SetPageDirty(page);
967 return 0;
968 }
969
970 static ssize_t
971 shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
972 {
973 struct inode *inode = file->f_dentry->d_inode;
974 loff_t pos;
975 unsigned long written;
976 ssize_t err;
977
978 if ((ssize_t) count < 0)
979 return -EINVAL;
980
981 if (!access_ok(VERIFY_READ, buf, count))
982 return -EFAULT;
983
984 down(&inode->i_sem);
985
986 pos = *ppos;
987 written = 0;
988
989 err = precheck_file_write(file, inode, &count, &pos);
990 if (err || !count)
991 goto out;
992
993 remove_suid(inode);
994 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
995
996 do {
997 struct page *page = NULL;
998 unsigned long bytes, index, offset;
999 char *kaddr;
1000 int left;
1001
1002 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
1003 index = pos >> PAGE_CACHE_SHIFT;
1004 bytes = PAGE_CACHE_SIZE - offset;
1005 if (bytes > count)
1006 bytes = count;
1007
1008 /*
1009 * We don't hold page lock across copy from user -
1010 * what would it guard against? - so no deadlock here.
1011 */
1012
1013 err = shmem_getpage(inode, index, &page, SGP_WRITE);
1014 if (err)
1015 break;
1016
1017 kaddr = kmap(page);
1018 left = __copy_from_user(kaddr + offset, buf, bytes);
1019 kunmap(page);
1020
1021 written += bytes;
1022 count -= bytes;
1023 pos += bytes;
1024 buf += bytes;
1025 if (pos > inode->i_size)
1026 inode->i_size = pos;
1027
1028 flush_dcache_page(page);
1029 SetPageDirty(page);
1030 SetPageReferenced(page);
1031 page_cache_release(page);
1032
1033 if (left) {
1034 pos -= left;
1035 written -= left;
1036 err = -EFAULT;
1037 break;
1038 }
1039 } while (count);
1040
1041 *ppos = pos;
1042 if (written)
1043 err = written;
1044 out:
1045 up(&inode->i_sem);
1046 return err;
1047 }
1048
1049 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
1050 {
1051 struct inode *inode = filp->f_dentry->d_inode;
1052 struct address_space *mapping = inode->i_mapping;
1053 unsigned long index, offset;
1054 loff_t pos = *ppos;
1055
1056 if (unlikely(pos < 0))
1057 return;
1058
1059 index = pos >> PAGE_CACHE_SHIFT;
1060 offset = pos & ~PAGE_CACHE_MASK;
1061
1062 for (;;) {
1063 struct page *page = NULL;
1064 unsigned long end_index, nr, ret;
1065
1066 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1067 if (index > end_index)
1068 break;
1069 if (index == end_index) {
1070 nr = inode->i_size & ~PAGE_CACHE_MASK;
1071 if (nr <= offset)
1072 break;
1073 }
1074
1075 desc->error = shmem_getpage(inode, index, &page, SGP_READ);
1076 if (desc->error) {
1077 if (desc->error == -EINVAL)
1078 desc->error = 0;
1079 break;
1080 }
1081
1082 /*
1083 * We must evaluate after, since reads (unlike writes)
1084 * are called without i_sem protection against truncate
1085 */
1086 nr = PAGE_CACHE_SIZE;
1087 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1088 if (index == end_index) {
1089 nr = inode->i_size & ~PAGE_CACHE_MASK;
1090 if (nr <= offset) {
1091 page_cache_release(page);
1092 break;
1093 }
1094 }
1095 nr -= offset;
1096
1097 if (page != ZERO_PAGE(0)) {
1098 /*
1099 * If users can be writing to this page using arbitrary
1100 * virtual addresses, take care about potential aliasing
1101 * before reading the page on the kernel side.
1102 */
1103 if (mapping->i_mmap_shared != NULL)
1104 flush_dcache_page(page);
1105 /*
1106 * Mark the page accessed if we read the
1107 * beginning or we just did an lseek.
1108 */
1109 if (!offset || !filp->f_reada)
1110 mark_page_accessed(page);
1111 }
1112
1113 /*
1114 * Ok, we have the page, and it's up-to-date, so
1115 * now we can copy it to user space...
1116 *
1117 * The actor routine returns how many bytes were actually used..
1118 * NOTE! This may not be the same as how much of a user buffer
1119 * we filled up (we may be padding etc), so we can only update
1120 * "pos" here (the actor routine has to update the user buffer
1121 * pointers and the remaining count).
1122 */
1123 ret = file_read_actor(desc, page, offset, nr);
1124 offset += ret;
1125 index += offset >> PAGE_CACHE_SHIFT;
1126 offset &= ~PAGE_CACHE_MASK;
1127
1128 page_cache_release(page);
1129 if (ret != nr || !desc->count)
1130 break;
1131 }
1132
1133 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1134 filp->f_reada = 1;
1135 UPDATE_ATIME(inode);
1136 }
1137
1138 static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
1139 {
1140 read_descriptor_t desc;
1141
1142 if ((ssize_t) count < 0)
1143 return -EINVAL;
1144 if (!access_ok(VERIFY_WRITE, buf, count))
1145 return -EFAULT;
1146 if (!count)
1147 return 0;
1148
1149 desc.written = 0;
1150 desc.count = count;
1151 desc.buf = buf;
1152 desc.error = 0;
1153
1154 do_shmem_file_read(filp, ppos, &desc);
1155 if (desc.written)
1156 return desc.written;
1157 return desc.error;
1158 }
1159
1160 static int shmem_statfs(struct super_block *sb, struct statfs *buf)
1161 {
1162 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1163
1164 buf->f_type = TMPFS_MAGIC;
1165 buf->f_bsize = PAGE_CACHE_SIZE;
1166 spin_lock(&sbinfo->stat_lock);
1167 buf->f_blocks = sbinfo->max_blocks;
1168 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1169 buf->f_files = sbinfo->max_inodes;
1170 buf->f_ffree = sbinfo->free_inodes;
1171 spin_unlock(&sbinfo->stat_lock);
1172 buf->f_namelen = NAME_MAX;
1173 return 0;
1174 }
1175
1176 /*
1177 * Retaining negative dentries for an in-memory filesystem just wastes
1178 * memory and lookup time: arrange for them to be deleted immediately.
1179 */
1180 static int shmem_delete_dentry(struct dentry *dentry)
1181 {
1182 return 1;
1183 }
1184
1185 /*
1186 * Lookup the data. This is trivial - if the dentry didn't already
1187 * exist, we know it is negative. Set d_op to delete negative dentries.
1188 */
1189 static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry)
1190 {
1191 static struct dentry_operations shmem_dentry_operations = {
1192 .d_delete = shmem_delete_dentry,
1193 };
1194
1195 if (dentry->d_name.len > NAME_MAX)
1196 return ERR_PTR(-ENAMETOOLONG);
1197 dentry->d_op = &shmem_dentry_operations;
1198 d_add(dentry, NULL);
1199 return NULL;
1200 }
1201
1202 /*
1203 * File creation. Allocate an inode, and we're done..
1204 */
1205 static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
1206 {
1207 struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
1208 int error = -ENOSPC;
1209
1210 if (inode) {
1211 if (dir->i_mode & S_ISGID) {
1212 inode->i_gid = dir->i_gid;
1213 if (S_ISDIR(mode))
1214 inode->i_mode |= S_ISGID;
1215 }
1216 dir->i_size += BOGO_DIRENT_SIZE;
1217 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1218 d_instantiate(dentry, inode);
1219 dget(dentry); /* Extra count - pin the dentry in core */
1220 error = 0;
1221 }
1222 return error;
1223 }
1224
1225 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1226 {
1227 int error;
1228
1229 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1230 return error;
1231 dir->i_nlink++;
1232 return 0;
1233 }
1234
1235 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
1236 {
1237 return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1238 }
1239
1240 /*
1241 * Link a file..
1242 */
1243 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1244 {
1245 struct inode *inode = old_dentry->d_inode;
1246
1247 if (S_ISDIR(inode->i_mode))
1248 return -EPERM;
1249
1250 dir->i_size += BOGO_DIRENT_SIZE;
1251 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1252 inode->i_nlink++;
1253 atomic_inc(&inode->i_count); /* New dentry reference */
1254 dget(dentry); /* Extra pinning count for the created dentry */
1255 d_instantiate(dentry, inode);
1256 return 0;
1257 }
1258
1259 static inline int shmem_positive(struct dentry *dentry)
1260 {
1261 return dentry->d_inode && !d_unhashed(dentry);
1262 }
1263
1264 /*
1265 * Check that a directory is empty (this works
1266 * for regular files too, they'll just always be
1267 * considered empty..).
1268 *
1269 * Note that an empty directory can still have
1270 * children, they just all have to be negative..
1271 */
1272 static int shmem_empty(struct dentry *dentry)
1273 {
1274 struct list_head *list;
1275
1276 spin_lock(&dcache_lock);
1277 list = dentry->d_subdirs.next;
1278
1279 while (list != &dentry->d_subdirs) {
1280 struct dentry *de = list_entry(list, struct dentry, d_child);
1281
1282 if (shmem_positive(de)) {
1283 spin_unlock(&dcache_lock);
1284 return 0;
1285 }
1286 list = list->next;
1287 }
1288 spin_unlock(&dcache_lock);
1289 return 1;
1290 }
1291
1292 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1293 {
1294 struct inode *inode = dentry->d_inode;
1295
1296 dir->i_size -= BOGO_DIRENT_SIZE;
1297 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1298 inode->i_nlink--;
1299 dput(dentry); /* Undo the count from "create" - this does all the work */
1300 return 0;
1301 }
1302
1303 static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1304 {
1305 if (!shmem_empty(dentry))
1306 return -ENOTEMPTY;
1307
1308 dir->i_nlink--;
1309 return shmem_unlink(dir, dentry);
1310 }
1311
1312 /*
1313 * The VFS layer already does all the dentry stuff for rename,
1314 * we just have to decrement the usage count for the target if
1315 * it exists so that the VFS layer correctly frees it when it
1316 * gets overwritten.
1317 */
1318 static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1319 {
1320 struct inode *inode = old_dentry->d_inode;
1321 int they_are_dirs = S_ISDIR(inode->i_mode);
1322
1323 if (!shmem_empty(new_dentry))
1324 return -ENOTEMPTY;
1325
1326 if (new_dentry->d_inode) {
1327 (void) shmem_unlink(new_dir, new_dentry);
1328 if (they_are_dirs)
1329 old_dir->i_nlink--;
1330 } else if (they_are_dirs) {
1331 old_dir->i_nlink--;
1332 new_dir->i_nlink++;
1333 }
1334
1335 old_dir->i_size -= BOGO_DIRENT_SIZE;
1336 new_dir->i_size += BOGO_DIRENT_SIZE;
1337 old_dir->i_ctime = old_dir->i_mtime =
1338 new_dir->i_ctime = new_dir->i_mtime =
1339 inode->i_ctime = CURRENT_TIME;
1340 return 0;
1341 }
1342
1343 static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1344 {
1345 int error;
1346 int len;
1347 struct inode *inode;
1348 struct page *page = NULL;
1349 char *kaddr;
1350 struct shmem_inode_info *info;
1351
1352 len = strlen(symname) + 1;
1353 if (len > PAGE_CACHE_SIZE)
1354 return -ENAMETOOLONG;
1355
1356 inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1357 if (!inode)
1358 return -ENOSPC;
1359
1360 info = SHMEM_I(inode);
1361 inode->i_size = len-1;
1362 if (len <= sizeof(struct shmem_inode_info)) {
1363 /* do it inline */
1364 memcpy(info, symname, len);
1365 inode->i_op = &shmem_symlink_inline_operations;
1366 } else {
1367 error = shmem_getpage(inode, 0, &page, SGP_WRITE);
1368 if (error) {
1369 iput(inode);
1370 return error;
1371 }
1372 inode->i_op = &shmem_symlink_inode_operations;
1373 spin_lock(&shmem_ilock);
1374 list_add_tail(&info->list, &shmem_inodes);
1375 spin_unlock(&shmem_ilock);
1376 kaddr = kmap(page);
1377 memcpy(kaddr, symname, len);
1378 kunmap(page);
1379 SetPageDirty(page);
1380 page_cache_release(page);
1381 }
1382 if (dir->i_mode & S_ISGID)
1383 inode->i_gid = dir->i_gid;
1384 dir->i_size += BOGO_DIRENT_SIZE;
1385 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1386 d_instantiate(dentry, inode);
1387 dget(dentry);
1388 return 0;
1389 }
1390
1391 static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
1392 {
1393 return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode));
1394 }
1395
1396 static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1397 {
1398 return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
1399 }
1400
1401 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
1402 {
1403 struct page *page = NULL;
1404 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1405 if (res)
1406 return res;
1407 res = vfs_readlink(dentry, buffer, buflen, kmap(page));
1408 kunmap(page);
1409 mark_page_accessed(page);
1410 page_cache_release(page);
1411 return res;
1412 }
1413
1414 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1415 {
1416 struct page *page = NULL;
1417 int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1418 if (res)
1419 return res;
1420 res = vfs_follow_link(nd, kmap(page));
1421 kunmap(page);
1422 mark_page_accessed(page);
1423 page_cache_release(page);
1424 return res;
1425 }
1426
1427 static struct inode_operations shmem_symlink_inline_operations = {
1428 readlink: shmem_readlink_inline,
1429 follow_link: shmem_follow_link_inline,
1430 };
1431
1432 static struct inode_operations shmem_symlink_inode_operations = {
1433 truncate: shmem_truncate,
1434 readlink: shmem_readlink,
1435 follow_link: shmem_follow_link,
1436 };
1437
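/*
 * Parse mount options: size= (a byte count as accepted by memparse, or a
 * percentage of physical RAM), nr_blocks=, nr_inodes=, mode=, uid=, gid=.
 * For example (values and mount point purely illustrative):
 *
 *	mount -t tmpfs -o size=50%,nr_inodes=8192,mode=1777 tmpfs /mnt/tmp
 */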
1438 static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
1439 {
1440 char *this_char, *value, *rest;
1441
1442 while ((this_char = strsep(&options, ",")) != NULL) {
1443 if (!*this_char)
1444 continue;
1445 if ((value = strchr(this_char,'=')) != NULL) {
1446 *value++ = 0;
1447 } else {
1448 printk(KERN_ERR
1449 "tmpfs: No value for mount option '%s'\n",
1450 this_char);
1451 return 1;
1452 }
1453
1454 if (!strcmp(this_char,"size")) {
1455 unsigned long long size;
1456 size = memparse(value,&rest);
1457 if (*rest == '%') {
1458 struct sysinfo si;
1459 si_meminfo(&si);
1460 size <<= PAGE_SHIFT;
1461 size *= si.totalram;
1462 do_div(size, 100);
1463 rest++;
1464 }
1465 if (*rest)
1466 goto bad_val;
1467 *blocks = size >> PAGE_CACHE_SHIFT;
1468 } else if (!strcmp(this_char,"nr_blocks")) {
1469 *blocks = memparse(value,&rest);
1470 if (*rest)
1471 goto bad_val;
1472 } else if (!strcmp(this_char,"nr_inodes")) {
1473 *inodes = memparse(value,&rest);
1474 if (*rest)
1475 goto bad_val;
1476 } else if (!strcmp(this_char,"mode")) {
1477 if (!mode)
1478 continue;
1479 *mode = simple_strtoul(value,&rest,8);
1480 if (*rest)
1481 goto bad_val;
1482 } else if (!strcmp(this_char,"uid")) {
1483 if (!uid)
1484 continue;
1485 *uid = simple_strtoul(value,&rest,0);
1486 if (*rest)
1487 goto bad_val;
1488 } else if (!strcmp(this_char,"gid")) {
1489 if (!gid)
1490 continue;
1491 *gid = simple_strtoul(value,&rest,0);
1492 if (*rest)
1493 goto bad_val;
1494 } else {
1495 printk(KERN_ERR "tmpfs: Bad mount option %s\n",
1496 this_char);
1497 return 1;
1498 }
1499 }
1500 return 0;
1501
1502 bad_val:
1503 printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
1504 value, this_char);
1505 return 1;
1506 }
1507
1508 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
1509 {
1510 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1511 unsigned long max_blocks = sbinfo->max_blocks;
1512 unsigned long max_inodes = sbinfo->max_inodes;
1513
1514 if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1515 return -EINVAL;
1516 return shmem_set_size(sbinfo, max_blocks, max_inodes);
1517 }
1518
1519 static int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync)
1520 {
1521 return 0;
1522 }
1523 #endif
1524
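/*
 * Fill in the superblock at mount time: default both the block and inode
 * limits to half the number of physical RAM pages, apply any mount
 * options, and create the root directory inode.
 */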
1525 static struct super_block *shmem_read_super(struct super_block *sb, void *data, int silent)
1526 {
1527 struct inode *inode;
1528 struct dentry *root;
1529 unsigned long blocks, inodes;
1530 int mode = S_IRWXUGO | S_ISVTX;
1531 uid_t uid = current->fsuid;
1532 gid_t gid = current->fsgid;
1533 struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1534 struct sysinfo si;
1535
1536 /*
1537 * By default we only allow half of the physical RAM per
1538 * tmpfs instance
1539 */
1540 si_meminfo(&si);
1541 blocks = inodes = si.totalram / 2;
1542
1543 #ifdef CONFIG_TMPFS
1544 if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes))
1545 return NULL;
1546 #endif
1547
1548 spin_lock_init(&sbinfo->stat_lock);
1549 sbinfo->max_blocks = blocks;
1550 sbinfo->free_blocks = blocks;
1551 sbinfo->max_inodes = inodes;
1552 sbinfo->free_inodes = inodes;
1553 sb->s_maxbytes = SHMEM_MAX_BYTES;
1554 sb->s_blocksize = PAGE_CACHE_SIZE;
1555 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1556 sb->s_magic = TMPFS_MAGIC;
1557 sb->s_op = &shmem_ops;
1558 inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1559 if (!inode)
1560 return NULL;
1561
1562 inode->i_uid = uid;
1563 inode->i_gid = gid;
1564 root = d_alloc_root(inode);
1565 if (!root) {
1566 iput(inode);
1567 return NULL;
1568 }
1569 sb->s_root = root;
1570 return sb;
1571 }
1572
1573 static struct address_space_operations shmem_aops = {
1574 removepage: shmem_removepage,
1575 writepage: shmem_writepage,
1576 #ifdef CONFIG_TMPFS
1577 readpage: shmem_readpage,
1578 prepare_write: shmem_prepare_write,
1579 commit_write: shmem_commit_write,
1580 #endif
1581 };
1582
1583 static struct file_operations shmem_file_operations = {
1584 mmap: shmem_mmap,
1585 #ifdef CONFIG_TMPFS
1586 read: shmem_file_read,
1587 write: shmem_file_write,
1588 fsync: shmem_sync_file,
1589 #endif
1590 };
1591
1592 static struct inode_operations shmem_inode_operations = {
1593 truncate: shmem_truncate,
1594 setattr: shmem_notify_change,
1595 };
1596
1597 static struct inode_operations shmem_dir_inode_operations = {
1598 #ifdef CONFIG_TMPFS
1599 create: shmem_create,
1600 lookup: shmem_lookup,
1601 link: shmem_link,
1602 unlink: shmem_unlink,
1603 symlink: shmem_symlink,
1604 mkdir: shmem_mkdir,
1605 rmdir: shmem_rmdir,
1606 mknod: shmem_mknod,
1607 rename: shmem_rename,
1608 #endif
1609 };
1610
1611 static struct super_operations shmem_ops = {
1612 #ifdef CONFIG_TMPFS
1613 statfs: shmem_statfs,
1614 remount_fs: shmem_remount_fs,
1615 #endif
1616 delete_inode: shmem_delete_inode,
1617 put_inode: force_delete,
1618 };
1619
1620 static struct vm_operations_struct shmem_vm_ops = {
1621 nopage: shmem_nopage,
1622 };
1623
1624 #ifdef CONFIG_TMPFS
1625 /* type "shm" will be tagged obsolete in 2.5 */
1626 static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
1627 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
1628 #else
1629 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
1630 #endif
1631 static struct vfsmount *shm_mnt;
1632
1633 static int __init init_tmpfs(void)
1634 {
1635 int error;
1636
1637 error = register_filesystem(&tmpfs_fs_type);
1638 if (error) {
1639 printk(KERN_ERR "Could not register tmpfs\n");
1640 goto out3;
1641 }
1642 #ifdef CONFIG_TMPFS
1643 error = register_filesystem(&shmem_fs_type);
1644 if (error) {
1645 printk(KERN_ERR "Could not register shm fs\n");
1646 goto out2;
1647 }
1648 devfs_mk_dir(NULL, "shm", NULL);
1649 #endif
1650 shm_mnt = kern_mount(&tmpfs_fs_type);
1651 if (IS_ERR(shm_mnt)) {
1652 error = PTR_ERR(shm_mnt);
1653 printk(KERN_ERR "Could not kern_mount tmpfs\n");
1654 goto out1;
1655 }
1656
1657 /* The internal instance should not do size checking */
1658 shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
1659 return 0;
1660
1661 out1:
1662 #ifdef CONFIG_TMPFS
1663 unregister_filesystem(&shmem_fs_type);
1664 out2:
1665 #endif
1666 unregister_filesystem(&tmpfs_fs_type);
1667 out3:
1668 shm_mnt = ERR_PTR(error);
1669 return error;
1670 }
1671 module_init(init_tmpfs)
1672
1673 /*
1674 * shmem_file_setup - get an unlinked file living in tmpfs
1675 *
1676 * @name: name for dentry (to be seen in /proc/<pid>/maps)
1677 * @size: size to be set for the file
1678 *
1679 */
1680 struct file *shmem_file_setup(char *name, loff_t size)
1681 {
1682 int error;
1683 struct file *file;
1684 struct inode *inode;
1685 struct dentry *dentry, *root;
1686 struct qstr this;
1687 int vm_enough_memory(long pages);
1688
1689 if (IS_ERR(shm_mnt))
1690 return (void *)shm_mnt;
1691
1692 if (size > SHMEM_MAX_BYTES)
1693 return ERR_PTR(-EINVAL);
1694
1695 if (!vm_enough_memory(VM_ACCT(size)))
1696 return ERR_PTR(-ENOMEM);
1697
1698 this.name = name;
1699 this.len = strlen(name);
1700 this.hash = 0; /* will go */
1701 root = shm_mnt->mnt_root;
1702 dentry = d_alloc(root, &this);
1703 if (!dentry)
1704 return ERR_PTR(-ENOMEM);
1705
1706 error = -ENFILE;
1707 file = get_empty_filp();
1708 if (!file)
1709 goto put_dentry;
1710
1711 error = -ENOSPC;
1712 inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
1713 if (!inode)
1714 goto close_file;
1715
1716 d_instantiate(dentry, inode);
1717 inode->i_size = size;
1718 inode->i_nlink = 0; /* It is unlinked */
1719 file->f_vfsmnt = mntget(shm_mnt);
1720 file->f_dentry = dentry;
1721 file->f_op = &shmem_file_operations;
1722 file->f_mode = FMODE_WRITE | FMODE_READ;
1723 return file;
1724
1725 close_file:
1726 put_filp(file);
1727 put_dentry:
1728 dput(dentry);
1729 return ERR_PTR(error);
1730 }
1731
1732 /*
1733 * shmem_zero_setup - setup a shared anonymous mapping
1734 *
1735 * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
1736 */
1737 int shmem_zero_setup(struct vm_area_struct *vma)
1738 {
1739 struct file *file;
1740 loff_t size = vma->vm_end - vma->vm_start;
1741
1742 file = shmem_file_setup("dev/zero", size);
1743 if (IS_ERR(file))
1744 return PTR_ERR(file);
1745
1746 if (vma->vm_file)
1747 fput(vma->vm_file);
1748 vma->vm_file = file;
1749 vma->vm_ops = &shmem_vm_ops;
1750 return 0;
1751 }
1752
1753 EXPORT_SYMBOL(shmem_file_setup);
1754