1 /*
2  * Resizable virtual memory filesystem for Linux.
3  *
4  * Copyright (C) 2000 Linus Torvalds.
5  *		 2000 Transmeta Corp.
6  *		 2000-2001 Christoph Rohland
7  *		 2000-2001 SAP AG
8  *		 2002 Red Hat Inc.
9  * Copyright (C) 2002-2003 Hugh Dickins.
10  * Copyright (C) 2002-2003 VERITAS Software Corporation.
11  *
12  * This file is released under the GPL.
13  */
14 
15 /*
16  * This virtual memory filesystem is heavily based on the ramfs. It
17  * extends ramfs by the ability to use swap and honor resource limits
18  * which makes it a completely usable filesystem.
19  */
20 
21 #include <linux/config.h>
22 #include <linux/module.h>
23 #include <linux/init.h>
24 #include <linux/devfs_fs_kernel.h>
25 #include <linux/fs.h>
26 #include <linux/mm.h>
27 #include <linux/file.h>
28 #include <linux/swap.h>
29 #include <linux/pagemap.h>
30 #include <linux/string.h>
31 #include <linux/locks.h>
32 #include <linux/smp_lock.h>
33 
34 #include <asm/uaccess.h>
35 #include <asm/div64.h>
36 
37 /* This magic number is used in glibc for posix shared memory */
38 #define TMPFS_MAGIC	0x01021994
39 
40 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
41 #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
42 #define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
43 
44 #define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
45 #define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
46 
47 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
48 
49 /* info->flags needs VM_flags to handle pagein/truncate race efficiently */
50 #define SHMEM_PAGEIN	 VM_READ
51 #define SHMEM_TRUNCATE	 VM_WRITE
52 
53 /* Pretend that each entry is of this size in directory's i_size */
54 #define BOGO_DIRENT_SIZE 20
55 
56 #define SHMEM_SB(sb) (&sb->u.shmem_sb)
57 
58 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
59 enum sgp_type {
60 	SGP_READ,	/* don't exceed i_size, don't allocate page */
61 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
62 	SGP_WRITE,	/* may exceed i_size, may allocate page */
63 };
64 
65 static int shmem_getpage(struct inode *inode, unsigned long idx,
66 			 struct page **pagep, enum sgp_type sgp);
67 
68 static struct super_operations shmem_ops;
69 static struct address_space_operations shmem_aops;
70 static struct file_operations shmem_file_operations;
71 static struct inode_operations shmem_inode_operations;
72 static struct inode_operations shmem_dir_inode_operations;
73 static struct vm_operations_struct shmem_vm_ops;
74 
75 LIST_HEAD(shmem_inodes);
76 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
77 
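/*
 * Give one page-sized block back to the filesystem: bump the superblock's
 * free_blocks count and drop the corresponding charge from the inode's
 * i_blocks, under the superblock's stat_lock.
 */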
78 static void shmem_free_block(struct inode *inode)
79 {
80 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
81 	spin_lock(&sbinfo->stat_lock);
82 	sbinfo->free_blocks++;
83 	inode->i_blocks -= BLOCKS_PER_PAGE;
84 	spin_unlock(&sbinfo->stat_lock);
85 }
86 
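/*
 * Called via shmem_aops.removepage when a page leaves the page cache:
 * release its block, unless the page is being laundered out to swap or
 * is marked with an error.
 */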
87 static void shmem_removepage(struct page *page)
88 {
89 	if (!PageLaunder(page) && !PageError(page))
90 		shmem_free_block(page->mapping->host);
91 }
92 
93 /*
94  * shmem_swp_entry - find the swap vector position in the info structure
95  *
96  * @info:  info structure for the inode
97  * @index: index of the page to find
98  * @page:  optional page to add to the structure. Has to be preset to
99  *         all zeros
100  *
101  * If there is no space allocated yet, it will return NULL when
102  * page is 0; otherwise it will use the page for the needed block,
103  * setting it to 0 on return to indicate that it has been used.
104  *
105  * The swap vector is organized the following way:
106  *
107  * There are SHMEM_NR_DIRECT entries directly stored in the
108  * shmem_inode_info structure. So small files do not need an additional
109  * allocation.
110  *
111  * For pages with index > SHMEM_NR_DIRECT there is the pointer
112  * i_indirect which points to a page which holds in the first half
113  * doubly indirect blocks, in the second half triple indirect blocks:
114  *
115  * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
116  * following layout (for SHMEM_NR_DIRECT == 16):
117  *
118  * i_indirect -> dir --> 16-19
119  * 	      |	     +-> 20-23
120  * 	      |
121  * 	      +-->dir2 --> 24-27
122  * 	      |	       +-> 28-31
123  * 	      |	       +-> 32-35
124  * 	      |	       +-> 36-39
125  * 	      |
126  * 	      +-->dir3 --> 40-43
127  * 	       	       +-> 44-47
128  * 	      	       +-> 48-51
129  * 	      	       +-> 52-55
130  */
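/*
 * Worked example (with the same artificial ENTRIES_PER_PAGE == 4 and
 * SHMEM_NR_DIRECT == 16 as above): for page index 30 the code below
 * computes index = 30 - 16 = 14, offset = 14 % 4 = 2, slot = 14 / 4 = 3;
 * slot 3 falls in the second (triple indirect) half, so it descends into
 * dir2 and returns entry 2 of dir2's second page, the one covering 28-31.
 */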
131 static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page)
132 {
133 	unsigned long offset;
134 	void **dir;
135 
136 	if (index < SHMEM_NR_DIRECT)
137 		return info->i_direct+index;
138 	if (!info->i_indirect) {
139 		if (page) {
140 			info->i_indirect = (void **) *page;
141 			*page = 0;
142 		}
143 		return NULL;			/* need another page */
144 	}
145 
146 	index -= SHMEM_NR_DIRECT;
147 	offset = index % ENTRIES_PER_PAGE;
148 	index /= ENTRIES_PER_PAGE;
149 	dir = info->i_indirect;
150 
151 	if (index >= ENTRIES_PER_PAGE/2) {
152 		index -= ENTRIES_PER_PAGE/2;
153 		dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
154 		index %= ENTRIES_PER_PAGE;
155 		if (!*dir) {
156 			if (page) {
157 				*dir = (void *) *page;
158 				*page = 0;
159 			}
160 			return NULL;		/* need another page */
161 		}
162 		dir = (void **) *dir;
163 	}
164 
165 	dir += index;
166 	if (!*dir) {
167 		if (!page || !*page)
168 			return NULL;		/* need a page */
169 		*dir = (void *) *page;
170 		*page = 0;
171 	}
172 	return (swp_entry_t *) *dir + offset;
173 }
174 
175 /*
176  * shmem_swp_alloc - get the position of the swap entry for the page.
177  *                   If it does not exist allocate the entry.
178  *
179  * @info:	info structure for the inode
180  * @index:	index of the page to find
181  * @sgp:	check and recheck i_size? skip allocation?
182  */
183 static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
184 {
185 	struct inode *inode = info->inode;
186 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
187 	unsigned long page = 0;
188 	swp_entry_t *entry;
189 	static const swp_entry_t unswapped = {0};
190 
191 	if (sgp != SGP_WRITE &&
192 	    ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size)
193 		return ERR_PTR(-EINVAL);
194 
195 	while (!(entry = shmem_swp_entry(info, index, &page))) {
196 		if (sgp == SGP_READ)
197 			return (swp_entry_t *) &unswapped;
198 		/*
199 		 * Test free_blocks against 1 not 0, since we have 1 data
200 		 * page (and perhaps indirect index pages) yet to allocate:
201 		 * a waste to allocate index if we cannot allocate data.
202 		 */
203 		spin_lock(&sbinfo->stat_lock);
204 		if (sbinfo->free_blocks <= 1) {
205 			spin_unlock(&sbinfo->stat_lock);
206 			return ERR_PTR(-ENOSPC);
207 		}
208 		sbinfo->free_blocks--;
209 		inode->i_blocks += BLOCKS_PER_PAGE;
210 		spin_unlock(&sbinfo->stat_lock);
211 
212 		spin_unlock(&info->lock);
213 		page = get_zeroed_page(GFP_USER);
214 		spin_lock(&info->lock);
215 
216 		if (!page) {
217 			shmem_free_block(inode);
218 			return ERR_PTR(-ENOMEM);
219 		}
220 		if (sgp != SGP_WRITE &&
221 		    ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) {
222 			entry = ERR_PTR(-EINVAL);
223 			break;
224 		}
225 		if (info->next_index <= index)
226 			info->next_index = index + 1;
227 	}
228 	if (page) {
229 		/* another task gave its page, or truncated the file */
230 		shmem_free_block(inode);
231 		free_page(page);
232 	}
233 	if (info->next_index <= index && !IS_ERR(entry))
234 		info->next_index = index + 1;
235 	return entry;
236 }
237 
238 /*
239  * shmem_free_swp - free some swap entries in a directory
240  *
241  * @dir:   pointer to the directory
242  * @edir:  pointer after last entry of the directory
243  */
244 static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
245 {
246 	swp_entry_t *ptr;
247 	int freed = 0;
248 
249 	for (ptr = dir; ptr < edir; ptr++) {
250 		if (ptr->val) {
251 			free_swap_and_cache(*ptr);
252 			*ptr = (swp_entry_t){0};
253 			freed++;
254 		}
255 	}
256 	return freed;
257 }
258 
259 /*
260  * shmem_truncate_direct - free the swap entries of a whole doubly
261  *                         indirect block
262  *
263  * @info:	the info structure of the inode
264  * @dir:	pointer to the pointer to the block
265  * @start:	offset to start from (in pages)
266  * @len:	how many pages are stored in this block
267  */
268 static inline unsigned long
269 shmem_truncate_direct(struct shmem_inode_info *info, swp_entry_t ***dir, unsigned long start, unsigned long len)
270 {
271 	swp_entry_t **last, **ptr;
272 	unsigned long off, freed_swp, freed = 0;
273 
274 	last = *dir + (len + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE;
275 	off = start % ENTRIES_PER_PAGE;
276 
277 	for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++, off = 0) {
278 		if (!*ptr)
279 			continue;
280 
281 		if (info->swapped) {
282 			freed_swp = shmem_free_swp(*ptr + off,
283 						*ptr + ENTRIES_PER_PAGE);
284 			info->swapped -= freed_swp;
285 			freed += freed_swp;
286 		}
287 
288 		if (!off) {
289 			freed++;
290 			free_page((unsigned long) *ptr);
291 			*ptr = 0;
292 		}
293 	}
294 
295 	if (!start) {
296 		freed++;
297 		free_page((unsigned long) *dir);
298 		*dir = 0;
299 	}
300 	return freed;
301 }
302 
303 /*
304  * shmem_truncate_indirect - truncate an inode
305  *
306  * @info:  the info structure of the inode
307  * @index: the index to truncate
308  *
309  * This function locates the last doubly indirect block and then
310  * calls shmem_truncate_direct to do the real work.
311  */
312 static inline unsigned long
313 shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
314 {
315 	swp_entry_t ***base;
316 	unsigned long baseidx, start;
317 	unsigned long len = info->next_index;
318 	unsigned long freed;
319 
320 	if (len <= SHMEM_NR_DIRECT) {
321 		info->next_index = index;
322 		if (!info->swapped)
323 			return 0;
324 		freed = shmem_free_swp(info->i_direct + index,
325 					info->i_direct + len);
326 		info->swapped -= freed;
327 		return freed;
328 	}
329 
330 	if (len <= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT) {
331 		len -= SHMEM_NR_DIRECT;
332 		base = (swp_entry_t ***) &info->i_indirect;
333 		baseidx = SHMEM_NR_DIRECT;
334 	} else {
335 		len -= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
336 		BUG_ON(len > ENTRIES_PER_PAGEPAGE*ENTRIES_PER_PAGE/2);
337 		baseidx = len - 1;
338 		baseidx -= baseidx % ENTRIES_PER_PAGEPAGE;
339 		base = (swp_entry_t ***) info->i_indirect +
340 			ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGEPAGE;
341 		len -= baseidx;
342 		baseidx += ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
343 	}
344 
345 	if (index > baseidx) {
346 		info->next_index = index;
347 		start = index - baseidx;
348 	} else {
349 		info->next_index = baseidx;
350 		start = 0;
351 	}
352 	return *base? shmem_truncate_direct(info, base, start, len): 0;
353 }
354 
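/*
 * Truncate the inode down to its already-updated i_size: free the swap
 * entries and index pages beyond the new end, re-run truncate_inode_pages
 * if a racing pagein may have re-added pages meanwhile, and finally give
 * the freed blocks back to the superblock counts.
 */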
355 static void shmem_truncate(struct inode *inode)
356 {
357 	struct shmem_inode_info *info = SHMEM_I(inode);
358 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
359 	unsigned long freed = 0;
360 	unsigned long index;
361 
362 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
363 	index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
364 	if (index >= info->next_index)
365 		return;
366 
367 	spin_lock(&info->lock);
368 	while (index < info->next_index)
369 		freed += shmem_truncate_indirect(info, index);
370 	BUG_ON(info->swapped > info->next_index);
371 
372 	if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) {
373 		/*
374 		 * Call truncate_inode_pages again: racing shmem_unuse_inode
375 		 * may have swizzled a page in from swap since vmtruncate or
376 		 * generic_delete_inode did it, before we lowered next_index.
377 		 * Also, though shmem_getpage checks i_size before adding to
378 		 * cache, no recheck after: so fix the narrow window there too.
379 		 */
380 		info->flags |= SHMEM_TRUNCATE;
381 		spin_unlock(&info->lock);
382 		truncate_inode_pages(inode->i_mapping, inode->i_size);
383 		spin_lock(&info->lock);
384 		info->flags &= ~SHMEM_TRUNCATE;
385 	}
386 
387 	spin_unlock(&info->lock);
388 	spin_lock(&sbinfo->stat_lock);
389 	sbinfo->free_blocks += freed;
390 	inode->i_blocks -= freed*BLOCKS_PER_PAGE;
391 	spin_unlock(&sbinfo->stat_lock);
392 }
393 
394 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
395 {
396 	struct inode *inode = dentry->d_inode;
397 	struct page *page = NULL;
398 	int error;
399 
400 	if (attr->ia_valid & ATTR_SIZE) {
401 		if (attr->ia_size < inode->i_size) {
402 			/*
403 			 * If truncating down to a partial page, then
404 			 * if that page is already allocated, hold it
405 			 * in memory until the truncation is over, so
406 			 * truncate_partial_page cannot miss it were
407 			 * it assigned to swap.
408 			 */
409 			if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
410 				(void) shmem_getpage(inode,
411 					attr->ia_size>>PAGE_CACHE_SHIFT,
412 						&page, SGP_READ);
413 			}
414 			/*
415 			 * Reset SHMEM_PAGEIN flag so that shmem_truncate can
416 			 * detect if any pages might have been added to cache
417 			 * after truncate_inode_pages.  But we needn't bother
418 			 * if it's being fully truncated to zero-length: the
419 			 * nrpages check is efficient enough in that case.
420 			 */
421 			if (attr->ia_size) {
422 				struct shmem_inode_info *info = SHMEM_I(inode);
423 				spin_lock(&info->lock);
424 				info->flags &= ~SHMEM_PAGEIN;
425 				spin_unlock(&info->lock);
426 			}
427 		}
428 	}
429 
430 	error = inode_change_ok(inode, attr);
431 	if (!error)
432 		error = inode_setattr(inode, attr);
433 	if (page)
434 		page_cache_release(page);
435 	return error;
436 }
437 
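/*
 * Final disposal of an inode: for the types kept on the shmem_inodes list
 * (regular files and page-backed symlinks) unlink it from that list and
 * truncate it to zero, then return the inode to the superblock's free count.
 */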
438 static void shmem_delete_inode(struct inode *inode)
439 {
440 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
441 	struct shmem_inode_info *info = SHMEM_I(inode);
442 
443 	if (inode->i_op->truncate == shmem_truncate) {
444 		spin_lock(&shmem_ilock);
445 		list_del(&info->list);
446 		spin_unlock(&shmem_ilock);
447 		inode->i_size = 0;
448 		shmem_truncate(inode);
449 	}
450 	BUG_ON(inode->i_blocks);
451 	spin_lock(&sbinfo->stat_lock);
452 	sbinfo->free_inodes++;
453 	spin_unlock(&sbinfo->stat_lock);
454 	clear_inode(inode);
455 }
456 
457 static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
458 {
459 	swp_entry_t *ptr;
460 
461 	for (ptr = dir; ptr < edir; ptr++) {
462 		if (ptr->val == entry.val)
463 			return ptr - dir;
464 	}
465 	return -1;
466 }
467 
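/*
 * Scan one inode's swap vector for @entry; if found, move @page from the
 * swap cache back into this inode's page cache at the matching index.
 */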
468 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
469 {
470 	struct inode *inode;
471 	struct address_space *mapping;
472 	swp_entry_t *ptr;
473 	unsigned long idx;
474 	int offset;
475 
476 	idx = 0;
477 	ptr = info->i_direct;
478 	spin_lock(&info->lock);
479 	offset = info->next_index;
480 	if (offset > SHMEM_NR_DIRECT)
481 		offset = SHMEM_NR_DIRECT;
482 	offset = shmem_find_swp(entry, ptr, ptr + offset);
483 	if (offset >= 0)
484 		goto found;
485 
486 	for (idx = SHMEM_NR_DIRECT; idx < info->next_index;
487 	     idx += ENTRIES_PER_PAGE) {
488 		ptr = shmem_swp_entry(info, idx, NULL);
489 		if (!ptr)
490 			continue;
491 		offset = info->next_index - idx;
492 		if (offset > ENTRIES_PER_PAGE)
493 			offset = ENTRIES_PER_PAGE;
494 		offset = shmem_find_swp(entry, ptr, ptr + offset);
495 		if (offset >= 0)
496 			goto found;
497 	}
498 	spin_unlock(&info->lock);
499 	return 0;
500 found:
501 	idx += offset;
502 	inode = info->inode;
503 	mapping = inode->i_mapping;
504 	delete_from_swap_cache(page);
505 	if (add_to_page_cache_unique(page,
506 			mapping, idx, page_hash(mapping, idx)) == 0) {
507 		info->flags |= SHMEM_PAGEIN;
508 		ptr[offset].val = 0;
509 		info->swapped--;
510 	} else if (add_to_swap_cache(page, entry) != 0)
511 		BUG();
512 	spin_unlock(&info->lock);
513 	SetPageUptodate(page);
514 	/*
515 	 * Decrement swap count even when the entry is left behind:
516 	 * try_to_unuse will skip over mms, then reincrement count.
517 	 */
518 	swap_free(entry);
519 	return 1;
520 }
521 
522 /*
523  * shmem_unuse() searches for a possibly swapped-out shmem page.
524  */
525 int shmem_unuse(swp_entry_t entry, struct page *page)
526 {
527 	struct list_head *p;
528 	struct shmem_inode_info *info;
529 	int found = 0;
530 
531 	spin_lock(&shmem_ilock);
532 	list_for_each(p, &shmem_inodes) {
533 		info = list_entry(p, struct shmem_inode_info, list);
534 
535 		if (info->swapped && shmem_unuse_inode(info, entry, page)) {
536 			/* move head to start search for next from here */
537 			list_move_tail(&shmem_inodes, &info->list);
538 			found = 1;
539 			break;
540 		}
541 	}
542 	spin_unlock(&shmem_ilock);
543 	return found;
544 }
545 
546 /*
547  * Move the page from the page cache to the swap cache.
548  */
549 static int shmem_writepage(struct page *page)
550 {
551 	struct shmem_inode_info *info;
552 	swp_entry_t *entry, swap;
553 	struct address_space *mapping;
554 	unsigned long index;
555 	struct inode *inode;
556 
557 	BUG_ON(!PageLocked(page));
558 	if (!PageLaunder(page))
559 		goto fail;
560 
561 	mapping = page->mapping;
562 	index = page->index;
563 	inode = mapping->host;
564 	info = SHMEM_I(inode);
565 	if (info->flags & VM_LOCKED)
566 		goto fail;
567 getswap:
568 	swap = get_swap_page();
569 	if (!swap.val)
570 		goto fail;
571 
572 	spin_lock(&info->lock);
573 	if (index >= info->next_index) {
574 		BUG_ON(!(info->flags & SHMEM_TRUNCATE));
575 		spin_unlock(&info->lock);
576 		swap_free(swap);
577 		goto fail;
578 	}
579 	entry = shmem_swp_entry(info, index, NULL);
580 	BUG_ON(!entry);
581 	BUG_ON(entry->val);
582 
583 	/* Remove it from the page cache */
584 	remove_inode_page(page);
585 	page_cache_release(page);
586 
587 	/* Add it to the swap cache */
588 	if (add_to_swap_cache(page, swap) != 0) {
589 		/*
590 		 * Raced with "speculative" read_swap_cache_async.
591 		 * Add page back to page cache, unref swap, try again.
592 		 */
593 		add_to_page_cache_locked(page, mapping, index);
594 		info->flags |= SHMEM_PAGEIN;
595 		spin_unlock(&info->lock);
596 		swap_free(swap);
597 		goto getswap;
598 	}
599 
600 	*entry = swap;
601 	info->swapped++;
602 	spin_unlock(&info->lock);
603 	SetPageUptodate(page);
604 	set_page_dirty(page);
605 	UnlockPage(page);
606 	return 0;
607 fail:
608 	return fail_writepage(page);
609 }
610 
611 /*
612  * shmem_getpage - either get the page from swap or allocate a new one
613  *
614  * If we allocate a new one we do not mark it dirty: that's up to the
615  * vm. If we swap it in we mark it dirty, and also free the swap
616  * entry, since a page cannot live in both the swap and page cache.
617  */
618 static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp)
619 {
620 	struct address_space *mapping = inode->i_mapping;
621 	struct shmem_inode_info *info = SHMEM_I(inode);
622 	struct shmem_sb_info *sbinfo;
623 	struct page *filepage = *pagep;
624 	struct page *swappage;
625 	swp_entry_t *entry;
626 	swp_entry_t swap;
627 	int error = 0;
628 
629 	if (idx >= SHMEM_MAX_INDEX) {
630 		error = -EFBIG;
631 		goto failed;
632 	}
633 
634 	/*
635 	 * Normally, filepage is NULL on entry, and either found
636 	 * uptodate immediately, or allocated and zeroed, or read
637 	 * in under swappage, which is then assigned to filepage.
638 	 * But shmem_readpage and shmem_prepare_write pass in a locked
639 	 * filepage, which may be found not uptodate by other callers
640 	 * too, and may need to be copied from the swappage read in.
641 	 */
642 repeat:
643 	if (!filepage)
644 		filepage = find_lock_page(mapping, idx);
645 	if (filepage && Page_Uptodate(filepage))
646 		goto done;
647 
648 	spin_lock(&info->lock);
649 	entry = shmem_swp_alloc(info, idx, sgp);
650 	if (IS_ERR(entry)) {
651 		spin_unlock(&info->lock);
652 		error = PTR_ERR(entry);
653 		goto failed;
654 	}
655 	swap = *entry;
656 
657 	if (swap.val) {
658 		/* Look it up and read it in.. */
659 		swappage = lookup_swap_cache(swap);
660 		if (!swappage) {
661 			spin_unlock(&info->lock);
662 			swapin_readahead(swap);
663 			swappage = read_swap_cache_async(swap);
664 			if (!swappage) {
665 				spin_lock(&info->lock);
666 				entry = shmem_swp_alloc(info, idx, sgp);
667 				if (IS_ERR(entry))
668 					error = PTR_ERR(entry);
669 				else if (entry->val == swap.val)
670 					error = -ENOMEM;
671 				spin_unlock(&info->lock);
672 				if (error)
673 					goto failed;
674 				goto repeat;
675 			}
676 			wait_on_page(swappage);
677 			page_cache_release(swappage);
678 			goto repeat;
679 		}
680 
681 		/* We have to do this with page locked to prevent races */
682 		if (TryLockPage(swappage)) {
683 			spin_unlock(&info->lock);
684 			wait_on_page(swappage);
685 			page_cache_release(swappage);
686 			goto repeat;
687 		}
688 		if (!Page_Uptodate(swappage)) {
689 			spin_unlock(&info->lock);
690 			UnlockPage(swappage);
691 			page_cache_release(swappage);
692 			error = -EIO;
693 			goto failed;
694 		}
695 
696 		delete_from_swap_cache(swappage);
697 		if (filepage) {
698 			entry->val = 0;
699 			info->swapped--;
700 			spin_unlock(&info->lock);
701 			flush_page_to_ram(swappage);
702 			copy_highpage(filepage, swappage);
703 			UnlockPage(swappage);
704 			page_cache_release(swappage);
705 			flush_dcache_page(filepage);
706 			SetPageUptodate(filepage);
707 			SetPageDirty(filepage);
708 			swap_free(swap);
709 		} else if (add_to_page_cache_unique(swappage,
710 			mapping, idx, page_hash(mapping, idx)) == 0) {
711 			info->flags |= SHMEM_PAGEIN;
712 			entry->val = 0;
713 			info->swapped--;
714 			spin_unlock(&info->lock);
715 			filepage = swappage;
716 			SetPageUptodate(filepage);
717 			SetPageDirty(filepage);
718 			swap_free(swap);
719 		} else {
720 			if (add_to_swap_cache(swappage, swap) != 0)
721 				BUG();
722 			spin_unlock(&info->lock);
723 			SetPageUptodate(swappage);
724 			SetPageDirty(swappage);
725 			UnlockPage(swappage);
726 			page_cache_release(swappage);
727 			goto repeat;
728 		}
729 	} else if (sgp == SGP_READ && !filepage) {
730 		filepage = find_get_page(mapping, idx);
731 		if (filepage &&
732 		    (!Page_Uptodate(filepage) || TryLockPage(filepage))) {
733 			spin_unlock(&info->lock);
734 			wait_on_page(filepage);
735 			page_cache_release(filepage);
736 			filepage = NULL;
737 			goto repeat;
738 		}
739 		spin_unlock(&info->lock);
740 	} else {
741 		sbinfo = SHMEM_SB(inode->i_sb);
742 		spin_lock(&sbinfo->stat_lock);
743 		if (sbinfo->free_blocks == 0) {
744 			spin_unlock(&sbinfo->stat_lock);
745 			spin_unlock(&info->lock);
746 			error = -ENOSPC;
747 			goto failed;
748 		}
749 		sbinfo->free_blocks--;
750 		inode->i_blocks += BLOCKS_PER_PAGE;
751 		spin_unlock(&sbinfo->stat_lock);
752 
753 		if (!filepage) {
754 			spin_unlock(&info->lock);
755 			filepage = page_cache_alloc(mapping);
756 			if (!filepage) {
757 				shmem_free_block(inode);
758 				error = -ENOMEM;
759 				goto failed;
760 			}
761 
762 			spin_lock(&info->lock);
763 			entry = shmem_swp_alloc(info, idx, sgp);
764 			if (IS_ERR(entry))
765 				error = PTR_ERR(entry);
766 			if (error || entry->val ||
767 			    add_to_page_cache_unique(filepage,
768 			    mapping, idx, page_hash(mapping, idx)) != 0) {
769 				spin_unlock(&info->lock);
770 				page_cache_release(filepage);
771 				shmem_free_block(inode);
772 				filepage = NULL;
773 				if (error)
774 					goto failed;
775 				goto repeat;
776 			}
777 			info->flags |= SHMEM_PAGEIN;
778 		}
779 
780 		spin_unlock(&info->lock);
781 		clear_highpage(filepage);
782 		flush_dcache_page(filepage);
783 		SetPageUptodate(filepage);
784 	}
785 done:
786 	if (!*pagep) {
787 		if (filepage)
788 			UnlockPage(filepage);
789 		else
790 			filepage = ZERO_PAGE(0);
791 		*pagep = filepage;
792 	}
793 	if (PageError(filepage))
794 		ClearPageError(filepage);
795 	return 0;
796 
797 failed:
798 	if (filepage) {
799 		if (*pagep == filepage)
800 			SetPageError(filepage);
801 		else {
802 			UnlockPage(filepage);
803 			page_cache_release(filepage);
804 		}
805 	}
806 	return error;
807 }
808 
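/*
 * Fault handler for mmapped tmpfs files: translate the faulting address
 * into a page index and return the page found or allocated by
 * shmem_getpage.
 */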
809 struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused)
810 {
811 	struct inode *inode = vma->vm_file->f_dentry->d_inode;
812 	struct page *page = NULL;
813 	unsigned long idx;
814 	int error;
815 
816 	idx = (address - vma->vm_start) >> PAGE_SHIFT;
817 	idx += vma->vm_pgoff;
818 	idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
819 
820 	error = shmem_getpage(inode, idx, &page, SGP_CACHE);
821 	if (error)
822 		return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
823 
824 	mark_page_accessed(page);
825 	flush_page_to_ram(page);
826 	return page;
827 }
828 
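/*
 * Mark the whole file as locked in memory (or clear the mark), so that
 * shmem_writepage will refuse to push its pages out to swap.
 */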
829 void shmem_lock(struct file *file, int lock)
830 {
831 	struct inode *inode = file->f_dentry->d_inode;
832 	struct shmem_inode_info *info = SHMEM_I(inode);
833 
834 	spin_lock(&info->lock);
835 	if (lock)
836 		info->flags |= VM_LOCKED;
837 	else
838 		info->flags &= ~VM_LOCKED;
839 	spin_unlock(&info->lock);
840 }
841 
842 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
843 {
844 	struct vm_operations_struct *ops;
845 	struct inode *inode = file->f_dentry->d_inode;
846 
847 	ops = &shmem_vm_ops;
848 	if (!S_ISREG(inode->i_mode))
849 		return -EACCES;
850 	UPDATE_ATIME(inode);
851 	vma->vm_ops = ops;
852 	return 0;
853 }
854 
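/*
 * Allocate a fresh inode for this mount, charging it against the mount's
 * inode limit, and wire up the operations appropriate to its file type.
 */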
855 static struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
856 {
857 	struct inode *inode;
858 	struct shmem_inode_info *info;
859 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
860 
861 	spin_lock(&sbinfo->stat_lock);
862 	if (!sbinfo->free_inodes) {
863 		spin_unlock(&sbinfo->stat_lock);
864 		return NULL;
865 	}
866 	sbinfo->free_inodes--;
867 	spin_unlock(&sbinfo->stat_lock);
868 
869 	inode = new_inode(sb);
870 	if (inode) {
871 		inode->i_mode = mode;
872 		inode->i_uid = current->fsuid;
873 		inode->i_gid = current->fsgid;
874 		inode->i_blksize = PAGE_CACHE_SIZE;
875 		inode->i_blocks = 0;
876 		inode->i_rdev = NODEV;
877 		inode->i_mapping->a_ops = &shmem_aops;
878 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
879 		info = SHMEM_I(inode);
880 		info->inode = inode;
881 		spin_lock_init(&info->lock);
882 		switch (mode & S_IFMT) {
883 		default:
884 			init_special_inode(inode, mode, dev);
885 			break;
886 		case S_IFREG:
887 			inode->i_op = &shmem_inode_operations;
888 			inode->i_fop = &shmem_file_operations;
889 			spin_lock(&shmem_ilock);
890 			list_add_tail(&info->list, &shmem_inodes);
891 			spin_unlock(&shmem_ilock);
892 			break;
893 		case S_IFDIR:
894 			inode->i_nlink++;
895 			/* Some things misbehave if size == 0 on a directory */
896 			inode->i_size = 2 * BOGO_DIRENT_SIZE;
897 			inode->i_op = &shmem_dir_inode_operations;
898 			inode->i_fop = &dcache_dir_ops;
899 			break;
900 		case S_IFLNK:
901 			break;
902 		}
903 	}
904 	return inode;
905 }
906 
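/*
 * Adjust the mount's block and inode limits, refusing to shrink below
 * what is currently in use; the free counts are rescaled to match.
 */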
907 static int shmem_set_size(struct shmem_sb_info *info,
908 			  unsigned long max_blocks, unsigned long max_inodes)
909 {
910 	int error;
911 	unsigned long blocks, inodes;
912 
913 	spin_lock(&info->stat_lock);
914 	blocks = info->max_blocks - info->free_blocks;
915 	inodes = info->max_inodes - info->free_inodes;
916 	error = -EINVAL;
917 	if (max_blocks < blocks)
918 		goto out;
919 	if (max_inodes < inodes)
920 		goto out;
921 	error = 0;
922 	info->max_blocks  = max_blocks;
923 	info->free_blocks = max_blocks - blocks;
924 	info->max_inodes  = max_inodes;
925 	info->free_inodes = max_inodes - inodes;
926 out:
927 	spin_unlock(&info->stat_lock);
928 	return error;
929 }
930 
931 #ifdef CONFIG_TMPFS
932 
933 static struct inode_operations shmem_symlink_inode_operations;
934 static struct inode_operations shmem_symlink_inline_operations;
935 
936 /*
937  * tmpfs itself makes no use of generic_file_read, generic_file_mmap
938  * or generic_file_write; but shmem_readpage, shmem_prepare_write and
939  * shmem_commit_write let a tmpfs file be used below the loop driver,
940  * and shmem_readpage lets a tmpfs file be used by sendfile.
941  */
942 static int
943 shmem_readpage(struct file *file, struct page *page)
944 {
945 	struct inode *inode = page->mapping->host;
946 	int error = shmem_getpage(inode, page->index, &page, SGP_CACHE);
947 	UnlockPage(page);
948 	return error;
949 }
950 
951 static int
952 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
953 {
954 	struct inode *inode = page->mapping->host;
955 	return shmem_getpage(inode, page->index, &page, SGP_WRITE);
956 }
957 
958 static int
959 shmem_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
960 {
961 	struct inode *inode = page->mapping->host;
962 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
963 
964 	if (pos > inode->i_size)
965 		inode->i_size = pos;
966 	SetPageDirty(page);
967 	return 0;
968 }
969 
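/*
 * write(2) for tmpfs: loop getting (or allocating) each page with
 * SGP_WRITE, copy the user data into it, mark it dirty and extend i_size
 * as we go; i_sem serializes writers.
 */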
970 static ssize_t
971 shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
972 {
973 	struct inode	*inode = file->f_dentry->d_inode;
974 	loff_t		pos;
975 	unsigned long	written;
976 	ssize_t		err;
977 
978 	if ((ssize_t) count < 0)
979 		return -EINVAL;
980 
981 	if (!access_ok(VERIFY_READ, buf, count))
982 		return -EFAULT;
983 
984 	down(&inode->i_sem);
985 
986 	pos = *ppos;
987 	written = 0;
988 
989 	err = precheck_file_write(file, inode, &count, &pos);
990 	if (err || !count)
991 		goto out;
992 
993 	remove_suid(inode);
994 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
995 
996 	do {
997 		struct page *page = NULL;
998 		unsigned long bytes, index, offset;
999 		char *kaddr;
1000 		int left;
1001 
1002 		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
1003 		index = pos >> PAGE_CACHE_SHIFT;
1004 		bytes = PAGE_CACHE_SIZE - offset;
1005 		if (bytes > count)
1006 			bytes = count;
1007 
1008 		/*
1009 		 * We don't hold page lock across copy from user -
1010 		 * what would it guard against? - so no deadlock here.
1011 		 */
1012 
1013 		err = shmem_getpage(inode, index, &page, SGP_WRITE);
1014 		if (err)
1015 			break;
1016 
1017 		kaddr = kmap(page);
1018 		left = __copy_from_user(kaddr + offset, buf, bytes);
1019 		kunmap(page);
1020 
1021 		written += bytes;
1022 		count -= bytes;
1023 		pos += bytes;
1024 		buf += bytes;
1025 		if (pos > inode->i_size)
1026 			inode->i_size = pos;
1027 
1028 		flush_dcache_page(page);
1029 		SetPageDirty(page);
1030 		SetPageReferenced(page);
1031 		page_cache_release(page);
1032 
1033 		if (left) {
1034 			pos -= left;
1035 			written -= left;
1036 			err = -EFAULT;
1037 			break;
1038 		}
1039 	} while (count);
1040 
1041 	*ppos = pos;
1042 	if (written)
1043 		err = written;
1044 out:
1045 	up(&inode->i_sem);
1046 	return err;
1047 }
1048 
1049 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
1050 {
1051 	struct inode *inode = filp->f_dentry->d_inode;
1052 	struct address_space *mapping = inode->i_mapping;
1053 	unsigned long index, offset;
1054 	loff_t pos = *ppos;
1055 
1056 	if (unlikely(pos < 0))
1057 		return;
1058 
1059 	index = pos >> PAGE_CACHE_SHIFT;
1060 	offset = pos & ~PAGE_CACHE_MASK;
1061 
1062 	for (;;) {
1063 		struct page *page = NULL;
1064 		unsigned long end_index, nr, ret;
1065 
1066 		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1067 		if (index > end_index)
1068 			break;
1069 		if (index == end_index) {
1070 			nr = inode->i_size & ~PAGE_CACHE_MASK;
1071 			if (nr <= offset)
1072 				break;
1073 		}
1074 
1075 		desc->error = shmem_getpage(inode, index, &page, SGP_READ);
1076 		if (desc->error) {
1077 			if (desc->error == -EINVAL)
1078 				desc->error = 0;
1079 			break;
1080 		}
1081 
1082 		/*
1083 		 * We must re-evaluate i_size afterwards, since reads (unlike
1084 		 * writes) are called without i_sem protection against truncate
1085 		 */
1086 		nr = PAGE_CACHE_SIZE;
1087 		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1088 		if (index == end_index) {
1089 			nr = inode->i_size & ~PAGE_CACHE_MASK;
1090 			if (nr <= offset) {
1091 				page_cache_release(page);
1092 				break;
1093 			}
1094 		}
1095 		nr -= offset;
1096 
1097 		if (page != ZERO_PAGE(0)) {
1098 			/*
1099 			 * If users can be writing to this page using arbitrary
1100 			 * virtual addresses, take care about potential aliasing
1101 			 * before reading the page on the kernel side.
1102 			 */
1103 			if (mapping->i_mmap_shared != NULL)
1104 				flush_dcache_page(page);
1105 			/*
1106 			 * Mark the page accessed if we read the
1107 			 * beginning or we just did an lseek.
1108 			 */
1109 			if (!offset || !filp->f_reada)
1110 				mark_page_accessed(page);
1111 		}
1112 
1113 		/*
1114 		 * Ok, we have the page, and it's up-to-date, so
1115 		 * now we can copy it to user space...
1116 		 *
1117 		 * The actor routine returns how many bytes were actually used..
1118 		 * NOTE! This may not be the same as how much of a user buffer
1119 		 * we filled up (we may be padding etc), so we can only update
1120 		 * "pos" here (the actor routine has to update the user buffer
1121 		 * pointers and the remaining count).
1122 		 */
1123 		ret = file_read_actor(desc, page, offset, nr);
1124 		offset += ret;
1125 		index += offset >> PAGE_CACHE_SHIFT;
1126 		offset &= ~PAGE_CACHE_MASK;
1127 
1128 		page_cache_release(page);
1129 		if (ret != nr || !desc->count)
1130 			break;
1131 	}
1132 
1133 	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1134 	filp->f_reada = 1;
1135 	UPDATE_ATIME(inode);
1136 }
1137 
1138 static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
1139 {
1140 	read_descriptor_t desc;
1141 
1142 	if ((ssize_t) count < 0)
1143 		return -EINVAL;
1144 	if (!access_ok(VERIFY_WRITE, buf, count))
1145 		return -EFAULT;
1146 	if (!count)
1147 		return 0;
1148 
1149 	desc.written = 0;
1150 	desc.count = count;
1151 	desc.buf = buf;
1152 	desc.error = 0;
1153 
1154 	do_shmem_file_read(filp, ppos, &desc);
1155 	if (desc.written)
1156 		return desc.written;
1157 	return desc.error;
1158 }
1159 
1160 static int shmem_statfs(struct super_block *sb, struct statfs *buf)
1161 {
1162 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1163 
1164 	buf->f_type = TMPFS_MAGIC;
1165 	buf->f_bsize = PAGE_CACHE_SIZE;
1166 	spin_lock(&sbinfo->stat_lock);
1167 	buf->f_blocks = sbinfo->max_blocks;
1168 	buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
1169 	buf->f_files = sbinfo->max_inodes;
1170 	buf->f_ffree = sbinfo->free_inodes;
1171 	spin_unlock(&sbinfo->stat_lock);
1172 	buf->f_namelen = NAME_MAX;
1173 	return 0;
1174 }
1175 
1176 /*
1177  * Retaining negative dentries for an in-memory filesystem just wastes
1178  * memory and lookup time: arrange for them to be deleted immediately.
1179  */
1180 static int shmem_delete_dentry(struct dentry *dentry)
1181 {
1182 	return 1;
1183 }
1184 
1185 /*
1186  * Lookup the data. This is trivial - if the dentry didn't already
1187  * exist, we know it is negative.  Set d_op to delete negative dentries.
1188  */
1189 static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry)
1190 {
1191 	static struct dentry_operations shmem_dentry_operations = {
1192 		.d_delete = shmem_delete_dentry,
1193 	};
1194 
1195 	if (dentry->d_name.len > NAME_MAX)
1196 		return ERR_PTR(-ENAMETOOLONG);
1197 	dentry->d_op = &shmem_dentry_operations;
1198 	d_add(dentry, NULL);
1199 	return NULL;
1200 }
1201 
1202 /*
1203  * File creation. Allocate an inode, and we're done..
1204  */
1205 static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
1206 {
1207 	struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
1208 	int error = -ENOSPC;
1209 
1210 	if (inode) {
1211 		if (dir->i_mode & S_ISGID) {
1212 			inode->i_gid = dir->i_gid;
1213 			if (S_ISDIR(mode))
1214 				inode->i_mode |= S_ISGID;
1215 		}
1216 		dir->i_size += BOGO_DIRENT_SIZE;
1217 		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1218 		d_instantiate(dentry, inode);
1219 		dget(dentry); /* Extra count - pin the dentry in core */
1220 		error = 0;
1221 	}
1222 	return error;
1223 }
1224 
1225 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1226 {
1227 	int error;
1228 
1229 	if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1230 		return error;
1231 	dir->i_nlink++;
1232 	return 0;
1233 }
1234 
1235 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
1236 {
1237 	return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
1238 }
1239 
1240 /*
1241  * Link a file..
1242  */
1243 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
1244 {
1245 	struct inode *inode = old_dentry->d_inode;
1246 
1247 	if (S_ISDIR(inode->i_mode))
1248 		return -EPERM;
1249 
1250 	dir->i_size += BOGO_DIRENT_SIZE;
1251 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1252 	inode->i_nlink++;
1253 	atomic_inc(&inode->i_count);	/* New dentry reference */
1254 	dget(dentry);		/* Extra pinning count for the created dentry */
1255 	d_instantiate(dentry, inode);
1256 	return 0;
1257 }
1258 
1259 static inline int shmem_positive(struct dentry *dentry)
1260 {
1261 	return dentry->d_inode && !d_unhashed(dentry);
1262 }
1263 
1264 /*
1265  * Check that a directory is empty (this works
1266  * for regular files too, they'll just always be
1267  * considered empty..).
1268  *
1269  * Note that an empty directory can still have
1270  * children, they just all have to be negative..
1271  */
1272 static int shmem_empty(struct dentry *dentry)
1273 {
1274 	struct list_head *list;
1275 
1276 	spin_lock(&dcache_lock);
1277 	list = dentry->d_subdirs.next;
1278 
1279 	while (list != &dentry->d_subdirs) {
1280 		struct dentry *de = list_entry(list, struct dentry, d_child);
1281 
1282 		if (shmem_positive(de)) {
1283 			spin_unlock(&dcache_lock);
1284 			return 0;
1285 		}
1286 		list = list->next;
1287 	}
1288 	spin_unlock(&dcache_lock);
1289 	return 1;
1290 }
1291 
1292 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1293 {
1294 	struct inode *inode = dentry->d_inode;
1295 
1296 	dir->i_size -= BOGO_DIRENT_SIZE;
1297 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1298 	inode->i_nlink--;
1299 	dput(dentry);	/* Undo the count from "create" - this does all the work */
1300 	return 0;
1301 }
1302 
1303 static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1304 {
1305 	if (!shmem_empty(dentry))
1306 		return -ENOTEMPTY;
1307 
1308 	dir->i_nlink--;
1309 	return shmem_unlink(dir, dentry);
1310 }
1311 
1312 /*
1313  * The VFS layer already does all the dentry stuff for rename,
1314  * we just have to decrement the usage count for the target if
1315  * it exists so that the VFS layer correctly frees it when it
1316  * gets overwritten.
1317  */
1318 static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
1319 {
1320 	struct inode *inode = old_dentry->d_inode;
1321 	int they_are_dirs = S_ISDIR(inode->i_mode);
1322 
1323 	if (!shmem_empty(new_dentry))
1324 		return -ENOTEMPTY;
1325 
1326 	if (new_dentry->d_inode) {
1327 		(void) shmem_unlink(new_dir, new_dentry);
1328 		if (they_are_dirs)
1329 			old_dir->i_nlink--;
1330 	} else if (they_are_dirs) {
1331 		old_dir->i_nlink--;
1332 		new_dir->i_nlink++;
1333 	}
1334 
1335 	old_dir->i_size -= BOGO_DIRENT_SIZE;
1336 	new_dir->i_size += BOGO_DIRENT_SIZE;
1337 	old_dir->i_ctime = old_dir->i_mtime =
1338 	new_dir->i_ctime = new_dir->i_mtime =
1339 	inode->i_ctime = CURRENT_TIME;
1340 	return 0;
1341 }
1342 
1343 static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1344 {
1345 	int error;
1346 	int len;
1347 	struct inode *inode;
1348 	struct page *page = NULL;
1349 	char *kaddr;
1350 	struct shmem_inode_info *info;
1351 
1352 	len = strlen(symname) + 1;
1353 	if (len > PAGE_CACHE_SIZE)
1354 		return -ENAMETOOLONG;
1355 
1356 	inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
1357 	if (!inode)
1358 		return -ENOSPC;
1359 
1360 	info = SHMEM_I(inode);
1361 	inode->i_size = len-1;
1362 	if (len <= sizeof(struct shmem_inode_info)) {
1363 		/* do it inline */
1364 		memcpy(info, symname, len);
1365 		inode->i_op = &shmem_symlink_inline_operations;
1366 	} else {
1367 		error = shmem_getpage(inode, 0, &page, SGP_WRITE);
1368 		if (error) {
1369 			iput(inode);
1370 			return error;
1371 		}
1372 		inode->i_op = &shmem_symlink_inode_operations;
1373 		spin_lock(&shmem_ilock);
1374 		list_add_tail(&info->list, &shmem_inodes);
1375 		spin_unlock(&shmem_ilock);
1376 		kaddr = kmap(page);
1377 		memcpy(kaddr, symname, len);
1378 		kunmap(page);
1379 		SetPageDirty(page);
1380 		page_cache_release(page);
1381 	}
1382 	if (dir->i_mode & S_ISGID)
1383 		inode->i_gid = dir->i_gid;
1384 	dir->i_size += BOGO_DIRENT_SIZE;
1385 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1386 	d_instantiate(dentry, inode);
1387 	dget(dentry);
1388 	return 0;
1389 }
1390 
1391 static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
1392 {
1393 	return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode));
1394 }
1395 
1396 static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
1397 {
1398 	return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
1399 }
1400 
1401 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
1402 {
1403 	struct page *page = NULL;
1404 	int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1405 	if (res)
1406 		return res;
1407 	res = vfs_readlink(dentry, buffer, buflen, kmap(page));
1408 	kunmap(page);
1409 	mark_page_accessed(page);
1410 	page_cache_release(page);
1411 	return res;
1412 }
1413 
1414 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
1415 {
1416 	struct page *page = NULL;
1417 	int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
1418 	if (res)
1419 		return res;
1420 	res = vfs_follow_link(nd, kmap(page));
1421 	kunmap(page);
1422 	mark_page_accessed(page);
1423 	page_cache_release(page);
1424 	return res;
1425 }
1426 
1427 static struct inode_operations shmem_symlink_inline_operations = {
1428 	readlink:	shmem_readlink_inline,
1429 	follow_link:	shmem_follow_link_inline,
1430 };
1431 
1432 static struct inode_operations shmem_symlink_inode_operations = {
1433 	truncate:	shmem_truncate,
1434 	readlink:	shmem_readlink,
1435 	follow_link:	shmem_follow_link,
1436 };
1437 
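/*
 * Parse mount options such as "size=50%,nr_blocks=1024,nr_inodes=4096,
 * mode=1777,uid=0,gid=0": size takes the usual k/m/g suffixes or a
 * percentage of total RAM; a NULL mode/uid/gid pointer means that option
 * is silently skipped (as on remount).
 */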
1438 static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
1439 {
1440 	char *this_char, *value, *rest;
1441 
1442 	while ((this_char = strsep(&options, ",")) != NULL) {
1443 		if (!*this_char)
1444 			continue;
1445 		if ((value = strchr(this_char,'=')) != NULL) {
1446 			*value++ = 0;
1447 		} else {
1448 			printk(KERN_ERR
1449 			    "tmpfs: No value for mount option '%s'\n",
1450 			    this_char);
1451 			return 1;
1452 		}
1453 
1454 		if (!strcmp(this_char,"size")) {
1455 			unsigned long long size;
1456 			size = memparse(value,&rest);
1457 			if (*rest == '%') {
1458 				struct sysinfo si;
1459 				si_meminfo(&si);
1460 				size <<= PAGE_SHIFT;
1461 				size *= si.totalram;
1462 				do_div(size, 100);
1463 				rest++;
1464 			}
1465 			if (*rest)
1466 				goto bad_val;
1467 			*blocks = size >> PAGE_CACHE_SHIFT;
1468 		} else if (!strcmp(this_char,"nr_blocks")) {
1469 			*blocks = memparse(value,&rest);
1470 			if (*rest)
1471 				goto bad_val;
1472 		} else if (!strcmp(this_char,"nr_inodes")) {
1473 			*inodes = memparse(value,&rest);
1474 			if (*rest)
1475 				goto bad_val;
1476 		} else if (!strcmp(this_char,"mode")) {
1477 			if (!mode)
1478 				continue;
1479 			*mode = simple_strtoul(value,&rest,8);
1480 			if (*rest)
1481 				goto bad_val;
1482 		} else if (!strcmp(this_char,"uid")) {
1483 			if (!uid)
1484 				continue;
1485 			*uid = simple_strtoul(value,&rest,0);
1486 			if (*rest)
1487 				goto bad_val;
1488 		} else if (!strcmp(this_char,"gid")) {
1489 			if (!gid)
1490 				continue;
1491 			*gid = simple_strtoul(value,&rest,0);
1492 			if (*rest)
1493 				goto bad_val;
1494 		} else {
1495 			printk(KERN_ERR "tmpfs: Bad mount option %s\n",
1496 			       this_char);
1497 			return 1;
1498 		}
1499 	}
1500 	return 0;
1501 
1502 bad_val:
1503 	printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
1504 	       value, this_char);
1505 	return 1;
1506 }
1507 
1508 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
1509 {
1510 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1511 	unsigned long max_blocks = sbinfo->max_blocks;
1512 	unsigned long max_inodes = sbinfo->max_inodes;
1513 
1514 	if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
1515 		return -EINVAL;
1516 	return shmem_set_size(sbinfo, max_blocks, max_inodes);
1517 }
1518 
1519 static int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync)
1520 {
1521 	return 0;
1522 }
1523 #endif
1524 
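/*
 * Fill in a new superblock: the block and inode limits both default to
 * half of the machine's RAM pages (overridable by the mount options
 * above), then the root directory inode and dentry are set up.
 */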
1525 static struct super_block *shmem_read_super(struct super_block *sb, void *data, int silent)
1526 {
1527 	struct inode *inode;
1528 	struct dentry *root;
1529 	unsigned long blocks, inodes;
1530 	int mode   = S_IRWXUGO | S_ISVTX;
1531 	uid_t uid = current->fsuid;
1532 	gid_t gid = current->fsgid;
1533 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
1534 	struct sysinfo si;
1535 
1536 	/*
1537 	 * By default we only allow half of the physical RAM per
1538 	 * tmpfs instance
1539 	 */
1540 	si_meminfo(&si);
1541 	blocks = inodes = si.totalram / 2;
1542 
1543 #ifdef CONFIG_TMPFS
1544 	if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes))
1545 		return NULL;
1546 #endif
1547 
1548 	spin_lock_init(&sbinfo->stat_lock);
1549 	sbinfo->max_blocks = blocks;
1550 	sbinfo->free_blocks = blocks;
1551 	sbinfo->max_inodes = inodes;
1552 	sbinfo->free_inodes = inodes;
1553 	sb->s_maxbytes = SHMEM_MAX_BYTES;
1554 	sb->s_blocksize = PAGE_CACHE_SIZE;
1555 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
1556 	sb->s_magic = TMPFS_MAGIC;
1557 	sb->s_op = &shmem_ops;
1558 	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
1559 	if (!inode)
1560 		return NULL;
1561 
1562 	inode->i_uid = uid;
1563 	inode->i_gid = gid;
1564 	root = d_alloc_root(inode);
1565 	if (!root) {
1566 		iput(inode);
1567 		return NULL;
1568 	}
1569 	sb->s_root = root;
1570 	return sb;
1571 }
1572 
1573 static struct address_space_operations shmem_aops = {
1574 	removepage:	shmem_removepage,
1575 	writepage:	shmem_writepage,
1576 #ifdef CONFIG_TMPFS
1577 	readpage:	shmem_readpage,
1578 	prepare_write:	shmem_prepare_write,
1579 	commit_write:	shmem_commit_write,
1580 #endif
1581 };
1582 
1583 static struct file_operations shmem_file_operations = {
1584 	mmap:		shmem_mmap,
1585 #ifdef CONFIG_TMPFS
1586 	read:		shmem_file_read,
1587 	write:		shmem_file_write,
1588 	fsync:		shmem_sync_file,
1589 #endif
1590 };
1591 
1592 static struct inode_operations shmem_inode_operations = {
1593 	truncate:	shmem_truncate,
1594 	setattr:	shmem_notify_change,
1595 };
1596 
1597 static struct inode_operations shmem_dir_inode_operations = {
1598 #ifdef CONFIG_TMPFS
1599 	create:		shmem_create,
1600 	lookup:		shmem_lookup,
1601 	link:		shmem_link,
1602 	unlink:		shmem_unlink,
1603 	symlink:	shmem_symlink,
1604 	mkdir:		shmem_mkdir,
1605 	rmdir:		shmem_rmdir,
1606 	mknod:		shmem_mknod,
1607 	rename:		shmem_rename,
1608 #endif
1609 };
1610 
1611 static struct super_operations shmem_ops = {
1612 #ifdef CONFIG_TMPFS
1613 	statfs:		shmem_statfs,
1614 	remount_fs:	shmem_remount_fs,
1615 #endif
1616 	delete_inode:	shmem_delete_inode,
1617 	put_inode:	force_delete,
1618 };
1619 
1620 static struct vm_operations_struct shmem_vm_ops = {
1621 	nopage:		shmem_nopage,
1622 };
1623 
1624 #ifdef CONFIG_TMPFS
1625 /* type "shm" will be tagged obsolete in 2.5 */
1626 static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
1627 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
1628 #else
1629 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
1630 #endif
1631 static struct vfsmount *shm_mnt;
1632 
1633 static int __init init_tmpfs(void)
1634 {
1635 	int error;
1636 
1637 	error = register_filesystem(&tmpfs_fs_type);
1638 	if (error) {
1639 		printk(KERN_ERR "Could not register tmpfs\n");
1640 		goto out3;
1641 	}
1642 #ifdef CONFIG_TMPFS
1643 	error = register_filesystem(&shmem_fs_type);
1644 	if (error) {
1645 		printk(KERN_ERR "Could not register shm fs\n");
1646 		goto out2;
1647 	}
1648 	devfs_mk_dir(NULL, "shm", NULL);
1649 #endif
1650 	shm_mnt = kern_mount(&tmpfs_fs_type);
1651 	if (IS_ERR(shm_mnt)) {
1652 		error = PTR_ERR(shm_mnt);
1653 		printk(KERN_ERR "Could not kern_mount tmpfs\n");
1654 		goto out1;
1655 	}
1656 
1657 	/* The internal instance should not do size checking */
1658 	shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
1659 	return 0;
1660 
1661 out1:
1662 #ifdef CONFIG_TMPFS
1663 	unregister_filesystem(&shmem_fs_type);
1664 out2:
1665 #endif
1666 	unregister_filesystem(&tmpfs_fs_type);
1667 out3:
1668 	shm_mnt = ERR_PTR(error);
1669 	return error;
1670 }
1671 module_init(init_tmpfs)
1672 
1673 /*
1674  * shmem_file_setup - get an unlinked file living in tmpfs
1675  *
1676  * @name: name for dentry (to be seen in /proc/<pid>/maps)
1677  * @size: size to be set for the file
1678  *
1679  */
1680 struct file *shmem_file_setup(char *name, loff_t size)
1681 {
1682 	int error;
1683 	struct file *file;
1684 	struct inode *inode;
1685 	struct dentry *dentry, *root;
1686 	struct qstr this;
1687 	int vm_enough_memory(long pages);
1688 
1689 	if (IS_ERR(shm_mnt))
1690 		return (void *)shm_mnt;
1691 
1692 	if (size > SHMEM_MAX_BYTES)
1693 		return ERR_PTR(-EINVAL);
1694 
1695 	if (!vm_enough_memory(VM_ACCT(size)))
1696 		return ERR_PTR(-ENOMEM);
1697 
1698 	this.name = name;
1699 	this.len = strlen(name);
1700 	this.hash = 0; /* will go */
1701 	root = shm_mnt->mnt_root;
1702 	dentry = d_alloc(root, &this);
1703 	if (!dentry)
1704 		return ERR_PTR(-ENOMEM);
1705 
1706 	error = -ENFILE;
1707 	file = get_empty_filp();
1708 	if (!file)
1709 		goto put_dentry;
1710 
1711 	error = -ENOSPC;
1712 	inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
1713 	if (!inode)
1714 		goto close_file;
1715 
1716 	d_instantiate(dentry, inode);
1717 	inode->i_size = size;
1718 	inode->i_nlink = 0;	/* It is unlinked */
1719 	file->f_vfsmnt = mntget(shm_mnt);
1720 	file->f_dentry = dentry;
1721 	file->f_op = &shmem_file_operations;
1722 	file->f_mode = FMODE_WRITE | FMODE_READ;
1723 	return file;
1724 
1725 close_file:
1726 	put_filp(file);
1727 put_dentry:
1728 	dput(dentry);
1729 	return ERR_PTR(error);
1730 }
1731 
1732 /*
1733  * shmem_zero_setup - setup a shared anonymous mapping
1734  *
1735  * @vma: the vma to be mmapped is prepared by do_mmap_pgoff
1736  */
1737 int shmem_zero_setup(struct vm_area_struct *vma)
1738 {
1739 	struct file *file;
1740 	loff_t size = vma->vm_end - vma->vm_start;
1741 
1742 	file = shmem_file_setup("dev/zero", size);
1743 	if (IS_ERR(file))
1744 		return PTR_ERR(file);
1745 
1746 	if (vma->vm_file)
1747 		fput(vma->vm_file);
1748 	vma->vm_file = file;
1749 	vma->vm_ops = &shmem_vm_ops;
1750 	return 0;
1751 }
1752 
1753 EXPORT_SYMBOL(shmem_file_setup);
1754