/*
 * fs/dcache.c
 *
 * Complete reimplementation
 * (C) 1997 Thomas Schoebel-Theuer,
 * with heavy changes by Linus Torvalds
 */

/*
 * Notes on the allocation strategy:
 *
 * The dcache is a master of the icache - whenever a dcache entry
 * exists, the inode will always exist. "iput()" is done either when
 * the dcache entry is deleted or garbage collected.
 */
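
/*
 * One common pattern behind that rule (an illustrative sketch only,
 * error paths omitted): a filesystem that binds an inode to a dentry
 * hands its inode reference over to the dcache,
 *
 *	inode = iget(sb, ino);
 *	d_instantiate(dentry, inode);
 *
 * and the matching iput() then happens in dentry_iput() when the
 * dentry is finally torn down by dput().
 */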

#include <linux/config.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/cache.h>
#include <linux/module.h>

#include <asm/uaccess.h>

#define DCACHE_PARANOIA 1
/* #define DCACHE_DEBUG 1 */

spinlock_t dcache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;

/* Right now the dcache depends on the kernel lock */
#define check_lock()	if (!kernel_locked()) BUG()

static kmem_cache_t *dentry_cache;

/*
 * This is the single most critical data structure when it comes
 * to the dcache: the hashtable for lookups. Somebody should try
 * to make this good - I've just made it work.
 *
 * This hash-function tries to avoid losing too many bits of hash
 * information, yet avoid using a prime hash-size or similar.
 */
#define D_HASHBITS     d_hash_shift
#define D_HASHMASK     d_hash_mask

static unsigned int d_hash_mask;
static unsigned int d_hash_shift;
static struct list_head *dentry_hashtable;
static LIST_HEAD(dentry_unused);

/* Statistics gathering. */
struct dentry_stat_t dentry_stat = {0, 0, 45, 0,};

/*
 * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
 * inside dcache_lock.
 */
static inline void d_free(struct dentry *dentry)
{
	if (dentry->d_op && dentry->d_op->d_release)
		dentry->d_op->d_release(dentry);
	if (dname_external(dentry))
		kfree(dentry->d_name.name);
	kmem_cache_free(dentry_cache, dentry);
}

/*
 * Release the dentry's inode, using the filesystem
 * d_iput() operation if defined.
 * Called with dcache_lock held, drops it.
 */
static inline void dentry_iput(struct dentry * dentry)
{
	struct inode *inode = dentry->d_inode;
	if (inode) {
		dentry->d_inode = NULL;
		list_del_init(&dentry->d_alias);
		spin_unlock(&dcache_lock);
		if (dentry->d_op && dentry->d_op->d_iput)
			dentry->d_op->d_iput(dentry, inode);
		else
			iput(inode);
	} else
		spin_unlock(&dcache_lock);
}

/*
 * This is dput
 *
 * This is complicated by the fact that we do not want to put
 * dentries that are no longer on any hash chain on the unused
 * list: we'd much rather just get rid of them immediately.
 *
 * However, that implies that we have to traverse the dentry
 * tree upwards to the parents which might _also_ now be
 * scheduled for deletion (it may have been only waiting for
 * its last child to go away).
 *
 * This tail recursion is done by hand as we don't want to depend
 * on the compiler to always get this right (gcc generally doesn't).
 * Real recursion would eat up our stack space.
 */

/*
 * dput - release a dentry
 * @dentry: dentry to release
 *
 * Release a dentry. This will drop the usage count and if appropriate
 * call the dentry unlink method as well as removing it from the queues and
 * releasing its resources. If the parent dentries were scheduled for release
 * they too may now get deleted.
 *
 * no dcache lock, please.
 */

void dput(struct dentry *dentry)
{
	if (!dentry)
		return;

repeat:
	if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
		return;

	/* dput on a free dentry? */
	if (!list_empty(&dentry->d_lru))
		BUG();
	/*
	 * AV: ->d_delete() is _NOT_ allowed to block now.
	 */
	if (dentry->d_op && dentry->d_op->d_delete) {
		if (dentry->d_op->d_delete(dentry))
			goto unhash_it;
	}
	/* Unreachable? Get rid of it */
	if (list_empty(&dentry->d_hash))
		goto kill_it;
	list_add(&dentry->d_lru, &dentry_unused);
	dentry_stat.nr_unused++;
	spin_unlock(&dcache_lock);
	return;

unhash_it:
	list_del_init(&dentry->d_hash);

kill_it: {
		struct dentry *parent;
		list_del(&dentry->d_child);
		dentry_stat.nr_dentry--;	/* For d_free, below */
		/* drops the lock, at that point nobody can reach this dentry */
		dentry_iput(dentry);
		parent = dentry->d_parent;
		d_free(dentry);
		if (dentry == parent)
			return;
		dentry = parent;
		goto repeat;
	}
}
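
/*
 * An illustrative sketch of the calling convention (not a prescribed
 * pattern): every dget() or successful d_lookup() must eventually be
 * balanced by exactly one dput(),
 *
 *	struct dentry *de = d_lookup(dir, &name);
 *	if (de) {
 *		... use de, de->d_inode ...
 *		dput(de);	releases de, possibly its parents too
 *	}
 */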

/**
 * d_invalidate - invalidate a dentry
 * @dentry: dentry to invalidate
 *
 * Try to invalidate the dentry if it turns out to be
 * possible. If there are other dentries that can be
 * reached through this one we can't delete it and we
 * return -EBUSY. On success we return 0.
 *
 * no dcache lock.
 */

int d_invalidate(struct dentry * dentry)
{
	/*
	 * If it's already been dropped, return OK.
	 */
	spin_lock(&dcache_lock);
	if (list_empty(&dentry->d_hash)) {
		spin_unlock(&dcache_lock);
		return 0;
	}
	/*
	 * Check whether to do a partial shrink_dcache
	 * to get rid of unused child entries.
	 */
	if (!list_empty(&dentry->d_subdirs)) {
		spin_unlock(&dcache_lock);
		shrink_dcache_parent(dentry);
		spin_lock(&dcache_lock);
	}

	/*
	 * Somebody else still using it?
	 *
	 * If it's a directory, we can't drop it
	 * for fear of somebody re-populating it
	 * with children (even though dropping it
	 * would make it unreachable from the root,
	 * we might still populate it if it was a
	 * working directory or similar).
	 */
	if (atomic_read(&dentry->d_count) > 1) {
		if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
			spin_unlock(&dcache_lock);
			return -EBUSY;
		}
	}

	list_del_init(&dentry->d_hash);
	spin_unlock(&dcache_lock);
	return 0;
}

/* This should be called _only_ with dcache_lock held */

static inline struct dentry * __dget_locked(struct dentry *dentry)
{
	atomic_inc(&dentry->d_count);
	if (!list_empty(&dentry->d_lru)) {
		dentry_stat.nr_unused--;
		list_del_init(&dentry->d_lru);
	}
	return dentry;
}

struct dentry * dget_locked(struct dentry *dentry)
{
	return __dget_locked(dentry);
}

/**
 * d_find_alias - grab a hashed alias of inode
 * @inode: inode in question
 *
 * If inode has a hashed alias - acquire the reference to alias and
 * return it. Otherwise return NULL. Notice that if inode is a directory
 * there can be only one alias and it can be unhashed only if it has
 * no children.
 */

struct dentry * d_find_alias(struct inode *inode)
{
	struct list_head *head, *next, *tmp;
	struct dentry *alias;

	spin_lock(&dcache_lock);
	head = &inode->i_dentry;
	next = inode->i_dentry.next;
	while (next != head) {
		tmp = next;
		next = tmp->next;
		alias = list_entry(tmp, struct dentry, d_alias);
		if (!list_empty(&alias->d_hash)) {
			__dget_locked(alias);
			spin_unlock(&dcache_lock);
			return alias;
		}
	}
	spin_unlock(&dcache_lock);
	return NULL;
}

/*
 *	Try to kill dentries associated with this inode.
 * WARNING: you must own a reference to inode.
 */
void d_prune_aliases(struct inode *inode)
{
	struct list_head *tmp, *head = &inode->i_dentry;
restart:
	spin_lock(&dcache_lock);
	tmp = head;
	while ((tmp = tmp->next) != head) {
		struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
		if (!atomic_read(&dentry->d_count)) {
			__dget_locked(dentry);
			spin_unlock(&dcache_lock);
			d_drop(dentry);
			dput(dentry);
			goto restart;
		}
	}
	spin_unlock(&dcache_lock);
}

/*
 * Throw away a dentry - free the inode, dput the parent.
 * This requires that the LRU list has already been
 * removed.
 * Called with dcache_lock, drops it and then regains.
 */
static inline void prune_one_dentry(struct dentry * dentry)
{
	struct dentry * parent;

	list_del_init(&dentry->d_hash);
	list_del(&dentry->d_child);
	dentry_stat.nr_dentry--;	/* For d_free, below */
	dentry_iput(dentry);
	parent = dentry->d_parent;
	d_free(dentry);
	if (parent != dentry)
		dput(parent);
	spin_lock(&dcache_lock);
}

/**
 * prune_dcache - shrink the dcache
 * @count: number of entries to try and free
 *
 * Shrink the dcache. This is done when we need
 * more memory, or simply when we need to unmount
 * something (at which point we need to unuse
 * all dentries).
 *
 * This function may fail to free any resources if
 * all the dentries are in use.
 */

void prune_dcache(int count)
{
	spin_lock(&dcache_lock);
	for (;;) {
		struct dentry *dentry;
		struct list_head *tmp;

		tmp = dentry_unused.prev;

		if (tmp == &dentry_unused)
			break;
		list_del_init(tmp);
		dentry = list_entry(tmp, struct dentry, d_lru);

		/* If the dentry was recently referenced, don't free it. */
		if (dentry->d_vfs_flags & DCACHE_REFERENCED) {
			dentry->d_vfs_flags &= ~DCACHE_REFERENCED;
			list_add(&dentry->d_lru, &dentry_unused);
			continue;
		}
		dentry_stat.nr_unused--;

		/* Unused dentry with a count? */
		if (atomic_read(&dentry->d_count))
			BUG();

		prune_one_dentry(dentry);
		if (!--count)
			break;
	}
	spin_unlock(&dcache_lock);
}

/*
 * Shrink the dcache for the specified super block.
 * This allows us to unmount a device without disturbing
 * the dcache for the other devices.
 *
 * This implementation makes just two traversals of the
 * unused list.  On the first pass we move the selected
 * dentries to the most recent end, and on the second
 * pass we free them.  The second pass must restart after
 * each dput(), but since the target dentries are all at
 * the end, it's really just a single traversal.
 */

/**
 * shrink_dcache_sb - shrink dcache for a superblock
 * @sb: superblock
 *
 * Shrink the dcache for the specified super block. This
 * is used to free the dcache before unmounting a file
 * system
 */

void shrink_dcache_sb(struct super_block * sb)
{
	struct list_head *tmp, *next;
	struct dentry *dentry;

	/*
	 * Pass one ... move the dentries for the specified
	 * superblock to the most recent end of the unused list.
	 */
	spin_lock(&dcache_lock);
	next = dentry_unused.next;
	while (next != &dentry_unused) {
		tmp = next;
		next = tmp->next;
		dentry = list_entry(tmp, struct dentry, d_lru);
		if (dentry->d_sb != sb)
			continue;
		list_del(tmp);
		list_add(tmp, &dentry_unused);
	}

	/*
	 * Pass two ... free the dentries for this superblock.
	 */
repeat:
	next = dentry_unused.next;
	while (next != &dentry_unused) {
		tmp = next;
		next = tmp->next;
		dentry = list_entry(tmp, struct dentry, d_lru);
		if (dentry->d_sb != sb)
			continue;
		if (atomic_read(&dentry->d_count))
			continue;
		dentry_stat.nr_unused--;
		list_del_init(tmp);
		prune_one_dentry(dentry);
		goto repeat;
	}
	spin_unlock(&dcache_lock);
}

/*
 * Search for at least 1 mount point in the dentry's subdirs.
 * We descend to the next level whenever the d_subdirs
 * list is non-empty and continue searching.
 */

/**
 * have_submounts - check for mounts over a dentry
 * @parent: dentry to check.
 *
 * Return true if the parent or its subdirectories contain
 * a mount point
 */

int have_submounts(struct dentry *parent)
{
	struct dentry *this_parent = parent;
	struct list_head *next;

	spin_lock(&dcache_lock);
	if (d_mountpoint(parent))
		goto positive;
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
		next = tmp->next;
		/* Have we found a mount point? */
		if (d_mountpoint(dentry))
			goto positive;
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		next = this_parent->d_child.next;
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
	return 0; /* No mount points found in tree */
positive:
	spin_unlock(&dcache_lock);
	return 1;
}

/*
 * Search the dentry child list for the specified parent,
 * and move any unused dentries to the end of the unused
 * list for prune_dcache(). We descend to the next level
 * whenever the d_subdirs list is non-empty and continue
 * searching.
 */
static int select_parent(struct dentry * parent)
{
	struct dentry *this_parent = parent;
	struct list_head *next;
	int found = 0;

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
		next = tmp->next;
		if (!atomic_read(&dentry->d_count)) {
			list_del(&dentry->d_lru);
			list_add(&dentry->d_lru, dentry_unused.prev);
			found++;
		}
		/*
		 * Descend a level if the d_subdirs list is non-empty.
		 */
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
#ifdef DCACHE_DEBUG
			printk(KERN_DEBUG "select_parent: descending to %s/%s, found=%d\n",
			       dentry->d_parent->d_name.name, dentry->d_name.name, found);
#endif
			goto repeat;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search.
	 */
	if (this_parent != parent) {
		next = this_parent->d_child.next;
		this_parent = this_parent->d_parent;
#ifdef DCACHE_DEBUG
		printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
		       this_parent->d_parent->d_name.name, this_parent->d_name.name, found);
#endif
		goto resume;
	}
	spin_unlock(&dcache_lock);
	return found;
}

/**
 * shrink_dcache_parent - prune dcache
 * @parent: parent of entries to prune
 *
 * Prune the dcache to remove unused children of the parent dentry.
 */

void shrink_dcache_parent(struct dentry * parent)
{
	int found;

	while ((found = select_parent(parent)) != 0)
		prune_dcache(found);
}

/*
 * This is called from kswapd when we think we need some
 * more memory, but aren't really sure how much. So we
 * carefully try to free a _bit_ of our dcache, but not
 * too much.
 *
 * Priority:
 *   0 - very urgent: shrink everything
 *  ...
 *   6 - base-level: try to shrink a bit.
 */
int shrink_dcache_memory(int priority, unsigned int gfp_mask)
{
	int count = 0;

	/*
	 * Nasty deadlock avoidance.
	 *
	 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
	 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->
	 * put_inode->ext2_discard_prealloc->ext2_free_blocks->lock_super->
	 * DEADLOCK.
	 *
	 * We should make sure we don't hold the superblock lock over
	 * block allocations, but for now:
	 */
	if (!(gfp_mask & __GFP_FS))
		return 0;

	count = dentry_stat.nr_unused / priority;

	prune_dcache(count);
	return kmem_cache_shrink(dentry_cache);
}
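
/*
 * A note on the scaling above: count = nr_unused / priority, so at
 * the base priority of 6 each call tries to free roughly one sixth
 * of the unused list, and the fraction grows as the priority value
 * drops towards the urgent end of the scale.
 */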

#define NAME_ALLOC_LEN(len)	((len+16) & ~15)
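
/*
 * NAME_ALLOC_LEN rounds the external-name allocation up to a multiple
 * of 16 that always leaves room for the trailing NUL: for example,
 * len 1..15 yields 16 bytes and len 16..31 yields 32 bytes.
 */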

/**
 * d_alloc	-	allocate a dcache entry
 * @parent: parent of entry to allocate
 * @name: qstr of the name
 *
 * Allocates a dentry. It returns %NULL if there is insufficient memory
 * available. On success the dentry is returned. The name passed in is
 * copied, so the buffer passed in may be reused after this call.
 */

struct dentry * d_alloc(struct dentry * parent, const struct qstr *name)
{
	char * str;
	struct dentry *dentry;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;

	if (name->len > DNAME_INLINE_LEN-1) {
		str = kmalloc(NAME_ALLOC_LEN(name->len), GFP_KERNEL);
		if (!str) {
			kmem_cache_free(dentry_cache, dentry);
			return NULL;
		}
	} else
		str = dentry->d_iname;

	memcpy(str, name->name, name->len);
	str[name->len] = 0;

	atomic_set(&dentry->d_count, 1);
	dentry->d_vfs_flags = 0;
	dentry->d_flags = 0;
	dentry->d_inode = NULL;
	dentry->d_parent = NULL;
	dentry->d_sb = NULL;
	dentry->d_name.name = str;
	dentry->d_name.len = name->len;
	dentry->d_name.hash = name->hash;
	dentry->d_op = NULL;
	dentry->d_fsdata = NULL;
	dentry->d_mounted = 0;
	INIT_LIST_HEAD(&dentry->d_hash);
	INIT_LIST_HEAD(&dentry->d_lru);
	INIT_LIST_HEAD(&dentry->d_subdirs);
	INIT_LIST_HEAD(&dentry->d_alias);
	if (parent) {
		dentry->d_parent = dget(parent);
		dentry->d_sb = parent->d_sb;
	} else
		INIT_LIST_HEAD(&dentry->d_child);

	spin_lock(&dcache_lock);
	if (parent)
		list_add(&dentry->d_child, &parent->d_subdirs);
	dentry_stat.nr_dentry++;
	spin_unlock(&dcache_lock);

	return dentry;
}
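
/*
 * An illustrative sketch (not a prescribed sequence) of how these
 * helpers fit together in a filesystem's create path:
 *
 *	struct qstr q = { "foo", 3, 0 };
 *	q.hash = full_name_hash(q.name, q.len);
 *	dentry = d_alloc(dir, &q);
 *	if (dentry) {
 *		d_instantiate(dentry, inode);	bind the inode
 *		d_rehash(dentry);		make it findable
 *	}
 */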

/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */

void d_instantiate(struct dentry *entry, struct inode * inode)
{
	if (!list_empty(&entry->d_alias)) BUG();
	spin_lock(&dcache_lock);
	if (inode)
		list_add(&entry->d_alias, &inode->i_dentry);
	entry->d_inode = inode;
	spin_unlock(&dcache_lock);
}

/**
 * d_alloc_root - allocate root dentry
 * @root_inode: inode to allocate the root for
 *
 * Allocate a root ("/") dentry for the inode given. The inode is
 * instantiated and returned. %NULL is returned if there is insufficient
 * memory or the inode passed is %NULL.
 */

struct dentry * d_alloc_root(struct inode * root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		res = d_alloc(NULL, &(const struct qstr) { "/", 1, 0 });
		if (res) {
			res->d_sb = root_inode->i_sb;
			res->d_parent = res;
			d_instantiate(res, root_inode);
		}
	}
	return res;
}

static inline struct list_head * d_hash(struct dentry * parent, unsigned long hash)
{
	hash += (unsigned long) parent / L1_CACHE_BYTES;
	hash = hash ^ (hash >> D_HASHBITS);
	return dentry_hashtable + (hash & D_HASHMASK);
}
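
/*
 * A worked example of the hashing above, assuming a table of 2^14
 * buckets (so D_HASHBITS == 14, D_HASHMASK == 0x3fff): the parent
 * pointer, scaled down by the cache line size, is added to the name
 * hash, the high bits are folded into the low bits, and the mask
 * picks the bucket.  Distinct parents thus spread identical names
 * across different chains.
 */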

/**
 * d_lookup - search for a dentry
 * @parent: parent dentry
 * @name: qstr of name we wish to find
 *
 * Searches the children of the parent dentry for the name in question. If
 * the dentry is found its reference count is incremented and the dentry
 * is returned. The caller must use dput() to free the entry when it has
 * finished using it. %NULL is returned on failure.
 */

struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
	unsigned int len = name->len;
	unsigned int hash = name->hash;
	const unsigned char *str = name->name;
	struct list_head *head = d_hash(parent,hash);
	struct list_head *tmp;

	spin_lock(&dcache_lock);
	tmp = head->next;
	for (;;) {
		struct dentry * dentry = list_entry(tmp, struct dentry, d_hash);
		if (tmp == head)
			break;
		tmp = tmp->next;
		if (dentry->d_name.hash != hash)
			continue;
		if (dentry->d_parent != parent)
			continue;
		if (parent->d_op && parent->d_op->d_compare) {
			if (parent->d_op->d_compare(parent, &dentry->d_name, name))
				continue;
		} else {
			if (dentry->d_name.len != len)
				continue;
			if (memcmp(dentry->d_name.name, str, len))
				continue;
		}
		__dget_locked(dentry);
		dentry->d_vfs_flags |= DCACHE_REFERENCED;
		spin_unlock(&dcache_lock);
		return dentry;
	}
	spin_unlock(&dcache_lock);
	return NULL;
}

/**
 * d_validate - verify dentry provided from insecure source
 * @dentry: The dentry alleged to be valid child of @dparent
 * @dparent: The parent dentry (known to be valid)
 *
 * An insecure source has sent us a dentry, here we verify it and dget() it.
 * This is used by ncpfs in its readdir implementation.
 * Zero is returned if the dentry is invalid.
 */

int d_validate(struct dentry *dentry, struct dentry *dparent)
{
	unsigned long dent_addr = (unsigned long) dentry;
	unsigned long min_addr = PAGE_OFFSET;
	unsigned long align_mask = 0x0F;
	struct list_head *base, *lhp;

	if (dent_addr < min_addr)
		goto out;
	if (dent_addr > (unsigned long)high_memory - sizeof(struct dentry))
		goto out;
	if (dent_addr & align_mask)
		goto out;
	if ((!kern_addr_valid(dent_addr)) || (!kern_addr_valid(dent_addr -1 +
						sizeof(struct dentry))))
		goto out;

	if (dentry->d_parent != dparent)
		goto out;

	spin_lock(&dcache_lock);
	lhp = base = d_hash(dparent, dentry->d_name.hash);
	while ((lhp = lhp->next) != base) {
		if (dentry == list_entry(lhp, struct dentry, d_hash)) {
			__dget_locked(dentry);
			spin_unlock(&dcache_lock);
			return 1;
		}
	}
	spin_unlock(&dcache_lock);
out:
	return 0;
}

/*
 * When a file is deleted, we have two options:
 * - turn this dentry into a negative dentry
 * - unhash this dentry and free it.
 *
 * Usually, we want to just turn this into
 * a negative dentry, but if anybody else is
 * currently using the dentry or the inode
 * we can't do that and we fall back on removing
 * it from the hash queues and waiting for
 * it to be deleted later when it has no users
 */
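
/*
 * For example, a typical unlink path first removes the name on disk
 * and then calls d_delete(dentry); with d_count == 1 the dentry just
 * goes negative and stays cached, otherwise it is unhashed and dies
 * once the last user does dput().
 */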

/**
 * d_delete - delete a dentry
 * @dentry: The dentry to delete
 *
 * Turn the dentry into a negative dentry if possible, otherwise
 * remove it from the hash queues so it can be deleted later
 */

void d_delete(struct dentry * dentry)
{
	/*
	 * Are we the only user?
	 */
	spin_lock(&dcache_lock);
	if (atomic_read(&dentry->d_count) == 1) {
		dentry_iput(dentry);
		return;
	}
	spin_unlock(&dcache_lock);

	/*
	 * If not, just drop the dentry and let dput
	 * pick up the tab..
	 */
	d_drop(dentry);
}

/**
 * d_rehash	- add an entry back to the hash
 * @entry: dentry to add to the hash
 *
 * Adds a dentry to the hash according to its name.
 */

void d_rehash(struct dentry * entry)
{
	struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
	if (!list_empty(&entry->d_hash)) BUG();
	spin_lock(&dcache_lock);
	list_add(&entry->d_hash, list);
	spin_unlock(&dcache_lock);
}

#define do_switch(x,y) do { \
	__typeof__ (x) __tmp = x; \
	x = y; y = __tmp; } while (0)
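
/*
 * do_switch(x, y) simply swaps two lvalues of the same type; the
 * __typeof__ trick makes it work for pointers and integers alike,
 * e.g. do_switch(a->len, b->len) exchanges the two lengths in place.
 */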

/*
 * When switching names, the actual string doesn't strictly have to
 * be preserved in the target - because we're dropping the target
 * anyway. As such, we can just do a simple memcpy() to copy over
 * the new name before we switch.
 *
 * Note that we have to be a lot more careful about getting the hash
 * switched - we have to switch the hash value properly even if it
 * then no longer matches the actual (corrupted) string of the target.
 * The hash value has to match the hash queue that the dentry is on..
 */
static inline void switch_names(struct dentry * dentry, struct dentry * target)
{
	const unsigned char *old_name, *new_name;

	check_lock();
	memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
	old_name = target->d_name.name;
	new_name = dentry->d_name.name;
	if (old_name == target->d_iname)
		old_name = dentry->d_iname;
	if (new_name == dentry->d_iname)
		new_name = target->d_iname;
	target->d_name.name = new_name;
	dentry->d_name.name = old_name;
}

/*
 * We cannibalize "target" when moving dentry on top of it,
 * because it's going to be thrown away anyway. We could be more
 * polite about it, though.
 *
 * This forceful removal will result in ugly /proc output if
 * somebody holds a file open that got deleted due to a rename.
 * We could be nicer about the deleted file, and let it show
 * up under the name it got deleted rather than the name that
 * deleted it.
 *
 * Careful with the hash switch. The hash switch depends on
 * the fact that any list-entry can be a head of the list.
 * Think about it.
 */

/**
 * d_move - move a dentry
 * @dentry: entry to move
 * @target: new dentry
 *
 * Update the dcache to reflect the move of a file name. Negative
 * dcache entries should not be moved in this way.
 */

void d_move(struct dentry * dentry, struct dentry * target)
{
	check_lock();

	if (!dentry->d_inode)
		printk(KERN_WARNING "VFS: moving negative dcache entry\n");

	spin_lock(&dcache_lock);
	/* Move the dentry to the target hash queue */
	list_del(&dentry->d_hash);
	list_add(&dentry->d_hash, &target->d_hash);

	/* Unhash the target: dput() will then get rid of it */
	list_del_init(&target->d_hash);

	list_del(&dentry->d_child);
	list_del(&target->d_child);

	/* Switch the parents and the names.. */
	switch_names(dentry, target);
	do_switch(dentry->d_parent, target->d_parent);
	do_switch(dentry->d_name.len, target->d_name.len);
	do_switch(dentry->d_name.hash, target->d_name.hash);

	/* And add them back to the (new) parent lists */
	list_add(&target->d_child, &target->d_parent->d_subdirs);
	list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
	spin_unlock(&dcache_lock);
}

/**
 * d_path - return the path of a dentry
 * @dentry: dentry to report
 * @vfsmnt: vfsmnt to which the dentry belongs
 * @root: root dentry
 * @rootmnt: vfsmnt to which the root dentry belongs
 * @buffer: buffer to return value in
 * @buflen: buffer length
 *
 * Convert a dentry into an ASCII path name. If the entry has been deleted
 * the string " (deleted)" is appended. Note that this is ambiguous. Returns
 * the buffer.
 *
 * "buflen" should be %PAGE_SIZE or more. Caller holds the dcache_lock.
 */
char * __d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
		struct dentry *root, struct vfsmount *rootmnt,
		char *buffer, int buflen)
{
	char * end = buffer+buflen;
	char * retval;
	int namelen;

	*--end = '\0';
	buflen--;
	if (!IS_ROOT(dentry) && list_empty(&dentry->d_hash)) {
		buflen -= 10;
		end -= 10;
		memcpy(end, " (deleted)", 10);
	}

	/* Get '/' right */
	retval = end-1;
	*retval = '/';

	for (;;) {
		struct dentry * parent;

		if (dentry == root && vfsmnt == rootmnt)
			break;
		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
			/* Global root? */
			if (vfsmnt->mnt_parent == vfsmnt)
				goto global_root;
			dentry = vfsmnt->mnt_mountpoint;
			vfsmnt = vfsmnt->mnt_parent;
			continue;
		}
		parent = dentry->d_parent;
		namelen = dentry->d_name.len;
		buflen -= namelen + 1;
		if (buflen < 0)
			return ERR_PTR(-ENAMETOOLONG);
		end -= namelen;
		memcpy(end, dentry->d_name.name, namelen);
		*--end = '/';
		retval = end;
		dentry = parent;
	}

	return retval;

global_root:
	namelen = dentry->d_name.len;
	buflen -= namelen;
	if (buflen >= 0) {
		retval -= namelen-1;	/* hit the slash */
		memcpy(retval, dentry->d_name.name, namelen);
	} else
		retval = ERR_PTR(-ENAMETOOLONG);
	return retval;
}

/*
 * NOTE! The user-level library version returns a
 * character pointer. The kernel system call just
 * returns the length of the buffer filled (which
 * includes the ending '\0' character), or a negative
 * error value. So libc would do something like
 *
 *	char *getcwd(char * buf, size_t size)
 *	{
 *		int retval;
 *
 *		retval = sys_getcwd(buf, size);
 *		if (retval >= 0)
 *			return buf;
 *		errno = -retval;
 *		return NULL;
 *	}
 */
asmlinkage long sys_getcwd(char *buf, unsigned long size)
{
	int error;
	struct vfsmount *pwdmnt, *rootmnt;
	struct dentry *pwd, *root;
	char *page = (char *) __get_free_page(GFP_USER);

	if (!page)
		return -ENOMEM;

	read_lock(&current->fs->lock);
	pwdmnt = mntget(current->fs->pwdmnt);
	pwd = dget(current->fs->pwd);
	rootmnt = mntget(current->fs->rootmnt);
	root = dget(current->fs->root);
	read_unlock(&current->fs->lock);

	error = -ENOENT;
	/* Has the current directory been unlinked? */
	spin_lock(&dcache_lock);
	if (pwd->d_parent == pwd || !list_empty(&pwd->d_hash)) {
		unsigned long len;
		char * cwd;

		cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
		spin_unlock(&dcache_lock);

		error = PTR_ERR(cwd);
		if (IS_ERR(cwd))
			goto out;

		error = -ERANGE;
		len = PAGE_SIZE + page - cwd;
		if (len <= size) {
			error = len;
			if (copy_to_user(buf, cwd, len))
				error = -EFAULT;
		}
	} else
		spin_unlock(&dcache_lock);

out:
	dput(pwd);
	mntput(pwdmnt);
	dput(root);
	mntput(rootmnt);
	free_page((unsigned long) page);
	return error;
}

/*
 * Test whether new_dentry is a subdirectory of old_dentry.
 *
 * Trivially implemented using the dcache structure
 */

/**
 * is_subdir - is new dentry a subdirectory of old_dentry
 * @new_dentry: new dentry
 * @old_dentry: old dentry
 *
 * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
 * Returns 0 otherwise.
 */

int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry)
{
	int result;

	result = 0;
	for (;;) {
		if (new_dentry != old_dentry) {
			struct dentry * parent = new_dentry->d_parent;
			if (parent == new_dentry)
				break;
			new_dentry = parent;
			continue;
		}
		result = 1;
		break;
	}
	return result;
}

void d_genocide(struct dentry *root)
{
	struct dentry *this_parent = root;
	struct list_head *next;

	spin_lock(&dcache_lock);
repeat:
	next = this_parent->d_subdirs.next;
resume:
	while (next != &this_parent->d_subdirs) {
		struct list_head *tmp = next;
		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
		next = tmp->next;
		if (d_unhashed(dentry)||!dentry->d_inode)
			continue;
		if (!list_empty(&dentry->d_subdirs)) {
			this_parent = dentry;
			goto repeat;
		}
		atomic_dec(&dentry->d_count);
	}
	if (this_parent != root) {
		next = this_parent->d_child.next;
		atomic_dec(&this_parent->d_count);
		this_parent = this_parent->d_parent;
		goto resume;
	}
	spin_unlock(&dcache_lock);
}

/**
 * find_inode_number - check for dentry with name
 * @dir: directory to check
 * @name: Name to find.
 *
 * Check whether a dentry already exists for the given name,
 * and return the inode number if it has an inode. Otherwise
 * 0 is returned.
 *
 * This routine is used to post-process directory listings for
 * filesystems using synthetic inode numbers, and is necessary
 * to keep getcwd() working.
 */

ino_t find_inode_number(struct dentry *dir, struct qstr *name)
{
	struct dentry * dentry;
	ino_t ino = 0;

	/*
	 * Check for a fs-specific hash function. Note that we must
	 * calculate the standard hash first, as the d_op->d_hash()
	 * routine may choose to leave the hash value unchanged.
	 */
	name->hash = full_name_hash(name->name, name->len);
	if (dir->d_op && dir->d_op->d_hash)
	{
		if (dir->d_op->d_hash(dir, name) != 0)
			goto out;
	}

	dentry = d_lookup(dir, name);
	if (dentry)
	{
		if (dentry->d_inode)
			ino = dentry->d_inode->i_ino;
		dput(dentry);
	}
out:
	return ino;
}

static void __init dcache_init(unsigned long mempages)
{
	struct list_head *d;
	unsigned long order;
	unsigned int nr_hash;
	int i;

	/*
	 * A constructor could be added for stable state like the lists,
	 * but it is probably not worth it because of the cache nature
	 * of the dcache.
	 * If fragmentation is too bad then the SLAB_HWCACHE_ALIGN
	 * flag could be removed here, to hint to the allocator that
	 * it should not try to get multiple page regions.
	 */
	dentry_cache = kmem_cache_create("dentry_cache",
					 sizeof(struct dentry),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL, NULL);
	if (!dentry_cache)
		panic("Cannot create dentry cache");

#if PAGE_SHIFT < 13
	mempages >>= (13 - PAGE_SHIFT);
#endif
	mempages *= sizeof(struct list_head);
	for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
		;
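
	/*
	 * A worked example of the sizing above, assuming 4 KiB pages
	 * and 8-byte list heads (a 32-bit box): with 128 MiB of memory,
	 * mempages = 32768 is scaled down to 16384, multiplied out to
	 * 131072 bytes, which gives order 5 and thus 16384 hash buckets.
	 */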

	do {
		unsigned long tmp;

		nr_hash = (1UL << order) * PAGE_SIZE /
			sizeof(struct list_head);
		d_hash_mask = (nr_hash - 1);

		tmp = nr_hash;
		d_hash_shift = 0;
		while ((tmp >>= 1UL) != 0UL)
			d_hash_shift++;

		dentry_hashtable = (struct list_head *)
			__get_free_pages(GFP_ATOMIC, order);
	} while (dentry_hashtable == NULL && --order >= 0);

	printk(KERN_INFO "Dentry cache hash table entries: %d (order: %ld, %ld bytes)\n",
			nr_hash, order, (PAGE_SIZE << order));

	if (!dentry_hashtable)
		panic("Failed to allocate dcache hash table\n");

	d = dentry_hashtable;
	i = nr_hash;
	do {
		INIT_LIST_HEAD(d);
		d++;
		i--;
	} while (i);
}

static void init_buffer_head(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
	{
		struct buffer_head * bh = (struct buffer_head *) foo;

		memset(bh, 0, sizeof(*bh));
		init_waitqueue_head(&bh->b_wait);
	}
}

/* SLAB cache for __getname() consumers */
kmem_cache_t *names_cachep;

/* SLAB cache for file structures */
kmem_cache_t *filp_cachep;

/* SLAB cache for dquot structures */
kmem_cache_t *dquot_cachep;

/* SLAB cache for buffer_head structures */
kmem_cache_t *bh_cachep;
EXPORT_SYMBOL(bh_cachep);

extern void bdev_cache_init(void);
extern void cdev_cache_init(void);
extern void iobuf_cache_init(void);

void __init vfs_caches_init(unsigned long mempages)
{
	bh_cachep = kmem_cache_create("buffer_head",
			sizeof(struct buffer_head), 0,
			SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
	if(!bh_cachep)
		panic("Cannot create buffer head SLAB cache");

	names_cachep = kmem_cache_create("names_cache",
			PATH_MAX, 0,
			SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!names_cachep)
		panic("Cannot create names SLAB cache");

	filp_cachep = kmem_cache_create("filp",
			sizeof(struct file), 0,
			SLAB_HWCACHE_ALIGN, NULL, NULL);
	if(!filp_cachep)
		panic("Cannot create filp SLAB cache");

#if defined (CONFIG_QUOTA)
	dquot_cachep = kmem_cache_create("dquot",
			sizeof(struct dquot), sizeof(unsigned long) * 4,
			SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dquot_cachep)
		panic("Cannot create dquot SLAB cache");
#endif

	dcache_init(mempages);
	inode_init(mempages);
	files_init(mempages);
	mnt_init(mempages);
	bdev_cache_init();
	cdev_cache_init();
	iobuf_cache_init();
}