/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/init.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

extern struct task_struct *jfsCommitTask;
static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
static wait_queue_head_t meta_wait;

#ifdef CONFIG_JFS_STATISTICS
struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
	uint	allocwait;	/* # of sleeping alloc_metapage() calls */
} mpStat;
#endif

#define HASH_BITS 10		/* This makes hash_table 1 4K page */
#define HASH_SIZE (1 << HASH_BITS)
static struct metapage **hash_table = NULL;
static unsigned long hash_order;

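/*
 * The META_locked bit in mp->flag serializes access to a metapage.
 * metapage_locked() tests it, trylock_metapage() atomically sets it
 * (returning non-zero if it was already held), and unlock_metapage()
 * clears it and wakes any waiters on mp->wait.
 */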
static inline int metapage_locked(struct metapage *mp)
{
	return test_bit(META_locked, &mp->flag);
}

static inline int trylock_metapage(struct metapage *mp)
{
	return test_and_set_bit(META_locked, &mp->flag);
}

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit(META_locked, &mp->flag);
	wake_up(&mp->wait);
}

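/*
 * Slow path of lock_metapage(): sleep on mp->wait until the lock bit
 * can be taken.  Called with meta_lock held; the spinlock is dropped
 * while sleeping and reacquired before returning.
 */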
static void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);

	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			spin_unlock(&meta_lock);
			schedule();
			spin_lock(&meta_lock);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

/* needs meta_lock */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}

/*
 * metapage pool is based on Linux 2.5's mempool
 *
 * Tap into reserved structures in critical paths where waiting on a
 * memory allocation could cause deadlock
 */
#define METAPOOL_MIN_PAGES 32
static struct metapage *reserved_metapages[METAPOOL_MIN_PAGES];
static int num_reserved = 0;
kmem_cache_t *metapage_cache;

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct metapage *mp = (struct metapage *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		mp->lid = 0;
		mp->lsn = 0;
		mp->flag = 0;
		mp->data = NULL;
		mp->clsn = 0;
		mp->log = NULL;
		set_bit(META_free, &mp->flag);
		init_waitqueue_head(&mp->wait);
	}
}

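/* Release any metapages still sitting in the reserve pool */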
static void empty_reserved(void)
{
	while (num_reserved--)
		kmem_cache_free(metapage_cache,
				reserved_metapages[num_reserved]);
}

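/*
 * Allocate a metapage structure.  Called with meta_lock held.  Try a
 * GFP_ATOMIC slab allocation first; if that fails and the caller can't
 * sleep (no_wait), fall back to the reserve pool.  Otherwise drop
 * meta_lock (reported via *dropped_lock) and retry with GFP_NOFS.
 */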
static struct metapage *alloc_metapage(int *dropped_lock, int no_wait)
{
	struct metapage *new;

	*dropped_lock = 0;

	/*
	 * Always try an atomic alloc first, to avoid dropping the
	 * spinlock
	 */
	new = kmem_cache_alloc(metapage_cache, GFP_ATOMIC);
	if (new)
		return new;

	if (no_wait && num_reserved)
		return reserved_metapages[--num_reserved];

	*dropped_lock = 1;
	spin_unlock(&meta_lock);
	new = kmem_cache_alloc(metapage_cache, GFP_NOFS);
	spin_lock(&meta_lock);
	return new;
}

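/*
 * Return a metapage to the reserve pool, or to the slab cache if the
 * pool is already full.  Caller holds meta_lock.
 */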
static void __free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	if (num_reserved < METAPOOL_MIN_PAGES)
		reserved_metapages[num_reserved++] = mp;
	else
		kmem_cache_free(metapage_cache, mp);
}

static inline void free_metapage(struct metapage *mp)
{
	spin_lock(&meta_lock);
	__free_metapage(mp);
	spin_unlock(&meta_lock);
}

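/*
 * Module initialization: create the slab cache, pre-fill the reserve
 * pool and allocate the metapage hash table.
 */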
int __init metapage_init(void)
{
	struct metapage *mp;

	/*
	 * Initialize wait queue
	 */
	init_waitqueue_head(&meta_wait);

	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once, NULL);
	if (metapage_cache == NULL)
		return -ENOMEM;

	while (num_reserved < METAPOOL_MIN_PAGES) {
		mp = kmem_cache_alloc(metapage_cache, GFP_NOFS);
		if (mp)
			reserved_metapages[num_reserved++] = mp;
		else {
			empty_reserved();
			kmem_cache_destroy(metapage_cache);
			return -ENOMEM;
		}
	}
	/*
	 * Now the hash list
	 */
	for (hash_order = 0;
	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
	     hash_order++);
	hash_table =
	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
	assert(hash_table);
	memset(hash_table, 0, PAGE_SIZE << hash_order);

	return 0;
}

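/* Module cleanup: free the reserve pool and destroy the slab cache */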
void metapage_exit(void)
{
	empty_reserved();
	kmem_cache_destroy(metapage_cache);
}

/*
 * Basically same hash as in pagemap.h, but using our hash table
 */
static struct metapage **meta_hash(struct address_space *mapping,
				   unsigned long index)
{
#define i (((unsigned long)mapping)/ \
	   (sizeof(struct inode) & ~(sizeof(struct inode) - 1)))
#define s(x) ((x) + ((x) >> HASH_BITS))
	return hash_table + (s(i + index) & (HASH_SIZE - 1));
#undef i
#undef s
}

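/* Walk a hash chain looking for the metapage for (mapping, index) */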
static struct metapage *search_hash(struct metapage **hash_ptr,
				    struct address_space *mapping,
				    unsigned long index)
{
	struct metapage *ptr;

	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
		if ((ptr->mapping == mapping) && (ptr->index == index))
			return ptr;
	}

	return NULL;
}

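/* Insert a metapage at the head of its hash chain; caller holds meta_lock */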
static void add_to_hash(struct metapage *mp, struct metapage **hash_ptr)
{
	if (*hash_ptr)
		(*hash_ptr)->hash_prev = mp;

	mp->hash_prev = NULL;
	mp->hash_next = *hash_ptr;
	*hash_ptr = mp;
}

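/* Unlink a metapage from its hash chain; caller holds meta_lock */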
static void remove_from_hash(struct metapage *mp, struct metapage **hash_ptr)
{
	if (mp->hash_prev)
		mp->hash_prev->hash_next = mp->hash_next;
	else {
		assert(*hash_ptr == mp);
		*hash_ptr = mp->hash_next;
	}

	if (mp->hash_next)
		mp->hash_next->hash_prev = mp->hash_prev;
}

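/*
 * Look up or create the metapage covering lblock of the given inode.
 *
 * With 'absolute' set the block is addressed through the block device's
 * mapping rather than the inode's own mapping.  With 'new' set the
 * caller will initialize the data, so the page is grabbed from the page
 * cache and zeroed instead of being read from disk.  The metapage is
 * returned held and locked; drop it with release_metapage().
 */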
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	int dropped_lock;
	struct metapage **hash_ptr;
	int l2BlocksPerPage;
	int l2bsize;
	int no_wait;
	struct address_space *mapping;
	struct metapage *mp;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);

	if (absolute)
		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	hash_ptr = meta_hash(mapping, lblock);
again:
	spin_lock(&meta_lock);
	mp = search_hash(hash_ptr, mapping, lblock);
	if (mp) {
	      page_found:
		if (test_bit(META_stale, &mp->flag)) {
			spin_unlock(&meta_lock);
			yield();
			goto again;
		}
		mp->count++;
		lock_metapage(mp);
		spin_unlock(&meta_lock);
		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				release_metapage(mp);
				return NULL;
			}
			clear_bit(META_discard, &mp->flag);
		}
		jfs_info("__get_metapage: found 0x%p, in hash", mp);
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			release_metapage(mp);
			return NULL;
		}
	} else {
		l2bsize = inode->i_blkbits;
		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
		page_index = lblock >> l2BlocksPerPage;
		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
		    l2bsize;
		if ((page_offset + size) > PAGE_CACHE_SIZE) {
			spin_unlock(&meta_lock);
			jfs_err("MetaData crosses page boundary!!");
			return NULL;
		}

		/*
		 * Locks held on aggregate inode pages are usually
		 * not held long, and they are taken in critical code
		 * paths (committing dirty inodes, txCommit thread)
		 *
		 * Attempt to get metapage without blocking, tapping into
		 * reserves if necessary.
		 */
		if (JFS_IP(inode)->fileset == AGGREGATE_I)
			no_wait = 1;
		else
			no_wait = 0;

		mp = alloc_metapage(&dropped_lock, no_wait);
		if (!mp) {
			spin_unlock(&meta_lock);
			return NULL;
		}
		if (dropped_lock) {
			/* alloc_metapage blocked, we need to search the hash
			 * again.
			 */
			struct metapage *mp2;
			mp2 = search_hash(hash_ptr, mapping, lblock);
			if (mp2) {
				__free_metapage(mp);
				mp = mp2;
				goto page_found;
			}
		}
		mp->flag = 0;
		lock_metapage(mp);
		if (absolute)
			set_bit(META_absolute, &mp->flag);
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		atomic_set(&mp->nohomeok, 0);
		mp->mapping = mapping;
		mp->index = lblock;
		mp->page = NULL;
		mp->logical_size = size;
		add_to_hash(mp, hash_ptr);
		spin_unlock(&meta_lock);

		if (new) {
			jfs_info("__get_metapage: Calling grab_cache_page");
			mp->page = grab_cache_page(mapping, page_index);
			if (!mp->page) {
				jfs_err("grab_cache_page failed!");
				goto freeit;
			} else {
				INCREMENT(mpStat.pagealloc);
				UnlockPage(mp->page);
			}
		} else {
			jfs_info("__get_metapage: Calling read_cache_page");
			mp->page = read_cache_page(mapping, lblock,
				    (filler_t *)mapping->a_ops->readpage, NULL);
			if (IS_ERR(mp->page)) {
				jfs_err("read_cache_page failed!");
				goto freeit;
			} else
				INCREMENT(mpStat.pagealloc);
		}
		mp->data = kmap(mp->page) + page_offset;
	}

	if (new)
		memset(mp->data, 0, PSIZE);

	jfs_info("__get_metapage: returning = 0x%p", mp);
	return mp;

freeit:
	spin_lock(&meta_lock);
	remove_from_hash(mp, hash_ptr);
	__free_metapage(mp);
	spin_unlock(&meta_lock);
	return NULL;
}

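/*
 * Take an additional reference on a metapage.  With 'force', don't
 * sleep if the metapage is already locked; mark it META_forced instead
 * so the matching release_metapage() only drops the reference.
 */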
void hold_metapage(struct metapage *mp, int force)
{
	spin_lock(&meta_lock);

	mp->count++;

	if (force) {
		ASSERT(!(test_bit(META_forced, &mp->flag)));
		if (trylock_metapage(mp))
			set_bit(META_forced, &mp->flag);
	} else
		lock_metapage(mp);

	spin_unlock(&meta_lock);
}

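/*
 * Write a dirty metapage back through the address_space operations
 * (prepare_write/commit_write).  Discarded metapages are simply marked
 * clean.  Caller must not hold meta_lock, since this can sleep.
 */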
static void __write_metapage(struct metapage *mp)
{
	int l2bsize = mp->mapping->host->i_blkbits;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	unsigned long page_index;
	unsigned long page_offset;
	int rc;

	jfs_info("__write_metapage: mp = 0x%p", mp);

	if (test_bit(META_discard, &mp->flag)) {
		/*
		 * This metadata is no longer valid
		 */
		clear_bit(META_dirty, &mp->flag);
		return;
	}

	page_index = mp->page->index;
	page_offset =
	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

	lock_page(mp->page);
	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
					       page_offset +
					       mp->logical_size);
	if (rc) {
		jfs_err("prepare_write returned %d!", rc);
		ClearPageUptodate(mp->page);
		UnlockPage(mp->page);
		kunmap(mp->page);
		clear_bit(META_dirty, &mp->flag);
		return;
	}
	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
					      page_offset +
					      mp->logical_size);
	if (rc) {
		jfs_err("commit_write returned %d", rc);
	}

	UnlockPage(mp->page);
	clear_bit(META_dirty, &mp->flag);

	jfs_info("__write_metapage done");
}

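/* Force the metapage's buffers to disk and wait for the I/O to finish */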
static inline void sync_metapage(struct metapage *mp)
{
	struct page *page = mp->page;

	page_cache_get(page);
	lock_page(page);

	/* we're done with this page - no need to check for errors */
	if (page->buffers) {
		writeout_one_page(page);
		waitfor_one_page(page);
	}

	UnlockPage(page);
	page_cache_release(page);
}

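/*
 * Drop a reference on a metapage.  On the final release the page is
 * written back if dirty, synced if requested, removed from the log
 * sync list and the hash table, and the metapage structure is freed.
 */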
void release_metapage(struct metapage *mp)
{
	struct jfs_log *log;

	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	spin_lock(&meta_lock);
	if (test_bit(META_forced, &mp->flag)) {
		clear_bit(META_forced, &mp->flag);
		mp->count--;
		spin_unlock(&meta_lock);
		return;
	}

	assert(mp->count);
	if (--mp->count || atomic_read(&mp->nohomeok)) {
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}

	if (mp->page) {
		set_bit(META_stale, &mp->flag);
		spin_unlock(&meta_lock);
		kunmap(mp->page);
		mp->data = NULL;
		if (test_bit(META_dirty, &mp->flag))
			__write_metapage(mp);
		if (test_bit(META_sync, &mp->flag)) {
			sync_metapage(mp);
			clear_bit(META_sync, &mp->flag);
		}

		if (test_bit(META_discard, &mp->flag)) {
			lock_page(mp->page);
			block_flushpage(mp->page, 0);
			UnlockPage(mp->page);
		}

		page_cache_release(mp->page);
		mp->page = NULL;
		INCREMENT(mpStat.pagefree);
		spin_lock(&meta_lock);
	}

	if (mp->lsn) {
		/*
		 * Remove metapage from logsynclist.
		 */
		log = mp->log;
		LOGSYNC_LOCK(log);
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
		LOGSYNC_UNLOCK(log);
	}
	remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
	spin_unlock(&meta_lock);

	free_metapage(mp);
}

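/*
 * Mark the metapages covering a range of aggregate blocks as discarded
 * so they are never written back, and flush any plain page-cache pages
 * over the same range.
 */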
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	struct metapage **hash_ptr;
	unsigned long lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	/* All callers are interested in the block device's mapping */
	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
	struct metapage *mp;
	struct page *page;

	/*
	 * First, mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr; lblock < addr + len;
	     lblock += 1 << l2BlocksPerPage) {
		hash_ptr = meta_hash(mapping, lblock);
again:
		spin_lock(&meta_lock);
		mp = search_hash(hash_ptr, mapping, lblock);
		if (mp) {
			if (test_bit(META_stale, &mp->flag)) {
				spin_unlock(&meta_lock);
				yield();
				goto again;
			}

			set_bit(META_discard, &mp->flag);
			spin_unlock(&meta_lock);
		} else {
			spin_unlock(&meta_lock);
			page = find_lock_page(mapping,
					      lblock >> l2BlocksPerPage);
			if (page) {
				block_flushpage(page, 0);
				UnlockPage(page);
				page_cache_release(page);
			}
		}
	}
}

#ifdef CONFIG_JFS_STATISTICS
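/* proc read handler: report the counters kept in mpStat */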
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n"
		       "allocation waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait,
		       mpStat.allocwait);

	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
#endif