/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/init.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

extern struct task_struct *jfsCommitTask;
static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
static wait_queue_head_t meta_wait;

#ifdef CONFIG_JFS_STATISTICS
struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
	uint	allocwait;	/* # of sleeping alloc_metapage() calls */
} mpStat;
#endif

#define HASH_BITS 10		/* This makes the hash table one 4K page */
#define HASH_SIZE (1 << HASH_BITS)
static struct metapage **hash_table = NULL;
static unsigned long hash_order;

static inline int metapage_locked(struct metapage *mp)
{
	return test_bit(META_locked, &mp->flag);
}

/*
 * Returns zero if the lock was acquired, nonzero if it was already held
 * (test_and_set_bit returns the bit's previous value).
 */
static inline int trylock_metapage(struct metapage *mp)
{
	return test_and_set_bit(META_locked, &mp->flag);
}

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit(META_locked, &mp->flag);
	wake_up(&mp->wait);
}

/*
 * Slow path: sleep until the current holder unlocks.  Called with
 * meta_lock held; drops and retakes it around schedule().
 */
static void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);

	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			spin_unlock(&meta_lock);
			schedule();
			spin_lock(&meta_lock);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

/* needs meta_lock */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}

/*
 * metapage pool is based on Linux 2.5's mempool
 *
 * Tap into reserved structures in critical paths where waiting on a
 * memory allocation could cause deadlock
 */
#define METAPOOL_MIN_PAGES 32
static struct metapage *reserved_metapages[METAPOOL_MIN_PAGES];
static int num_reserved = 0;
kmem_cache_t *metapage_cache;

static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
	struct metapage *mp = (struct metapage *)foo;

	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR) {
		mp->lid = 0;
		mp->lsn = 0;
		mp->flag = 0;
		mp->data = NULL;
		mp->clsn = 0;
		mp->log = NULL;
		set_bit(META_free, &mp->flag);
		init_waitqueue_head(&mp->wait);
	}
}

static void empty_reserved(void)
{
	while (num_reserved--)
		kmem_cache_free(metapage_cache,
				reserved_metapages[num_reserved]);
}

static struct metapage *alloc_metapage(int *dropped_lock, int no_wait)
{
	struct metapage *new;

	*dropped_lock = 0;

	/*
	 * Always try an atomic alloc first, to avoid dropping the
	 * spinlock
	 */
	new = kmem_cache_alloc(metapage_cache, GFP_ATOMIC);
	if (new)
		return new;

	if (no_wait && num_reserved)
		return reserved_metapages[--num_reserved];

	*dropped_lock = 1;
	spin_unlock(&meta_lock);
	new = kmem_cache_alloc(metapage_cache, GFP_NOFS);
	spin_lock(&meta_lock);
	return new;
}
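/*
 * Illustrative caller-side sketch (not part of the original file):
 * because alloc_metapage() may drop meta_lock around the blocking
 * GFP_NOFS allocation, any state a caller derived under the lock must
 * be re-verified when *dropped_lock comes back set.  __get_metapage()
 * below follows exactly this pattern:
 *
 *	spin_lock(&meta_lock);
 *	mp = alloc_metapage(&dropped_lock, no_wait);
 *	if (dropped_lock) {
 *		// another thread may have inserted the same metapage
 *		// while the lock was dropped; search the hash chain
 *		// again and free our allocation if it lost the race
 *	}
 */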
static void __free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	if (num_reserved < METAPOOL_MIN_PAGES)
		reserved_metapages[num_reserved++] = mp;
	else
		kmem_cache_free(metapage_cache, mp);
}

static inline void free_metapage(struct metapage *mp)
{
	spin_lock(&meta_lock);
	__free_metapage(mp);
	spin_unlock(&meta_lock);
}

int __init metapage_init(void)
{
	struct metapage *mp;

	/*
	 * Initialize wait queue
	 */
	init_waitqueue_head(&meta_wait);

	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once, NULL);
	if (metapage_cache == NULL)
		return -ENOMEM;

	while (num_reserved < METAPOOL_MIN_PAGES) {
		mp = kmem_cache_alloc(metapage_cache, GFP_NOFS);
		if (mp)
			reserved_metapages[num_reserved++] = mp;
		else {
			empty_reserved();
			kmem_cache_destroy(metapage_cache);
			return -ENOMEM;
		}
	}

	/*
	 * Now the hash list
	 */
	for (hash_order = 0;
	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
	     hash_order++);
	hash_table =
	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
	assert(hash_table);
	memset(hash_table, 0, PAGE_SIZE << hash_order);

	return 0;
}

void metapage_exit(void)
{
	empty_reserved();
	kmem_cache_destroy(metapage_cache);
}

/*
 * Basically same hash as in pagemap.h, but using our hash table
 */
static struct metapage **meta_hash(struct address_space *mapping,
				   unsigned long index)
{
#define i (((unsigned long)mapping)/ \
	   (sizeof(struct inode) & ~(sizeof(struct inode) - 1)))
#define s(x) ((x) + ((x) >> HASH_BITS))
	return hash_table + (s(i + index) & (HASH_SIZE - 1));
#undef i
#undef s
}

static struct metapage *search_hash(struct metapage **hash_ptr,
				    struct address_space *mapping,
				    unsigned long index)
{
	struct metapage *ptr;

	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
		if ((ptr->mapping == mapping) && (ptr->index == index))
			return ptr;
	}

	return NULL;
}

static void add_to_hash(struct metapage *mp, struct metapage **hash_ptr)
{
	if (*hash_ptr)
		(*hash_ptr)->hash_prev = mp;

	mp->hash_prev = NULL;
	mp->hash_next = *hash_ptr;
	*hash_ptr = mp;
}

static void remove_from_hash(struct metapage *mp, struct metapage **hash_ptr)
{
	if (mp->hash_prev)
		mp->hash_prev->hash_next = mp->hash_next;
	else {
		assert(*hash_ptr == mp);
		*hash_ptr = mp->hash_next;
	}

	if (mp->hash_next)
		mp->hash_next->hash_prev = mp->hash_prev;
}
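/*
 * Worked example of the hash (illustrative, not from the original
 * source): with HASH_BITS = 10 there are 1024 buckets.  meta_hash()
 * first divides the mapping pointer by the largest power of two that
 * divides sizeof(struct inode) -- that is what
 * sizeof(struct inode) & ~(sizeof(struct inode) - 1) computes -- to
 * strip the always-zero low bits of the pointer.  It then folds the
 * high bits down with s(x) = x + (x >> HASH_BITS) before masking with
 * HASH_SIZE - 1, so consecutive block indices within one mapping land
 * in different buckets and distinct mappings are offset from each
 * other.
 */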
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	int dropped_lock;
	struct metapage **hash_ptr;
	int l2BlocksPerPage;
	int l2bsize;
	int no_wait;
	struct address_space *mapping;
	struct metapage *mp;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);

	if (absolute)
		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	hash_ptr = meta_hash(mapping, lblock);
again:
	spin_lock(&meta_lock);
	mp = search_hash(hash_ptr, mapping, lblock);
	if (mp) {
	      page_found:
		if (test_bit(META_stale, &mp->flag)) {
			spin_unlock(&meta_lock);
			yield();
			goto again;
		}
		mp->count++;
		lock_metapage(mp);
		spin_unlock(&meta_lock);
		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				release_metapage(mp);
				return NULL;
			}
			clear_bit(META_discard, &mp->flag);
		}
		jfs_info("__get_metapage: found 0x%p, in hash", mp);
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			release_metapage(mp);
			return NULL;
		}
	} else {
		l2bsize = inode->i_blkbits;
		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
		page_index = lblock >> l2BlocksPerPage;
		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
		    l2bsize;
		if ((page_offset + size) > PAGE_CACHE_SIZE) {
			spin_unlock(&meta_lock);
			jfs_err("MetaData crosses page boundary!!");
			return NULL;
		}

		/*
		 * Locks held on aggregate inode pages are usually
		 * not held long, and they are taken in critical code
		 * paths (committing dirty inodes, txCommit thread)
		 *
		 * Attempt to get metapage without blocking, tapping into
		 * reserves if necessary.
		 */
		if (JFS_IP(inode)->fileset == AGGREGATE_I)
			no_wait = 1;
		else
			no_wait = 0;

		mp = alloc_metapage(&dropped_lock, no_wait);
		if (!mp) {
			spin_unlock(&meta_lock);
			return NULL;
		}
		if (dropped_lock) {
			/*
			 * alloc_metapage blocked, we need to search the
			 * hash again.
			 */
			struct metapage *mp2;

			mp2 = search_hash(hash_ptr, mapping, lblock);
			if (mp2) {
				__free_metapage(mp);
				mp = mp2;
				goto page_found;
			}
		}
		mp->flag = 0;
		lock_metapage(mp);
		if (absolute)
			set_bit(META_absolute, &mp->flag);
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		atomic_set(&mp->nohomeok, 0);
		mp->mapping = mapping;
		mp->index = lblock;
		mp->page = NULL;
		mp->logical_size = size;
		add_to_hash(mp, hash_ptr);
		spin_unlock(&meta_lock);

		if (new) {
			jfs_info("__get_metapage: Calling grab_cache_page");
			mp->page = grab_cache_page(mapping, page_index);
			if (!mp->page) {
				jfs_err("grab_cache_page failed!");
				goto freeit;
			} else {
				INCREMENT(mpStat.pagealloc);
				UnlockPage(mp->page);
			}
		} else {
			jfs_info("__get_metapage: Calling read_cache_page");
			/*
			 * page_index, not lblock: the kmap offset below
			 * assumes the page that page_offset was computed
			 * against
			 */
			mp->page = read_cache_page(mapping, page_index,
				   (filler_t *)mapping->a_ops->readpage, NULL);
			if (IS_ERR(mp->page)) {
				jfs_err("read_cache_page failed!");
				goto freeit;
			} else
				INCREMENT(mpStat.pagealloc);
		}
		mp->data = kmap(mp->page) + page_offset;
	}

	if (new)
		memset(mp->data, 0, PSIZE);

	jfs_info("__get_metapage: returning = 0x%p", mp);
	return mp;

freeit:
	spin_lock(&meta_lock);
	remove_from_hash(mp, hash_ptr);
	__free_metapage(mp);
	spin_unlock(&meta_lock);
	return NULL;
}

void hold_metapage(struct metapage *mp, int force)
{
	spin_lock(&meta_lock);

	mp->count++;

	if (force) {
		ASSERT(!(test_bit(META_forced, &mp->flag)));
		if (trylock_metapage(mp))
			set_bit(META_forced, &mp->flag);
	} else
		lock_metapage(mp);

	spin_unlock(&meta_lock);
}

static void __write_metapage(struct metapage *mp)
{
	int l2bsize = mp->mapping->host->i_blkbits;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	unsigned long page_index;
	unsigned long page_offset;
	int rc;

	jfs_info("__write_metapage: mp = 0x%p", mp);

	if (test_bit(META_discard, &mp->flag)) {
		/*
		 * This metadata is no longer valid
		 */
		clear_bit(META_dirty, &mp->flag);
		return;
	}

	page_index = mp->page->index;
	page_offset =
	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;

	lock_page(mp->page);
	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
					       page_offset +
					       mp->logical_size);
	if (rc) {
		jfs_err("prepare_write returned %d!", rc);
		ClearPageUptodate(mp->page);
		UnlockPage(mp->page);
		kunmap(mp->page);
		clear_bit(META_dirty, &mp->flag);
		return;
	}
	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
					      page_offset +
					      mp->logical_size);
	if (rc) {
		jfs_err("commit_write returned %d", rc);
	}

	UnlockPage(mp->page);
	clear_bit(META_dirty, &mp->flag);

	jfs_info("__write_metapage done");
}
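/*
 * Note on the write path above (explanatory, not original): under the
 * 2.4 address_space_operations contract, prepare_write() readies the
 * byte range [from, to) of a locked page for an in-place update and
 * commit_write() marks that range dirty for write-back.  The metapage
 * data was already modified directly through the kmap()ed mp->data
 * pointer, so passing NULL for the file pointer and the metapage's own
 * byte range lets the generic block code flush just that slice of the
 * page.
 */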
static inline void sync_metapage(struct metapage *mp)
{
	struct page *page = mp->page;

	page_cache_get(page);
	lock_page(page);

	/* we're done with this page - no need to check for errors */
	if (page->buffers) {
		writeout_one_page(page);
		waitfor_one_page(page);
	}

	UnlockPage(page);
	page_cache_release(page);
}

void release_metapage(struct metapage *mp)
{
	struct jfs_log *log;

	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	spin_lock(&meta_lock);
	if (test_bit(META_forced, &mp->flag)) {
		clear_bit(META_forced, &mp->flag);
		mp->count--;
		spin_unlock(&meta_lock);
		return;
	}

	assert(mp->count);
	if (--mp->count || atomic_read(&mp->nohomeok)) {
		unlock_metapage(mp);
		spin_unlock(&meta_lock);
		return;
	}

	if (mp->page) {
		set_bit(META_stale, &mp->flag);
		spin_unlock(&meta_lock);
		kunmap(mp->page);
		mp->data = NULL;
		if (test_bit(META_dirty, &mp->flag))
			__write_metapage(mp);
		if (test_bit(META_sync, &mp->flag)) {
			sync_metapage(mp);
			clear_bit(META_sync, &mp->flag);
		}

		if (test_bit(META_discard, &mp->flag)) {
			lock_page(mp->page);
			block_flushpage(mp->page, 0);
			UnlockPage(mp->page);
		}

		page_cache_release(mp->page);
		mp->page = NULL;
		INCREMENT(mpStat.pagefree);
		spin_lock(&meta_lock);
	}

	if (mp->lsn) {
		/*
		 * Remove metapage from logsynclist.
		 */
		log = mp->log;
		LOGSYNC_LOCK(log);
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
		LOGSYNC_UNLOCK(log);
	}

	remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
	spin_unlock(&meta_lock);

	free_metapage(mp);
}

void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	struct metapage **hash_ptr;
	unsigned long lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	/* All callers are interested in the block device's mapping */
	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
	struct metapage *mp;
	struct page *page;

	/*
	 * First, mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr; lblock < addr + len;
	     lblock += 1 << l2BlocksPerPage) {
		hash_ptr = meta_hash(mapping, lblock);
again:
		spin_lock(&meta_lock);
		mp = search_hash(hash_ptr, mapping, lblock);
		if (mp) {
			if (test_bit(META_stale, &mp->flag)) {
				spin_unlock(&meta_lock);
				yield();
				goto again;
			}

			set_bit(META_discard, &mp->flag);
			spin_unlock(&meta_lock);
		} else {
			spin_unlock(&meta_lock);
			page = find_lock_page(mapping,
					      lblock >> l2BlocksPerPage);
			if (page) {
				block_flushpage(page, 0);
				UnlockPage(page);
				page_cache_release(page);
			}
		}
	}
}

#ifdef CONFIG_JFS_STATISTICS
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n"
		       "allocation waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait,
		       mpStat.allocwait);

	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
#endif
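/*
 * Usage sketch (illustrative; the wrapper names live in jfs_metapage.h,
 * not in this file): callers normally reach __get_metapage() through
 * thin macros -- read_metapage() to read an existing block,
 * get_metapage() to grab a zero-filled new one -- then modify the
 * mapped metadata through mp->data, set META_dirty, and drop their
 * reference:
 *
 *	mp = read_metapage(ip, lblock, PSIZE, 0);
 *	if (mp == NULL)
 *		return -EIO;
 *	// ... modify metadata through mp->data ...
 *	mark_metapage_dirty(mp);
 *	release_metapage(mp);	// write-back happens on final release
 */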