1 /*
2  * page.c - buffer/page management specific to NILFS
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Ryusuke Konishi <ryusuke@osrg.net>,
21  *            Seiji Kihara <kihara@osrg.net>.
22  */
23 
24 #include <linux/pagemap.h>
25 #include <linux/writeback.h>
26 #include <linux/swap.h>
27 #include <linux/bitops.h>
28 #include <linux/page-flags.h>
29 #include <linux/list.h>
30 #include <linux/highmem.h>
31 #include <linux/pagevec.h>
32 #include <linux/gfp.h>
33 #include "nilfs.h"
34 #include "page.h"
35 #include "mdt.h"
36 
37 
/*
 * Buffer state bits that a copy takes over from its source buffer.  Used
 * as the b_state mask when buffers or whole pages are duplicated in this
 * file.
 */
#define NILFS_BUFFER_INHERENT_BITS			\
	((1UL << BH_Uptodate) |				\
	 (1UL << BH_Mapped) |				\
	 (1UL << BH_NILFS_Node) |			\
	 (1UL << BH_NILFS_Volatile) |			\
	 (1UL << BH_NILFS_Checked))
41 
42 static struct buffer_head *
__nilfs_get_page_block(struct page * page,unsigned long block,pgoff_t index,int blkbits,unsigned long b_state)43 __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
44 		       int blkbits, unsigned long b_state)
45 
46 {
47 	unsigned long first_block;
48 	struct buffer_head *bh;
49 
50 	if (!page_has_buffers(page))
51 		create_empty_buffers(page, 1 << blkbits, b_state);
52 
53 	first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
54 	bh = nilfs_page_get_nth_block(page, block - first_block);
55 
56 	touch_buffer(bh);
57 	wait_on_buffer(bh);
58 	return bh;
59 }
60 
nilfs_grab_buffer(struct inode * inode,struct address_space * mapping,unsigned long blkoff,unsigned long b_state)61 struct buffer_head *nilfs_grab_buffer(struct inode *inode,
62 				      struct address_space *mapping,
63 				      unsigned long blkoff,
64 				      unsigned long b_state)
65 {
66 	int blkbits = inode->i_blkbits;
67 	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
68 	struct page *page;
69 	struct buffer_head *bh;
70 
71 	page = grab_cache_page(mapping, index);
72 	if (unlikely(!page))
73 		return NULL;
74 
75 	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
76 	if (unlikely(!bh)) {
77 		unlock_page(page);
78 		page_cache_release(page);
79 		return NULL;
80 	}
81 	return bh;
82 }
83 
84 /**
85  * nilfs_forget_buffer - discard dirty state
86  * @inode: owner inode of the buffer
87  * @bh: buffer head of the buffer to be discarded
88  */
nilfs_forget_buffer(struct buffer_head * bh)89 void nilfs_forget_buffer(struct buffer_head *bh)
90 {
91 	struct page *page = bh->b_page;
92 
93 	lock_buffer(bh);
94 	clear_buffer_nilfs_volatile(bh);
95 	clear_buffer_nilfs_checked(bh);
96 	clear_buffer_nilfs_redirected(bh);
97 	clear_buffer_async_write(bh);
98 	clear_buffer_dirty(bh);
99 	if (nilfs_page_buffers_clean(page))
100 		__nilfs_clear_page_dirty(page);
101 
102 	clear_buffer_uptodate(bh);
103 	clear_buffer_mapped(bh);
104 	bh->b_blocknr = -1;
105 	ClearPageUptodate(page);
106 	ClearPageMappedToDisk(page);
107 	unlock_buffer(bh);
108 	brelse(bh);
109 }
110 
111 /**
112  * nilfs_copy_buffer -- copy buffer data and flags
113  * @dbh: destination buffer
114  * @sbh: source buffer
115  */
nilfs_copy_buffer(struct buffer_head * dbh,struct buffer_head * sbh)116 void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
117 {
118 	void *kaddr0, *kaddr1;
119 	unsigned long bits;
120 	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
121 	struct buffer_head *bh;
122 
123 	kaddr0 = kmap_atomic(spage);
124 	kaddr1 = kmap_atomic(dpage);
125 	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
126 	kunmap_atomic(kaddr1);
127 	kunmap_atomic(kaddr0);
128 
129 	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
130 	dbh->b_blocknr = sbh->b_blocknr;
131 	dbh->b_bdev = sbh->b_bdev;
132 
133 	bh = dbh;
134 	bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
135 	while ((bh = bh->b_this_page) != dbh) {
136 		lock_buffer(bh);
137 		bits &= bh->b_state;
138 		unlock_buffer(bh);
139 	}
140 	if (bits & (1UL << BH_Uptodate))
141 		SetPageUptodate(dpage);
142 	else
143 		ClearPageUptodate(dpage);
144 	if (bits & (1UL << BH_Mapped))
145 		SetPageMappedToDisk(dpage);
146 	else
147 		ClearPageMappedToDisk(dpage);
148 }
149 
150 /**
151  * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
152  * @page: page to be checked
153  *
154  * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
155  * Otherwise, it returns non-zero value.
156  */
nilfs_page_buffers_clean(struct page * page)157 int nilfs_page_buffers_clean(struct page *page)
158 {
159 	struct buffer_head *bh, *head;
160 
161 	bh = head = page_buffers(page);
162 	do {
163 		if (buffer_dirty(bh))
164 			return 0;
165 		bh = bh->b_this_page;
166 	} while (bh != head);
167 	return 1;
168 }
169 
nilfs_page_bug(struct page * page)170 void nilfs_page_bug(struct page *page)
171 {
172 	struct address_space *m;
173 	unsigned long ino;
174 
175 	if (unlikely(!page)) {
176 		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
177 		return;
178 	}
179 
180 	m = page->mapping;
181 	ino = m ? m->host->i_ino : 0;
182 
183 	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
184 	       "mapping=%p ino=%lu\n",
185 	       page, atomic_read(&page->_count),
186 	       (unsigned long long)page->index, page->flags, m, ino);
187 
188 	if (page_has_buffers(page)) {
189 		struct buffer_head *bh, *head;
190 		int i = 0;
191 
192 		bh = head = page_buffers(page);
193 		do {
194 			printk(KERN_CRIT
195 			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
196 			       i++, bh, atomic_read(&bh->b_count),
197 			       (unsigned long long)bh->b_blocknr, bh->b_state);
198 			bh = bh->b_this_page;
199 		} while (bh != head);
200 	}
201 }
202 
203 /**
204  * nilfs_copy_page -- copy the page with buffers
205  * @dst: destination page
206  * @src: source page
207  * @copy_dirty: flag whether to copy dirty states on the page's buffer heads.
208  *
209  * This function is for both data pages and btnode pages.  The dirty flag
210  * should be treated by caller.  The page must not be under i/o.
211  * Both src and dst page must be locked
212  */
nilfs_copy_page(struct page * dst,struct page * src,int copy_dirty)213 static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
214 {
215 	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
216 	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;
217 
218 	BUG_ON(PageWriteback(dst));
219 
220 	sbh = sbufs = page_buffers(src);
221 	if (!page_has_buffers(dst))
222 		create_empty_buffers(dst, sbh->b_size, 0);
223 
224 	if (copy_dirty)
225 		mask |= (1UL << BH_Dirty);
226 
227 	dbh = dbufs = page_buffers(dst);
228 	do {
229 		lock_buffer(sbh);
230 		lock_buffer(dbh);
231 		dbh->b_state = sbh->b_state & mask;
232 		dbh->b_blocknr = sbh->b_blocknr;
233 		dbh->b_bdev = sbh->b_bdev;
234 		sbh = sbh->b_this_page;
235 		dbh = dbh->b_this_page;
236 	} while (dbh != dbufs);
237 
238 	copy_highpage(dst, src);
239 
240 	if (PageUptodate(src) && !PageUptodate(dst))
241 		SetPageUptodate(dst);
242 	else if (!PageUptodate(src) && PageUptodate(dst))
243 		ClearPageUptodate(dst);
244 	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
245 		SetPageMappedToDisk(dst);
246 	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
247 		ClearPageMappedToDisk(dst);
248 
249 	do {
250 		unlock_buffer(sbh);
251 		unlock_buffer(dbh);
252 		sbh = sbh->b_this_page;
253 		dbh = dbh->b_this_page;
254 	} while (dbh != dbufs);
255 }
256 
nilfs_copy_dirty_pages(struct address_space * dmap,struct address_space * smap)257 int nilfs_copy_dirty_pages(struct address_space *dmap,
258 			   struct address_space *smap)
259 {
260 	struct pagevec pvec;
261 	unsigned int i;
262 	pgoff_t index = 0;
263 	int err = 0;
264 
265 	pagevec_init(&pvec, 0);
266 repeat:
267 	if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
268 				PAGEVEC_SIZE))
269 		return 0;
270 
271 	for (i = 0; i < pagevec_count(&pvec); i++) {
272 		struct page *page = pvec.pages[i], *dpage;
273 
274 		lock_page(page);
275 		if (unlikely(!PageDirty(page)))
276 			NILFS_PAGE_BUG(page, "inconsistent dirty state");
277 
278 		dpage = grab_cache_page(dmap, page->index);
279 		if (unlikely(!dpage)) {
280 			/* No empty page is added to the page cache */
281 			err = -ENOMEM;
282 			unlock_page(page);
283 			break;
284 		}
285 		if (unlikely(!page_has_buffers(page)))
286 			NILFS_PAGE_BUG(page,
287 				       "found empty page in dat page cache");
288 
289 		nilfs_copy_page(dpage, page, 1);
290 		__set_page_dirty_nobuffers(dpage);
291 
292 		unlock_page(dpage);
293 		page_cache_release(dpage);
294 		unlock_page(page);
295 	}
296 	pagevec_release(&pvec);
297 	cond_resched();
298 
299 	if (likely(!err))
300 		goto repeat;
301 	return err;
302 }
303 
304 /**
305  * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
306  * @dmap: destination page cache
307  * @smap: source page cache
308  *
309  * No pages must no be added to the cache during this process.
310  * This must be ensured by the caller.
311  */
nilfs_copy_back_pages(struct address_space * dmap,struct address_space * smap)312 void nilfs_copy_back_pages(struct address_space *dmap,
313 			   struct address_space *smap)
314 {
315 	struct pagevec pvec;
316 	unsigned int i, n;
317 	pgoff_t index = 0;
318 	int err;
319 
320 	pagevec_init(&pvec, 0);
321 repeat:
322 	n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
323 	if (!n)
324 		return;
325 	index = pvec.pages[n - 1]->index + 1;
326 
327 	for (i = 0; i < pagevec_count(&pvec); i++) {
328 		struct page *page = pvec.pages[i], *dpage;
329 		pgoff_t offset = page->index;
330 
331 		lock_page(page);
332 		dpage = find_lock_page(dmap, offset);
333 		if (dpage) {
334 			/* override existing page on the destination cache */
335 			WARN_ON(PageDirty(dpage));
336 			nilfs_copy_page(dpage, page, 0);
337 			unlock_page(dpage);
338 			page_cache_release(dpage);
339 		} else {
340 			struct page *page2;
341 
342 			/* move the page to the destination cache */
343 			spin_lock_irq(&smap->tree_lock);
344 			page2 = radix_tree_delete(&smap->page_tree, offset);
345 			WARN_ON(page2 != page);
346 
347 			smap->nrpages--;
348 			spin_unlock_irq(&smap->tree_lock);
349 
350 			spin_lock_irq(&dmap->tree_lock);
351 			err = radix_tree_insert(&dmap->page_tree, offset, page);
352 			if (unlikely(err < 0)) {
353 				WARN_ON(err == -EEXIST);
354 				page->mapping = NULL;
355 				page_cache_release(page); /* for cache */
356 			} else {
357 				page->mapping = dmap;
358 				dmap->nrpages++;
359 				if (PageDirty(page))
360 					radix_tree_tag_set(&dmap->page_tree,
361 							   offset,
362 							   PAGECACHE_TAG_DIRTY);
363 			}
364 			spin_unlock_irq(&dmap->tree_lock);
365 		}
366 		unlock_page(page);
367 	}
368 	pagevec_release(&pvec);
369 	cond_resched();
370 
371 	goto repeat;
372 }
373 
nilfs_clear_dirty_pages(struct address_space * mapping)374 void nilfs_clear_dirty_pages(struct address_space *mapping)
375 {
376 	struct pagevec pvec;
377 	unsigned int i;
378 	pgoff_t index = 0;
379 
380 	pagevec_init(&pvec, 0);
381 
382 	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
383 				  PAGEVEC_SIZE)) {
384 		for (i = 0; i < pagevec_count(&pvec); i++) {
385 			struct page *page = pvec.pages[i];
386 			struct buffer_head *bh, *head;
387 
388 			lock_page(page);
389 			ClearPageUptodate(page);
390 			ClearPageMappedToDisk(page);
391 			bh = head = page_buffers(page);
392 			do {
393 				lock_buffer(bh);
394 				clear_buffer_async_write(bh);
395 				clear_buffer_dirty(bh);
396 				clear_buffer_nilfs_volatile(bh);
397 				clear_buffer_nilfs_checked(bh);
398 				clear_buffer_nilfs_redirected(bh);
399 				clear_buffer_uptodate(bh);
400 				clear_buffer_mapped(bh);
401 				unlock_buffer(bh);
402 				bh = bh->b_this_page;
403 			} while (bh != head);
404 
405 			__nilfs_clear_page_dirty(page);
406 			unlock_page(page);
407 		}
408 		pagevec_release(&pvec);
409 		cond_resched();
410 	}
411 }
412 
nilfs_page_count_clean_buffers(struct page * page,unsigned from,unsigned to)413 unsigned nilfs_page_count_clean_buffers(struct page *page,
414 					unsigned from, unsigned to)
415 {
416 	unsigned block_start, block_end;
417 	struct buffer_head *bh, *head;
418 	unsigned nc = 0;
419 
420 	for (bh = head = page_buffers(page), block_start = 0;
421 	     bh != head || !block_start;
422 	     block_start = block_end, bh = bh->b_this_page) {
423 		block_end = block_start + bh->b_size;
424 		if (block_end > from && block_start < to && !buffer_dirty(bh))
425 			nc++;
426 	}
427 	return nc;
428 }
429 
nilfs_mapping_init(struct address_space * mapping,struct inode * inode,struct backing_dev_info * bdi)430 void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
431 			struct backing_dev_info *bdi)
432 {
433 	mapping->host = inode;
434 	mapping->flags = 0;
435 	mapping_set_gfp_mask(mapping, GFP_NOFS);
436 	mapping->assoc_mapping = NULL;
437 	mapping->backing_dev_info = bdi;
438 	mapping->a_ops = &empty_aops;
439 }
440 
441 /*
442  * NILFS2 needs clear_page_dirty() in the following two cases:
443  *
444  * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
445  *    page dirty flags when it copies back pages from the shadow cache
446  *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
447  *    (dat->{i_mapping,i_btnode_cache}).
448  *
449  * 2) Some B-tree operations like insertion or deletion may dispose buffers
450  *    in dirty state, and this needs to cancel the dirty state of their pages.
451  */
__nilfs_clear_page_dirty(struct page * page)452 int __nilfs_clear_page_dirty(struct page *page)
453 {
454 	struct address_space *mapping = page->mapping;
455 
456 	if (mapping) {
457 		spin_lock_irq(&mapping->tree_lock);
458 		if (test_bit(PG_dirty, &page->flags)) {
459 			radix_tree_tag_clear(&mapping->page_tree,
460 					     page_index(page),
461 					     PAGECACHE_TAG_DIRTY);
462 			spin_unlock_irq(&mapping->tree_lock);
463 			return clear_page_dirty_for_io(page);
464 		}
465 		spin_unlock_irq(&mapping->tree_lock);
466 		return 0;
467 	}
468 	return TestClearPageDirty(page);
469 }
470 
471 /**
472  * nilfs_find_uncommitted_extent - find extent of uncommitted data
473  * @inode: inode
474  * @start_blk: start block offset (in)
475  * @blkoff: start offset of the found extent (out)
476  *
477  * This function searches an extent of buffers marked "delayed" which
478  * starts from a block offset equal to or larger than @start_blk.  If
479  * such an extent was found, this will store the start offset in
480  * @blkoff and return its length in blocks.  Otherwise, zero is
481  * returned.
482  */
nilfs_find_uncommitted_extent(struct inode * inode,sector_t start_blk,sector_t * blkoff)483 unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
484 					    sector_t start_blk,
485 					    sector_t *blkoff)
486 {
487 	unsigned int i;
488 	pgoff_t index;
489 	unsigned int nblocks_in_page;
490 	unsigned long length = 0;
491 	sector_t b;
492 	struct pagevec pvec;
493 	struct page *page;
494 
495 	if (inode->i_mapping->nrpages == 0)
496 		return 0;
497 
498 	index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
499 	nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
500 
501 	pagevec_init(&pvec, 0);
502 
503 repeat:
504 	pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
505 					pvec.pages);
506 	if (pvec.nr == 0)
507 		return length;
508 
509 	if (length > 0 && pvec.pages[0]->index > index)
510 		goto out;
511 
512 	b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
513 	i = 0;
514 	do {
515 		page = pvec.pages[i];
516 
517 		lock_page(page);
518 		if (page_has_buffers(page)) {
519 			struct buffer_head *bh, *head;
520 
521 			bh = head = page_buffers(page);
522 			do {
523 				if (b < start_blk)
524 					continue;
525 				if (buffer_delay(bh)) {
526 					if (length == 0)
527 						*blkoff = b;
528 					length++;
529 				} else if (length > 0) {
530 					goto out_locked;
531 				}
532 			} while (++b, bh = bh->b_this_page, bh != head);
533 		} else {
534 			if (length > 0)
535 				goto out_locked;
536 
537 			b += nblocks_in_page;
538 		}
539 		unlock_page(page);
540 
541 	} while (++i < pagevec_count(&pvec));
542 
543 	index = page->index + 1;
544 	pagevec_release(&pvec);
545 	cond_resched();
546 	goto repeat;
547 
548 out_locked:
549 	unlock_page(page);
550 out:
551 	pagevec_release(&pvec);
552 	return length;
553 }
554