/*
 * page.c - buffer/page management specific to NILFS
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>,
 *            Seiji Kihara <kihara@osrg.net>.
 */

#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/bitops.h>
#include <linux/page-flags.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
#include <linux/gfp.h>
#include "nilfs.h"
#include "page.h"
#include "mdt.h"


#define NILFS_BUFFER_INHERENT_BITS  \
	((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
	 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \
	 (1UL << BH_NILFS_Checked))

static struct buffer_head *
__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
		       int blkbits, unsigned long b_state)
{
	unsigned long first_block;
	struct buffer_head *bh;

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << blkbits, b_state);

	first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
	bh = nilfs_page_get_nth_block(page, block - first_block);

	touch_buffer(bh);
	wait_on_buffer(bh);
	return bh;
}

/*
 * Since the page cache of B-tree node pages or data page cache of pseudo
 * inodes does not have a valid mapping->host pointer, calling
 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
 */
void nilfs_mark_buffer_dirty(struct buffer_head *bh)
{
	if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
		__set_page_dirty_nobuffers(bh->b_page);
}
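
/*
 * A minimal usage sketch (illustrative only; the real callers live in
 * other NILFS source files): buffers of B-tree node pages and of the
 * pseudo inodes mentioned above are dirtied with this helper instead of
 * with mark_buffer_dirty():
 *
 *	set_buffer_uptodate(bh);
 *	nilfs_mark_buffer_dirty(bh);
 */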

struct buffer_head *nilfs_grab_buffer(struct inode *inode,
				      struct address_space *mapping,
				      unsigned long blkoff,
				      unsigned long b_state)
{
	int blkbits = inode->i_blkbits;
	pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
	struct page *page;
	struct buffer_head *bh;

	page = grab_cache_page(mapping, index);
	if (unlikely(!page))
		return NULL;

	bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
	if (unlikely(!bh)) {
		unlock_page(page);
		page_cache_release(page);
		return NULL;
	}
	return bh;
}
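
/*
 * A minimal usage sketch (illustrative only, with hypothetical context):
 * the buffer returned by nilfs_grab_buffer() pins its page, and that page
 * is left locked, so a caller is expected to unlock and release both once
 * it is done with the buffer.
 *
 *	struct buffer_head *bh;
 *	struct page *page;
 *
 *	bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
 *	if (bh) {
 *		page = bh->b_page;
 *		... read or fill the buffer ...
 *		unlock_page(page);
 *		page_cache_release(page);
 *		brelse(bh);
 *	}
 */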

/**
 * nilfs_forget_buffer - discard dirty state
 * @bh: buffer head of the buffer to be discarded
 */
void nilfs_forget_buffer(struct buffer_head *bh)
{
	struct page *page = bh->b_page;

	lock_buffer(bh);
	clear_buffer_nilfs_volatile(bh);
	clear_buffer_nilfs_checked(bh);
	clear_buffer_nilfs_redirected(bh);
	clear_buffer_dirty(bh);
	if (nilfs_page_buffers_clean(page))
		__nilfs_clear_page_dirty(page);

	clear_buffer_uptodate(bh);
	clear_buffer_mapped(bh);
	bh->b_blocknr = -1;
	ClearPageUptodate(page);
	ClearPageMappedToDisk(page);
	unlock_buffer(bh);
	brelse(bh);
}

/**
 * nilfs_copy_buffer -- copy buffer data and flags
 * @dbh: destination buffer
 * @sbh: source buffer
 */
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
	void *kaddr0, *kaddr1;
	unsigned long bits;
	struct page *spage = sbh->b_page, *dpage = dbh->b_page;
	struct buffer_head *bh;

	kaddr0 = kmap_atomic(spage, KM_USER0);
	kaddr1 = kmap_atomic(dpage, KM_USER1);
	memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
	kunmap_atomic(kaddr1, KM_USER1);
	kunmap_atomic(kaddr0, KM_USER0);

	dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
	dbh->b_blocknr = sbh->b_blocknr;
	dbh->b_bdev = sbh->b_bdev;

	bh = dbh;
	bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped));
	while ((bh = bh->b_this_page) != dbh) {
		lock_buffer(bh);
		bits &= bh->b_state;
		unlock_buffer(bh);
	}
	if (bits & (1UL << BH_Uptodate))
		SetPageUptodate(dpage);
	else
		ClearPageUptodate(dpage);
	if (bits & (1UL << BH_Mapped))
		SetPageMappedToDisk(dpage);
	else
		ClearPageMappedToDisk(dpage);
}

/**
 * nilfs_page_buffers_clean - check if a page has dirty buffers or not.
 * @page: page to be checked
 *
 * nilfs_page_buffers_clean() returns zero if the page has dirty buffers.
 * Otherwise, it returns a non-zero value.
 */
int nilfs_page_buffers_clean(struct page *page)
{
	struct buffer_head *bh, *head;

	bh = head = page_buffers(page);
	do {
		if (buffer_dirty(bh))
			return 0;
		bh = bh->b_this_page;
	} while (bh != head);
	return 1;
}

void nilfs_page_bug(struct page *page)
{
	struct address_space *m;
	unsigned long ino = 0;

	if (unlikely(!page)) {
		printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
		return;
	}

	m = page->mapping;
	if (m) {
		struct inode *inode = NILFS_AS_I(m);
		if (inode != NULL)
			ino = inode->i_ino;
	}
	printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
	       "mapping=%p ino=%lu\n",
	       page, atomic_read(&page->_count),
	       (unsigned long long)page->index, page->flags, m, ino);

	if (page_has_buffers(page)) {
		struct buffer_head *bh, *head;
		int i = 0;

		bh = head = page_buffers(page);
		do {
			printk(KERN_CRIT
			       " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n",
			       i++, bh, atomic_read(&bh->b_count),
			       (unsigned long long)bh->b_blocknr, bh->b_state);
			bh = bh->b_this_page;
		} while (bh != head);
	}
}

/**
 * nilfs_alloc_private_page - allocate a private page with buffer heads
 * @bdev: block device the buffer heads are associated with
 * @size: size of each buffer head, in bytes (the block size)
 * @state: additional state bits to set on each buffer head
 *         (BH_NILFS_Allocated is always set)
 *
 * Return Value: On success, a pointer to the allocated page is returned.
 * On error, NULL is returned.
 */
struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
				      unsigned long state)
{
	struct buffer_head *bh, *head, *tail;
	struct page *page;

	page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
	if (unlikely(!page))
		return NULL;

	lock_page(page);
	head = alloc_page_buffers(page, size, 0);
	if (unlikely(!head)) {
		unlock_page(page);
		__free_page(page);
		return NULL;
	}

	bh = head;
	do {
		bh->b_state = (1UL << BH_NILFS_Allocated) | state;
		tail = bh;
		bh->b_bdev = bdev;
		bh = bh->b_this_page;
	} while (bh);

	tail->b_this_page = head;
	attach_page_buffers(page, head);

	return page;
}

void nilfs_free_private_page(struct page *page)
{
	BUG_ON(!PageLocked(page));
	BUG_ON(page->mapping);

	if (page_has_buffers(page) && !try_to_free_buffers(page))
		NILFS_PAGE_BUG(page, "failed to free page");

	unlock_page(page);
	__free_page(page);
}
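
/*
 * A minimal usage sketch for the pair above (illustrative only; bdev and
 * blocksize stand in for values the real callers obtain elsewhere): the
 * page comes back locked, with a reference count of one and per-block
 * buffer heads attached, and it is handed back to nilfs_free_private_page()
 * in the same locked, unmapped state.
 *
 *	struct page *page;
 *
 *	page = nilfs_alloc_private_page(bdev, blocksize, 0);
 *	if (page) {
 *		... use page_buffers(page) ...
 *		nilfs_free_private_page(page);
 *	}
 */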

/**
 * nilfs_copy_page -- copy the page with buffers
 * @dst: destination page
 * @src: source page
 * @copy_dirty: whether to copy the dirty states of the page's buffer heads.
 *
 * This function is used for both data pages and btnode pages.  The page-level
 * dirty flag must be handled by the caller.  The pages must not be under I/O,
 * and both @src and @dst must be locked.
 */
static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty)
{
	struct buffer_head *dbh, *dbufs, *sbh, *sbufs;
	unsigned long mask = NILFS_BUFFER_INHERENT_BITS;

	BUG_ON(PageWriteback(dst));

	sbh = sbufs = page_buffers(src);
	if (!page_has_buffers(dst))
		create_empty_buffers(dst, sbh->b_size, 0);

	if (copy_dirty)
		mask |= (1UL << BH_Dirty);

	dbh = dbufs = page_buffers(dst);
	do {
		lock_buffer(sbh);
		lock_buffer(dbh);
		dbh->b_state = sbh->b_state & mask;
		dbh->b_blocknr = sbh->b_blocknr;
		dbh->b_bdev = sbh->b_bdev;
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);

	copy_highpage(dst, src);

	if (PageUptodate(src) && !PageUptodate(dst))
		SetPageUptodate(dst);
	else if (!PageUptodate(src) && PageUptodate(dst))
		ClearPageUptodate(dst);
	if (PageMappedToDisk(src) && !PageMappedToDisk(dst))
		SetPageMappedToDisk(dst);
	else if (!PageMappedToDisk(src) && PageMappedToDisk(dst))
		ClearPageMappedToDisk(dst);

	do {
		unlock_buffer(sbh);
		unlock_buffer(dbh);
		sbh = sbh->b_this_page;
		dbh = dbh->b_this_page;
	} while (dbh != dbufs);
}

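/**
 * nilfs_copy_dirty_pages - copy dirty pages from one page cache to another
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * Every page tagged dirty in @smap is copied, along with the state of its
 * buffer heads, to the page with the same index in @dmap, and the
 * destination page is then marked dirty as well.
 *
 * Return Value: 0 on success, or -ENOMEM if a destination page could not
 * be grabbed.
 */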
int nilfs_copy_dirty_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;
	int err = 0;

	pagevec_init(&pvec, 0);
repeat:
	if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY,
				PAGEVEC_SIZE))
		return 0;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;

		lock_page(page);
		if (unlikely(!PageDirty(page)))
			NILFS_PAGE_BUG(page, "inconsistent dirty state");

		dpage = grab_cache_page(dmap, page->index);
		if (unlikely(!dpage)) {
			/* No empty page is added to the page cache */
			err = -ENOMEM;
			unlock_page(page);
			break;
		}
		if (unlikely(!page_has_buffers(page)))
			NILFS_PAGE_BUG(page,
				       "found empty page in dat page cache");

		nilfs_copy_page(dpage, page, 1);
		__set_page_dirty_nobuffers(dpage);

		unlock_page(dpage);
		page_cache_release(dpage);
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	if (likely(!err))
		goto repeat;
	return err;
}

/**
 * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache
 * @dmap: destination page cache
 * @smap: source page cache
 *
 * No pages must be added to the cache during this process.
 * This must be ensured by the caller.
 */
void nilfs_copy_back_pages(struct address_space *dmap,
			   struct address_space *smap)
{
	struct pagevec pvec;
	unsigned int i, n;
	pgoff_t index = 0;
	int err;

	pagevec_init(&pvec, 0);
repeat:
	n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE);
	if (!n)
		return;
	index = pvec.pages[n - 1]->index + 1;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i], *dpage;
		pgoff_t offset = page->index;

		lock_page(page);
		dpage = find_lock_page(dmap, offset);
		if (dpage) {
			/* overwrite the existing page in the destination cache */
			WARN_ON(PageDirty(dpage));
			nilfs_copy_page(dpage, page, 0);
			unlock_page(dpage);
			page_cache_release(dpage);
		} else {
			struct page *page2;

			/* move the page to the destination cache */
			spin_lock_irq(&smap->tree_lock);
			page2 = radix_tree_delete(&smap->page_tree, offset);
			WARN_ON(page2 != page);

			smap->nrpages--;
			spin_unlock_irq(&smap->tree_lock);

			spin_lock_irq(&dmap->tree_lock);
			err = radix_tree_insert(&dmap->page_tree, offset, page);
			if (unlikely(err < 0)) {
				WARN_ON(err == -EEXIST);
				page->mapping = NULL;
				page_cache_release(page); /* for cache */
			} else {
				page->mapping = dmap;
				dmap->nrpages++;
				if (PageDirty(page))
					radix_tree_tag_set(&dmap->page_tree,
							   offset,
							   PAGECACHE_TAG_DIRTY);
			}
			spin_unlock_irq(&dmap->tree_lock);
		}
		unlock_page(page);
	}
	pagevec_release(&pvec);
	cond_resched();

	goto repeat;
}

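/**
 * nilfs_clear_dirty_pages - discard dirty pages in a page cache
 * @mapping: page cache whose dirty pages will be discarded
 *
 * The dirty, uptodate and mapped-to-disk states of every dirty page in
 * @mapping, and of the buffer heads on those pages, are cleared, so the
 * pages are dropped from the dirty set without being written back.
 */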
void nilfs_clear_dirty_pages(struct address_space *mapping)
{
	struct pagevec pvec;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			lock_page(page);
			ClearPageUptodate(page);
			ClearPageMappedToDisk(page);
			bh = head = page_buffers(page);
			do {
				lock_buffer(bh);
				clear_buffer_dirty(bh);
				clear_buffer_nilfs_volatile(bh);
				clear_buffer_nilfs_checked(bh);
				clear_buffer_nilfs_redirected(bh);
				clear_buffer_uptodate(bh);
				clear_buffer_mapped(bh);
				unlock_buffer(bh);
				bh = bh->b_this_page;
			} while (bh != head);

			__nilfs_clear_page_dirty(page);
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

unsigned nilfs_page_count_clean_buffers(struct page *page,
					unsigned from, unsigned to)
{
	unsigned block_start, block_end;
	struct buffer_head *bh, *head;
	unsigned nc = 0;

	for (bh = head = page_buffers(page), block_start = 0;
	     bh != head || !block_start;
	     block_start = block_end, bh = bh->b_this_page) {
		block_end = block_start + bh->b_size;
		if (block_end > from && block_start < to && !buffer_dirty(bh))
			nc++;
	}
	return nc;
}
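
/*
 * A small worked example (illustrative only): on a 4 KiB page carrying
 * four 1 KiB buffers, the call
 *
 *	nc = nilfs_page_count_clean_buffers(page, 0, 2048);
 *
 * inspects only the first two buffers, the ones overlapping the byte
 * range [0, 2048), and returns how many of them are clean.
 */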

void nilfs_mapping_init(struct address_space *mapping,
			struct backing_dev_info *bdi)
{
	mapping->host = NULL;
	mapping->flags = 0;
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	mapping->assoc_mapping = NULL;
	mapping->backing_dev_info = bdi;
	mapping->a_ops = &empty_aops;
}

/*
 * NILFS2 needs clear_page_dirty() in the following two cases:
 *
 * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears
 *    page dirty flags when it copies back pages from the shadow cache
 *    (gcdat->{i_mapping,i_btnode_cache}) to its original cache
 *    (dat->{i_mapping,i_btnode_cache}).
 *
 * 2) Some B-tree operations like insertion or deletion may dispose buffers
 *    in dirty state, and this needs to cancel the dirty state of their pages.
 */
int __nilfs_clear_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		spin_lock_irq(&mapping->tree_lock);
		if (test_bit(PG_dirty, &page->flags)) {
			radix_tree_tag_clear(&mapping->page_tree,
					     page_index(page),
					     PAGECACHE_TAG_DIRTY);
			spin_unlock_irq(&mapping->tree_lock);
			return clear_page_dirty_for_io(page);
		}
		spin_unlock_irq(&mapping->tree_lock);
		return 0;
	}
	return TestClearPageDirty(page);
}

/**
 * nilfs_find_uncommitted_extent - find extent of uncommitted data
 * @inode: inode
 * @start_blk: start block offset (in)
 * @blkoff: start offset of the found extent (out)
 *
 * This function searches an extent of buffers marked "delayed" which
 * starts from a block offset equal to or larger than @start_blk.  If
 * such an extent was found, this will store the start offset in
 * @blkoff and return its length in blocks.  Otherwise, zero is
 * returned.
 */
unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
					    sector_t start_blk,
					    sector_t *blkoff)
{
	unsigned int i;
	pgoff_t index;
	unsigned int nblocks_in_page;
	unsigned long length = 0;
	sector_t b;
	struct pagevec pvec;
	struct page *page;

	if (inode->i_mapping->nrpages == 0)
		return 0;

	index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
	nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);

	pagevec_init(&pvec, 0);

repeat:
	pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
					pvec.pages);
	if (pvec.nr == 0)
		return length;

	if (length > 0 && pvec.pages[0]->index > index)
		goto out;

	b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
	i = 0;
	do {
		page = pvec.pages[i];

		lock_page(page);
		if (page_has_buffers(page)) {
			struct buffer_head *bh, *head;

			bh = head = page_buffers(page);
			do {
				if (b < start_blk)
					continue;
				if (buffer_delay(bh)) {
					if (length == 0)
						*blkoff = b;
					length++;
				} else if (length > 0) {
					goto out_locked;
				}
			} while (++b, bh = bh->b_this_page, bh != head);
		} else {
			if (length > 0)
				goto out_locked;

			b += nblocks_in_page;
		}
		unlock_page(page);

	} while (++i < pagevec_count(&pvec));

	index = page->index + 1;
	pagevec_release(&pvec);
	cond_resched();
	goto repeat;

out_locked:
	unlock_page(page);
out:
	pagevec_release(&pvec);
	return length;
}
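
/*
 * A minimal usage sketch (hypothetical caller, for illustration only):
 *
 *	sector_t blkoff;
 *	unsigned long nblocks;
 *
 *	nblocks = nilfs_find_uncommitted_extent(inode, start_blk, &blkoff);
 *	if (nblocks > 0) {
 *		... blocks blkoff .. blkoff + nblocks - 1 hold uncommitted
 *		    (delayed) data ...
 *	}
 */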