1 /*
2  * fs/logfs/segment.c	- Handling the Object Store
3  *
4  * As should be obvious for Linux kernel code, license is GPLv2
5  *
6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7  *
8  * Object store or ostore makes up the complete device with exception of
9  * the superblock and journal areas.  Apart from its own metadata it stores
10  * three kinds of objects: inodes, dentries and blocks, both data and indirect.
11  */
12 #include "logfs.h"
13 #include <linux/slab.h>
14 
/*
 * Remember that @segno must not be used any more: insert it into the
 * reserved-segment btree and bump the bad-segment counter.
 *
 * Returns 0 on success or the error reported by btree_insert32().
 */
static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	int ret;

	ret = btree_insert32(&super->s_reserved_segments, segno, (void *)1,
			GFP_NOFS);
	if (ret)
		return ret;

	super->s_bad_segments++;
	/* FIXME: write to journal */
	return 0;
}
28 
/*
 * Erase segment @segno through the device operations.
 *
 * The global erase counter is advanced before the device is asked to erase,
 * so it is incremented whether or not the erase succeeds.
 *
 * Returns whatever the device's erase operation returns.
 */
int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
{
	struct logfs_super *super = logfs_super(sb);
	u64 dev_start = (u64)segno << super->s_segshift;

	super->s_gec++;

	return super->s_devops->erase(sb, dev_start, super->s_segsize,
			ensure_erase);
}
38 
logfs_get_free_bytes(struct logfs_area * area,size_t bytes)39 static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
40 {
41 	s32 ofs;
42 
43 	logfs_open_area(area, bytes);
44 
45 	ofs = area->a_used_bytes;
46 	area->a_used_bytes += bytes;
47 	BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);
48 
49 	return dev_ofs(area->a_sb, area->a_segno, ofs);
50 }
51 
/*
 * Fetch page @index of the mapping inode's page cache.
 *
 * With @use_filler set, the page is read through the device's readpage
 * operation via read_cache_page(); the result may be an ERR_PTR().
 * Otherwise the page is looked up or freshly created without reading and
 * handed back unlocked; the result is NULL if no page could be obtained.
 *
 * The caller owns a reference on any page returned and must drop it.
 */
static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
		int use_filler)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	filler_t *filler = super->s_devops->readpage;
	struct page *page;

	BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS);
	if (use_filler)
		return read_cache_page(mapping, index, filler, sb);

	page = find_or_create_page(mapping, index, GFP_NOFS);
	/*
	 * find_or_create_page() returns NULL on allocation failure; only
	 * unlock a page we actually got, and let the caller deal with NULL.
	 */
	if (page)
		unlock_page(page);
	return page;
}
69 
/*
 * Copy @len bytes from @buf into the mapping inode's page cache, starting
 * at device offset @ofs.  Every page touched is marked uptodate and private.
 *
 * With @use_filler set the pages are read in first; a read error is
 * returned to the caller.  Returns 0 on success.
 */
int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
		int use_filler)
{
	pgoff_t pg = ofs >> PAGE_SHIFT;
	long in_page = ofs & (PAGE_SIZE-1);
	struct page *page;
	long chunk;

	/* Only logfs_wbuf_recover may use len==0 */
	BUG_ON(!len && !use_filler);

	/* Always runs at least once, so len==0 still fetches one page. */
	for (;;) {
		chunk = min((ulong)len, PAGE_SIZE - in_page);

		page = get_mapping_page(area->a_sb, pg, use_filler);
		if (IS_ERR(page))
			return PTR_ERR(page);
		BUG_ON(!page); /* FIXME: reserve a pool */
		SetPageUptodate(page);
		memcpy(page_address(page) + in_page, buf, chunk);
		SetPagePrivate(page);
		page_cache_release(page);

		len -= chunk;
		if (!len)
			break;

		/* Continue at the start of the following page. */
		buf += chunk;
		in_page = 0;
		pg++;
	}
	return 0;
}
99 
pad_partial_page(struct logfs_area * area)100 static void pad_partial_page(struct logfs_area *area)
101 {
102 	struct super_block *sb = area->a_sb;
103 	struct page *page;
104 	u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
105 	pgoff_t index = ofs >> PAGE_SHIFT;
106 	long offset = ofs & (PAGE_SIZE-1);
107 	u32 len = PAGE_SIZE - offset;
108 
109 	if (len % PAGE_SIZE) {
110 		page = get_mapping_page(sb, index, 0);
111 		BUG_ON(!page); /* FIXME: reserve a pool */
112 		memset(page_address(page) + offset, 0xff, len);
113 		SetPagePrivate(page);
114 		page_cache_release(page);
115 	}
116 }
117 
pad_full_pages(struct logfs_area * area)118 static void pad_full_pages(struct logfs_area *area)
119 {
120 	struct super_block *sb = area->a_sb;
121 	struct logfs_super *super = logfs_super(sb);
122 	u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
123 	u32 len = super->s_segsize - area->a_used_bytes;
124 	pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
125 	pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
126 	struct page *page;
127 
128 	while (no_indizes) {
129 		page = get_mapping_page(sb, index, 0);
130 		BUG_ON(!page); /* FIXME: reserve a pool */
131 		SetPageUptodate(page);
132 		memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
133 		SetPagePrivate(page);
134 		page_cache_release(page);
135 		index++;
136 		no_indizes--;
137 	}
138 }
139 
/*
 * bdev_writeseg writes full pages, so the unused tail of the current page
 * is overwritten with 0xff to avoid leaking stale data.  When @final is
 * set, all remaining pages of the segment are allocated and filled as well
 * so the final writeout finds them.
 */
static void pad_wbuf(struct logfs_area *area, int final)
{
	pad_partial_page(area);
	if (!final)
		return;
	pad_full_pages(area);
}
150 
151 /*
152  * We have to be careful with the alias tree.  Since lookup is done by bix,
153  * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
154  * indirect blocks.  So always use it through accessor functions.
155  */
alias_tree_lookup(struct super_block * sb,u64 ino,u64 bix,level_t level)156 static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
157 		level_t level)
158 {
159 	struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
160 	pgoff_t index = logfs_pack_index(bix, level);
161 
162 	return btree_lookup128(head, ino, index);
163 }
164 
/*
 * Store @val in the alias tree under (@ino, packed index of @bix/@level).
 * Returns the result of btree_insert128().
 */
static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
		level_t level, void *val)
{
	struct logfs_super *super = logfs_super(sb);
	pgoff_t packed = logfs_pack_index(bix, level);

	return btree_insert128(&super->s_object_alias_tree, ino, packed, val,
			GFP_NOFS);
}
173 
/*
 * Write every alias item attached to @block to the journal, stopping at the
 * first error.  Note that @write_one_alias is not used; write_alias_journal()
 * is called directly.
 *
 * Returns 0 on success or the first error from write_alias_journal().
 */
static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
		write_alias_t *write_one_alias)
{
	struct object_alias_item *item;
	int err = 0;

	list_for_each_entry(item, &block->item_list, list) {
		err = write_alias_journal(sb, block->ino, block->bix,
				block->level, item->child_no, item->val);
		if (err)
			break;
	}
	return err;
}
188 
189 static struct logfs_block_ops btree_block_ops = {
190 	.write_block	= btree_write_block,
191 	.free_block	= __free_block,
192 	.write_alias	= btree_write_alias,
193 };
194 
logfs_load_object_aliases(struct super_block * sb,struct logfs_obj_alias * oa,int count)195 int logfs_load_object_aliases(struct super_block *sb,
196 		struct logfs_obj_alias *oa, int count)
197 {
198 	struct logfs_super *super = logfs_super(sb);
199 	struct logfs_block *block;
200 	struct object_alias_item *item;
201 	u64 ino, bix;
202 	level_t level;
203 	int i, err;
204 
205 	super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
206 	count /= sizeof(*oa);
207 	for (i = 0; i < count; i++) {
208 		item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
209 		if (!item)
210 			return -ENOMEM;
211 		memset(item, 0, sizeof(*item));
212 
213 		super->s_no_object_aliases++;
214 		item->val = oa[i].val;
215 		item->child_no = be16_to_cpu(oa[i].child_no);
216 
217 		ino = be64_to_cpu(oa[i].ino);
218 		bix = be64_to_cpu(oa[i].bix);
219 		level = LEVEL(oa[i].level);
220 
221 		log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
222 				ino, bix, level, item->child_no,
223 				be64_to_cpu(item->val));
224 		block = alias_tree_lookup(sb, ino, bix, level);
225 		if (!block) {
226 			block = __alloc_block(sb, ino, bix, level);
227 			block->ops = &btree_block_ops;
228 			err = alias_tree_insert(sb, ino, bix, level, block);
229 			BUG_ON(err); /* mempool empty */
230 		}
231 		if (test_and_set_bit(item->child_no, block->alias_map)) {
232 			printk(KERN_ERR"LogFS: Alias collision detected\n");
233 			return -EIO;
234 		}
235 		list_move_tail(&block->alias_list, &super->s_object_alias);
236 		list_add(&item->list, &block->item_list);
237 	}
238 	return 0;
239 }
240 
/*
 * btree visitor used on teardown: give all alias items of the block back to
 * the alias pool, then free the block through its ops.  Only @_block is
 * used; the remaining arguments are ignored.
 */
static void kill_alias(void *_block, unsigned long ignore0,
		u64 ignore1, u64 ignore2, size_t ignore3)
{
	struct logfs_block *block = _block;
	struct super_block *sb = block->sb;
	struct logfs_super *super = logfs_super(sb);
	struct object_alias_item *item, *tmp;

	list_for_each_entry_safe(item, tmp, &block->item_list, list) {
		list_del(&item->list);
		mempool_free(item, super->s_alias_pool);
	}
	block->ops->free_block(sb, block);
}
256 
/*
 * Classify the object stored at @level of @inode.  Level-0 objects of a
 * directory are dentries, level-0 objects of the master inode are inodes,
 * everything else is a plain block.
 */
static int obj_type(struct inode *inode, level_t level)
{
	if (level != 0)
		return OBJ_BLOCK;
	if (S_ISDIR(inode->i_mode))
		return OBJ_DENTRY;
	if (inode->i_ino == LOGFS_INO_MASTER)
		return OBJ_INODE;
	return OBJ_BLOCK;
}
267 
obj_len(struct super_block * sb,int obj_type)268 static int obj_len(struct super_block *sb, int obj_type)
269 {
270 	switch (obj_type) {
271 	case OBJ_DENTRY:
272 		return sizeof(struct logfs_disk_dentry);
273 	case OBJ_INODE:
274 		return sizeof(struct logfs_disk_inode);
275 	case OBJ_BLOCK:
276 		return sb->s_blocksize;
277 	default:
278 		BUG();
279 	}
280 }
281 
__logfs_segment_write(struct inode * inode,void * buf,struct logfs_shadow * shadow,int type,int len,int compr)282 static int __logfs_segment_write(struct inode *inode, void *buf,
283 		struct logfs_shadow *shadow, int type, int len, int compr)
284 {
285 	struct logfs_area *area;
286 	struct super_block *sb = inode->i_sb;
287 	s64 ofs;
288 	struct logfs_object_header h;
289 	int acc_len;
290 
291 	if (shadow->gc_level == 0)
292 		acc_len = len;
293 	else
294 		acc_len = obj_len(sb, type);
295 
296 	area = get_area(sb, shadow->gc_level);
297 	ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
298 	LOGFS_BUG_ON(ofs <= 0, sb);
299 	/*
300 	 * Order is important.  logfs_get_free_bytes(), by modifying the
301 	 * segment file, may modify the content of the very page we're about
302 	 * to write now.  Which is fine, as long as the calculated crc and
303 	 * written data still match.  So do the modifications _before_
304 	 * calculating the crc.
305 	 */
306 
307 	h.len	= cpu_to_be16(len);
308 	h.type	= type;
309 	h.compr	= compr;
310 	h.ino	= cpu_to_be64(inode->i_ino);
311 	h.bix	= cpu_to_be64(shadow->bix);
312 	h.crc	= logfs_crc32(&h, sizeof(h) - 4, 4);
313 	h.data_crc = logfs_crc32(buf, len, 0);
314 
315 	logfs_buf_write(area, ofs, &h, sizeof(h));
316 	logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);
317 
318 	shadow->new_ofs = ofs;
319 	shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;
320 
321 	return 0;
322 }
323 
logfs_segment_write_compress(struct inode * inode,void * buf,struct logfs_shadow * shadow,int type,int len)324 static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
325 		struct logfs_shadow *shadow, int type, int len)
326 {
327 	struct super_block *sb = inode->i_sb;
328 	void *compressor_buf = logfs_super(sb)->s_compressed_je;
329 	ssize_t compr_len;
330 	int ret;
331 
332 	mutex_lock(&logfs_super(sb)->s_journal_mutex);
333 	compr_len = logfs_compress(buf, compressor_buf, len, len);
334 
335 	if (compr_len >= 0) {
336 		ret = __logfs_segment_write(inode, compressor_buf, shadow,
337 				type, compr_len, COMPR_ZLIB);
338 	} else {
339 		ret = __logfs_segment_write(inode, buf, shadow, type, len,
340 				COMPR_NONE);
341 	}
342 	mutex_unlock(&logfs_super(sb)->s_journal_mutex);
343 	return ret;
344 }
345 
346 /**
347  * logfs_segment_write - write data block to object store
348  * @inode:		inode containing data
349  *
350  * Returns an errno or zero.
351  */
logfs_segment_write(struct inode * inode,struct page * page,struct logfs_shadow * shadow)352 int logfs_segment_write(struct inode *inode, struct page *page,
353 		struct logfs_shadow *shadow)
354 {
355 	struct super_block *sb = inode->i_sb;
356 	struct logfs_super *super = logfs_super(sb);
357 	int do_compress, type, len;
358 	int ret;
359 	void *buf;
360 
361 	super->s_flags |= LOGFS_SB_FLAG_DIRTY;
362 	BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
363 	do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
364 	if (shadow->gc_level != 0) {
365 		/* temporarily disable compression for indirect blocks */
366 		do_compress = 0;
367 	}
368 
369 	type = obj_type(inode, shrink_level(shadow->gc_level));
370 	len = obj_len(sb, type);
371 	buf = kmap(page);
372 	if (do_compress)
373 		ret = logfs_segment_write_compress(inode, buf, shadow, type,
374 				len);
375 	else
376 		ret = __logfs_segment_write(inode, buf, shadow, type, len,
377 				COMPR_NONE);
378 	kunmap(page);
379 
380 	log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
381 			shadow->ino, shadow->bix, shadow->gc_level,
382 			shadow->old_ofs, shadow->new_ofs,
383 			shadow->old_len, shadow->new_len);
384 	/* this BUG_ON did catch a locking bug.  useful */
385 	BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
386 	return ret;
387 }
388 
/*
 * Read @len bytes starting at device offset @ofs into @buf, going through
 * the mapping inode's page cache (pages are read in as necessary).
 *
 * Returns 0 on success or the error from reading a page.
 */
int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
{
	pgoff_t pg = ofs >> PAGE_SHIFT;
	long in_page = ofs & (PAGE_SIZE-1);
	struct page *page;
	long chunk;

	for (; len; pg++) {
		chunk = min((ulong)len, PAGE_SIZE - in_page);

		page = get_mapping_page(sb, pg, 1);
		if (IS_ERR(page))
			return PTR_ERR(page);
		memcpy(buf, page_address(page) + in_page, chunk);
		page_cache_release(page);

		/* Everything after the first page starts at offset 0. */
		in_page = 0;
		buf += chunk;
		len -= chunk;
	}
	return 0;
}
412 
413 /*
414  * The "position" of indirect blocks is ambiguous.  It can be the position
415  * of any data block somewhere behind this indirect block.  So we need to
416  * normalize the positions through logfs_block_mask() before comparing.
417  */
check_pos(struct super_block * sb,u64 pos1,u64 pos2,level_t level)418 static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
419 {
420 	return	(pos1 & logfs_block_mask(sb, level)) !=
421 		(pos2 & logfs_block_mask(sb, level));
422 }
423 
#if 0
/*
 * Compiled out.  Reads a segment header from @ofs into @sh and checks its
 * crc.  Returns 0, a read error, or -EIO on crc mismatch.
 */
static int read_seg_header(struct super_block *sb, u64 ofs,
		struct logfs_segment_header *sh)
{
	__be32 crc;
	int err = wbuf_read(sb, ofs, sizeof(*sh), sh);

	if (err)
		return err;

	crc = logfs_crc32(sh, sizeof(*sh), 4);
	if (crc == sh->crc)
		return 0;

	printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
			"got %x\n", ofs, be32_to_cpu(sh->crc),
			be32_to_cpu(crc));
	return -EIO;
}
#endif
444 
/*
 * Read the object header at device offset @ofs into @oh and verify its crc.
 * The crc calculation skips the first four bytes and leaves out the last
 * four bytes of the header.
 *
 * Returns 0 on success, a read error, or -EIO on crc mismatch.
 */
static int read_obj_header(struct super_block *sb, u64 ofs,
		struct logfs_object_header *oh)
{
	__be32 crc;
	int err = wbuf_read(sb, ofs, sizeof(*oh), oh);

	if (err)
		return err;

	crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
	if (crc == oh->crc)
		return 0;

	printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
			"got %x\n", ofs, be32_to_cpu(oh->crc),
			be32_to_cpu(crc));
	return -EIO;
}
463 
/*
 * If the alias tree holds a block for (@inode, @page), take it out of the
 * tree, patch its aliased child pointers into @data and attach the block to
 * @page.  The alias items are returned to the pool.
 *
 * Does nothing while LOGFS_SB_FLAG_OBJ_ALIAS is clear or when the tree has
 * no entry for this page.
 */
static void move_btree_to_page(struct inode *inode, struct page *page,
		__be64 *data)
{
	struct super_block *sb = inode->i_sb;
	struct logfs_super *super = logfs_super(sb);
	struct logfs_block *block;
	struct object_alias_item *item;

	if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
		return;

	block = btree_remove128(&super->s_object_alias_tree, inode->i_ino,
			page->index);
	if (!block)
		return;

	log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
			block->ino, block->bix, block->level);

	/* Apply each alias to the page, front to back, and release it. */
	while (!list_empty(&block->item_list)) {
		item = list_entry(block->item_list.next, typeof(*item), list);
		data[item->child_no] = item->val;
		list_del(&item->list);
		mempool_free(item, super->s_alias_pool);
	}

	block->page = page;
	SetPagePrivate(page);
	page->private = (unsigned long)block;
	block->ops = &indirect_block_ops;
	initialize_block_counters(page, block, data, 0);
}
493 
/*
 * Thin wrapper around find_next_bit().  It exists only to silence a false,
 * yet annoying gcc warning that otherwise makes the editor jump into
 * bitops.h on every rebuild of this file.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
		unsigned long size, unsigned long offset)
{
	unsigned long bit = find_next_bit(addr, size, offset);

	return bit;
}
504 
move_page_to_btree(struct page * page)505 void move_page_to_btree(struct page *page)
506 {
507 	struct logfs_block *block = logfs_block(page);
508 	struct super_block *sb = block->sb;
509 	struct logfs_super *super = logfs_super(sb);
510 	struct object_alias_item *item;
511 	unsigned long pos;
512 	__be64 *child;
513 	int err;
514 
515 	if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
516 		block->ops->free_block(sb, block);
517 		return;
518 	}
519 	log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
520 			block->ino, block->bix, block->level);
521 	super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
522 
523 	for (pos = 0; ; pos++) {
524 		pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
525 		if (pos >= LOGFS_BLOCK_FACTOR)
526 			break;
527 
528 		item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
529 		BUG_ON(!item); /* mempool empty */
530 		memset(item, 0, sizeof(*item));
531 
532 		child = kmap_atomic(page, KM_USER0);
533 		item->val = child[pos];
534 		kunmap_atomic(child, KM_USER0);
535 		item->child_no = pos;
536 		list_add(&item->list, &block->item_list);
537 	}
538 	block->page = NULL;
539 	ClearPagePrivate(page);
540 	page->private = 0;
541 	block->ops = &btree_block_ops;
542 	err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
543 			block);
544 	BUG_ON(err); /* mempool empty */
545 	ClearPageUptodate(page);
546 }
547 
/*
 * Read the object at device offset @ofs into @buf and verify it.
 *
 * @inode:	inode the object is expected to belong to
 * @buf:	destination; must hold a full object of this inode/level
 * @ofs:	device offset of the object header
 * @bix:	expected block index
 * @level:	expected block level
 *
 * Checks the header crc, that (ino, bix) match what the caller expects,
 * that the stored length does not exceed the object size, and the data crc.
 * Compressed objects are staged in the shared compression buffer under
 * s_journal_mutex and uncompressed into @buf.
 *
 * Returns 0 on success.  On any failure the filesystem is set read-only,
 * LOGFS_BUG() is raised and a negative errno is returned.
 */
static int __logfs_segment_read(struct inode *inode, void *buf,
		u64 ofs, u64 bix, level_t level)
{
	struct super_block *sb = inode->i_sb;
	void *compressor_buf = logfs_super(sb)->s_compressed_je;
	struct logfs_object_header oh;
	__be32 crc;
	u16 len;
	int err, block_len;

	block_len = obj_len(sb, obj_type(inode, level));
	err = read_obj_header(sb, ofs, &oh);
	if (err)
		goto out_err;

	err = -EIO;
	if (be64_to_cpu(oh.ino) != inode->i_ino
			|| check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
		printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
				"expected (%lx, %llx), got (%llx, %llx)\n",
				ofs, inode->i_ino, bix,
				be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
		goto out_err;
	}

	len = be16_to_cpu(oh.len);
	/*
	 * len comes from the medium.  The write path never stores more than
	 * the object size (compressed data is capped at the same size), and
	 * both @buf and the compression buffer are only filled up to that
	 * size.  Treat anything larger as corruption instead of overrunning
	 * the buffers below.
	 */
	if (len > block_len) {
		printk(KERN_ERR"LOGFS: object length %x exceeds %x at %llx\n",
				len, block_len, ofs);
		goto out_err;
	}

	switch (oh.compr) {
	case COMPR_NONE:
		err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
		if (err)
			goto out_err;
		crc = logfs_crc32(buf, len, 0);
		if (crc != oh.data_crc) {
			printk(KERN_ERR"LOGFS: uncompressed data crc error at "
					"%llx: expected %x, got %x\n", ofs,
					be32_to_cpu(oh.data_crc),
					be32_to_cpu(crc));
			goto out_err;
		}
		break;
	case COMPR_ZLIB:
		mutex_lock(&logfs_super(sb)->s_journal_mutex);
		err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
				compressor_buf);
		if (err) {
			mutex_unlock(&logfs_super(sb)->s_journal_mutex);
			goto out_err;
		}
		crc = logfs_crc32(compressor_buf, len, 0);
		if (crc != oh.data_crc) {
			printk(KERN_ERR"LOGFS: compressed data crc error at "
					"%llx: expected %x, got %x\n", ofs,
					be32_to_cpu(oh.data_crc),
					be32_to_cpu(crc));
			mutex_unlock(&logfs_super(sb)->s_journal_mutex);
			goto out_err;
		}
		err = logfs_uncompress(compressor_buf, buf, len, block_len);
		mutex_unlock(&logfs_super(sb)->s_journal_mutex);
		if (err) {
			printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
			goto out_err;
		}
		break;
	default:
		/* Unknown compression type in a header with a valid crc. */
		LOGFS_BUG(sb);
		err = -EIO;
		goto out_err;
	}
	return 0;

out_err:
	logfs_set_ro(sb);
	printk(KERN_ERR"LOGFS: device is read-only now\n");
	LOGFS_BUG(sb);
	return err;
}
626 
627 /**
628  * logfs_segment_read - read data block from object store
629  * @inode:		inode containing data
630  * @buf:		data buffer
631  * @ofs:		physical data offset
632  * @bix:		block index
633  * @level:		block level
634  *
635  * Returns 0 on success or a negative errno.
636  */
logfs_segment_read(struct inode * inode,struct page * page,u64 ofs,u64 bix,level_t level)637 int logfs_segment_read(struct inode *inode, struct page *page,
638 		u64 ofs, u64 bix, level_t level)
639 {
640 	int err;
641 	void *buf;
642 
643 	if (PageUptodate(page))
644 		return 0;
645 
646 	ofs &= ~LOGFS_FULLY_POPULATED;
647 
648 	buf = kmap(page);
649 	err = __logfs_segment_read(inode, buf, ofs, bix, level);
650 	if (!err) {
651 		move_btree_to_page(inode, page, buf);
652 		SetPageUptodate(page);
653 	}
654 	kunmap(page);
655 	log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
656 			inode->i_ino, bix, level, ofs, err);
657 	return err;
658 }
659 
logfs_segment_delete(struct inode * inode,struct logfs_shadow * shadow)660 int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
661 {
662 	struct super_block *sb = inode->i_sb;
663 	struct logfs_super *super = logfs_super(sb);
664 	struct logfs_object_header h;
665 	u16 len;
666 	int err;
667 
668 	super->s_flags |= LOGFS_SB_FLAG_DIRTY;
669 	BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
670 	BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
671 	if (!shadow->old_ofs)
672 		return 0;
673 
674 	log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
675 			shadow->ino, shadow->bix, shadow->gc_level,
676 			shadow->old_ofs, shadow->new_ofs,
677 			shadow->old_len, shadow->new_len);
678 	err = read_obj_header(sb, shadow->old_ofs, &h);
679 	LOGFS_BUG_ON(err, sb);
680 	LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
681 	LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
682 				shrink_level(shadow->gc_level)), sb);
683 
684 	if (shadow->gc_level == 0)
685 		len = be16_to_cpu(h.len);
686 	else
687 		len = obj_len(sb, h.type);
688 	shadow->old_len = len + sizeof(h);
689 	return 0;
690 }
691 
/*
 * Clear the private flag on every page of segment @segno that is currently
 * present in the mapping inode's page cache.  Pages not in the cache are
 * skipped.
 */
void freeseg(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	u64 seg_start = dev_ofs(sb, segno, 0);
	u64 seg_end = dev_ofs(sb, segno + 1, 0);
	struct page *page;
	u64 pos;

	for (pos = seg_start; pos < seg_end; pos += PAGE_SIZE) {
		page = find_get_page(mapping, pos >> PAGE_SHIFT);
		if (page) {
			ClearPagePrivate(page);
			page_cache_release(page);
		}
	}
}
709 
/*
 * Make sure @area is open and can take @bytes more bytes.
 *
 * If the area is open and has room, nothing happens.  Otherwise an open
 * area is closed first (padded, written out, its cached pages released from
 * private state) and a new segment is picked and erased.  Segments that
 * fail to erase are marked bad and another one is tried.
 *
 * Returns 1 if an open segment had to be closed, 0 otherwise.
 */
int logfs_open_area(struct logfs_area *area, size_t bytes)
{
	struct super_block *sb = area->a_sb;
	struct logfs_super *super = logfs_super(sb);
	int err, closed = 0;

	if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
		return 0;

	if (area->a_is_open) {
		u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
		u32 len = super->s_segsize - area->a_written_bytes;

		log_gc("logfs_close_area(%x)\n", area->a_segno);
		pad_wbuf(area, 1);
		super->s_devops->writeseg(area->a_sb, ofs, len);
		freeseg(sb, area->a_segno);
		closed = 1;
	}

	area->a_used_bytes = 0;
	area->a_written_bytes = 0;

	/* Keep picking segments until one of them erases cleanly. */
	for (;;) {
		area->a_ops->get_free_segment(area);
		area->a_ops->get_erase_count(area);

		log_gc("logfs_open_area(%x, %x)\n", area->a_segno,
				area->a_level);
		err = area->a_ops->erase_segment(area);
		if (!err)
			break;

		printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
				area->a_segno);
		logfs_mark_segment_bad(sb, area->a_segno);
	}
	area->a_is_open = 1;
	return closed;
}
747 
logfs_sync_area(struct logfs_area * area)748 void logfs_sync_area(struct logfs_area *area)
749 {
750 	struct super_block *sb = area->a_sb;
751 	struct logfs_super *super = logfs_super(sb);
752 	u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
753 	u32 len = (area->a_used_bytes - area->a_written_bytes);
754 
755 	if (super->s_writesize)
756 		len &= ~(super->s_writesize - 1);
757 	if (len == 0)
758 		return;
759 	pad_wbuf(area, 0);
760 	super->s_devops->writeseg(sb, ofs, len);
761 	area->a_written_bytes += len;
762 }
763 
logfs_sync_segments(struct super_block * sb)764 void logfs_sync_segments(struct super_block *sb)
765 {
766 	struct logfs_super *super = logfs_super(sb);
767 	int i;
768 
769 	for_each_area(i)
770 		logfs_sync_area(super->s_area[i]);
771 }
772 
773 /*
774  * Pick a free segment to be used for this area.  Effectively takes a
775  * candidate from the free list (not really a candidate anymore).
776  */
ostore_get_free_segment(struct logfs_area * area)777 static void ostore_get_free_segment(struct logfs_area *area)
778 {
779 	struct super_block *sb = area->a_sb;
780 	struct logfs_super *super = logfs_super(sb);
781 
782 	if (super->s_free_list.count == 0) {
783 		printk(KERN_ERR"LOGFS: ran out of free segments\n");
784 		LOGFS_BUG(sb);
785 	}
786 
787 	area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
788 }
789 
ostore_get_erase_count(struct logfs_area * area)790 static void ostore_get_erase_count(struct logfs_area *area)
791 {
792 	struct logfs_segment_entry se;
793 	u32 ec_level;
794 
795 	logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
796 	BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
797 			se.valid == cpu_to_be32(RESERVED));
798 
799 	ec_level = be32_to_cpu(se.ec_level);
800 	area->a_erase_count = (ec_level >> 4) + 1;
801 }
802 
ostore_erase_segment(struct logfs_area * area)803 static int ostore_erase_segment(struct logfs_area *area)
804 {
805 	struct super_block *sb = area->a_sb;
806 	struct logfs_segment_header sh;
807 	u64 ofs;
808 	int err;
809 
810 	err = logfs_erase_segment(sb, area->a_segno, 0);
811 	if (err)
812 		return err;
813 
814 	sh.pad = 0;
815 	sh.type = SEG_OSTORE;
816 	sh.level = (__force u8)area->a_level;
817 	sh.segno = cpu_to_be32(area->a_segno);
818 	sh.ec = cpu_to_be32(area->a_erase_count);
819 	sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
820 	sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
821 
822 	logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
823 			area->a_level);
824 
825 	ofs = dev_ofs(sb, area->a_segno, 0);
826 	area->a_used_bytes = sizeof(sh);
827 	logfs_buf_write(area, ofs, &sh, sizeof(sh));
828 	return 0;
829 }
830 
831 static const struct logfs_area_ops ostore_area_ops = {
832 	.get_free_segment	= ostore_get_free_segment,
833 	.get_erase_count	= ostore_get_erase_count,
834 	.erase_segment		= ostore_erase_segment,
835 };
836 
free_area(struct logfs_area * area)837 static void free_area(struct logfs_area *area)
838 {
839 	if (area)
840 		freeseg(area->a_sb, area->a_segno);
841 	kfree(area);
842 }
843 
alloc_area(struct super_block * sb)844 static struct logfs_area *alloc_area(struct super_block *sb)
845 {
846 	struct logfs_area *area;
847 
848 	area = kzalloc(sizeof(*area), GFP_KERNEL);
849 	if (!area)
850 		return NULL;
851 
852 	area->a_sb = sb;
853 	return area;
854 }
855 
/* Pages of the mapping inode must never be invalidated; doing so is a bug. */
static void map_invalidatepage(struct page *page, unsigned long l)
{
	BUG();
}
860 
/* Always answers 0, i.e. refuses to release pages of the mapping inode. */
static int map_releasepage(struct page *page, gfp_t g)
{
	return 0;
}
866 
867 static const struct address_space_operations mapping_aops = {
868 	.invalidatepage = map_invalidatepage,
869 	.releasepage	= map_releasepage,
870 	.set_page_dirty = __set_page_dirty_nobuffers,
871 };
872 
logfs_init_mapping(struct super_block * sb)873 int logfs_init_mapping(struct super_block *sb)
874 {
875 	struct logfs_super *super = logfs_super(sb);
876 	struct address_space *mapping;
877 	struct inode *inode;
878 
879 	inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
880 	if (IS_ERR(inode))
881 		return PTR_ERR(inode);
882 	super->s_mapping_inode = inode;
883 	mapping = inode->i_mapping;
884 	mapping->a_ops = &mapping_aops;
885 	/* Would it be possible to use __GFP_HIGHMEM as well? */
886 	mapping_set_gfp_mask(mapping, GFP_NOFS);
887 	return 0;
888 }
889 
logfs_init_areas(struct super_block * sb)890 int logfs_init_areas(struct super_block *sb)
891 {
892 	struct logfs_super *super = logfs_super(sb);
893 	int i = -1;
894 
895 	super->s_alias_pool = mempool_create_kmalloc_pool(600,
896 			sizeof(struct object_alias_item));
897 	if (!super->s_alias_pool)
898 		return -ENOMEM;
899 
900 	super->s_journal_area = alloc_area(sb);
901 	if (!super->s_journal_area)
902 		goto err;
903 
904 	for_each_area(i) {
905 		super->s_area[i] = alloc_area(sb);
906 		if (!super->s_area[i])
907 			goto err;
908 		super->s_area[i]->a_level = GC_LEVEL(i);
909 		super->s_area[i]->a_ops = &ostore_area_ops;
910 	}
911 	btree_init_mempool128(&super->s_object_alias_tree,
912 			super->s_btree_pool);
913 	return 0;
914 
915 err:
916 	for (i--; i >= 0; i--)
917 		free_area(super->s_area[i]);
918 	free_area(super->s_journal_area);
919 	logfs_mempool_destroy(super->s_alias_pool);
920 	return -ENOMEM;
921 }
922 
logfs_cleanup_areas(struct super_block * sb)923 void logfs_cleanup_areas(struct super_block *sb)
924 {
925 	struct logfs_super *super = logfs_super(sb);
926 	int i;
927 
928 	btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
929 	for_each_area(i)
930 		free_area(super->s_area[i]);
931 	free_area(super->s_journal_area);
932 }
933