1 /*
2  * fs/logfs/gc.c	- garbage collection code
3  *
4  * As should be obvious for Linux kernel code, license is GPLv2
5  *
6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7  */
8 #include "logfs.h"
9 #include <linux/sched.h>
10 #include <linux/slab.h>
11 
12 /*
13  * Wear leveling needs to kick in when the difference between low erase
14  * counts and high erase counts gets too big.  A good value for "too big"
15  * may be somewhat below 10% of maximum erase count for the device.
16  * Why not 397, to pick a nice round number with no specific meaning? :)
17  *
18  * WL_RATELIMIT is the minimum time between two wear level events.  A huge
19  * number of segments may fulfil the requirements for wear leveling at the
20  * same time.  If that happens we don't want to cause a latency from hell,
21  * but just gently pick one segment every so often and minimize overhead.
22  */
23 #define WL_DELTA 397
24 #define WL_RATELIMIT 100
25 #define MAX_OBJ_ALIASES	2600
26 #define SCAN_RATIO 512	/* number of scanned segments per gc'd segment */
27 #define LIST_SIZE 64	/* base size of candidate lists */
28 #define SCAN_ROUNDS 128	/* maximum number of complete medium scans */
29 #define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */
30 
no_free_segments(struct super_block * sb)31 static int no_free_segments(struct super_block *sb)
32 {
33 	struct logfs_super *super = logfs_super(sb);
34 
35 	return super->s_free_list.count;
36 }
37 
38 /* journal has distance -1, top-most ifile layer distance 0 */
root_distance(struct super_block * sb,gc_level_t __gc_level)39 static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
40 {
41 	struct logfs_super *super = logfs_super(sb);
42 	u8 gc_level = (__force u8)__gc_level;
43 
44 	switch (gc_level) {
45 	case 0: /* fall through */
46 	case 1: /* fall through */
47 	case 2: /* fall through */
48 	case 3:
49 		/* file data or indirect blocks */
50 		return super->s_ifile_levels + super->s_iblock_levels - gc_level;
51 	case 6: /* fall through */
52 	case 7: /* fall through */
53 	case 8: /* fall through */
54 	case 9:
55 		/* inode file data or indirect blocks */
56 		return super->s_ifile_levels - (gc_level - 6);
57 	default:
58 		printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
59 				gc_level);
60 		WARN_ON(1);
61 		return super->s_ifile_levels + super->s_iblock_levels;
62 	}
63 }
64 
segment_is_reserved(struct super_block * sb,u32 segno)65 static int segment_is_reserved(struct super_block *sb, u32 segno)
66 {
67 	struct logfs_super *super = logfs_super(sb);
68 	struct logfs_area *area;
69 	void *reserved;
70 	int i;
71 
72 	/* Some segments are reserved.  Just pretend they were all valid */
73 	reserved = btree_lookup32(&super->s_reserved_segments, segno);
74 	if (reserved)
75 		return 1;
76 
77 	/* Currently open segments */
78 	for_each_area(i) {
79 		area = super->s_area[i];
80 		if (area->a_is_open && area->a_segno == segno)
81 			return 1;
82 	}
83 
84 	return 0;
85 }
86 
logfs_mark_segment_bad(struct super_block * sb,u32 segno)87 static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
88 {
89 	BUG();
90 }
91 
92 /*
93  * Returns the bytes consumed by valid objects in this segment.  Object headers
94  * are counted, the segment header is not.
95  */
logfs_valid_bytes(struct super_block * sb,u32 segno,u32 * ec,gc_level_t * gc_level)96 static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec,
97 		gc_level_t *gc_level)
98 {
99 	struct logfs_segment_entry se;
100 	u32 ec_level;
101 
102 	logfs_get_segment_entry(sb, segno, &se);
103 	if (se.ec_level == cpu_to_be32(BADSEG) ||
104 			se.valid == cpu_to_be32(RESERVED))
105 		return RESERVED;
106 
107 	ec_level = be32_to_cpu(se.ec_level);
108 	*ec = ec_level >> 4;
109 	*gc_level = GC_LEVEL(ec_level & 0xf);
110 	return be32_to_cpu(se.valid);
111 }
112 
logfs_cleanse_block(struct super_block * sb,u64 ofs,u64 ino,u64 bix,gc_level_t gc_level)113 static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
114 		u64 bix, gc_level_t gc_level)
115 {
116 	struct inode *inode;
117 	int err, cookie;
118 
119 	inode = logfs_safe_iget(sb, ino, &cookie);
120 	err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
121 	BUG_ON(err);
122 	logfs_safe_iput(inode, cookie);
123 }
124 
logfs_gc_segment(struct super_block * sb,u32 segno)125 static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
126 {
127 	struct logfs_super *super = logfs_super(sb);
128 	struct logfs_segment_header sh;
129 	struct logfs_object_header oh;
130 	u64 ofs, ino, bix;
131 	u32 seg_ofs, logical_segno, cleaned = 0;
132 	int err, len, valid;
133 	gc_level_t gc_level;
134 
135 	LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);
136 
137 	btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
138 	err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
139 	BUG_ON(err);
140 	gc_level = GC_LEVEL(sh.level);
141 	logical_segno = be32_to_cpu(sh.segno);
142 	if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
143 		logfs_mark_segment_bad(sb, segno);
144 		cleaned = -1;
145 		goto out;
146 	}
147 
148 	for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
149 			seg_ofs + sizeof(oh) < super->s_segsize; ) {
150 		ofs = dev_ofs(sb, logical_segno, seg_ofs);
151 		err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
152 				&oh);
153 		BUG_ON(err);
154 
155 		if (!memchr_inv(&oh, 0xff, sizeof(oh)))
156 			break;
157 
158 		if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
159 			logfs_mark_segment_bad(sb, segno);
160 			cleaned = super->s_segsize - 1;
161 			goto out;
162 		}
163 
164 		ino = be64_to_cpu(oh.ino);
165 		bix = be64_to_cpu(oh.bix);
166 		len = sizeof(oh) + be16_to_cpu(oh.len);
167 		valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
168 		if (valid == 1) {
169 			logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
170 			cleaned += len;
171 		} else if (valid == 2) {
172 			/* Will be invalid upon journal commit */
173 			cleaned += len;
174 		}
175 		seg_ofs += len;
176 	}
177 out:
178 	btree_remove32(&super->s_reserved_segments, segno);
179 	return cleaned;
180 }
181 
add_list(struct gc_candidate * cand,struct candidate_list * list)182 static struct gc_candidate *add_list(struct gc_candidate *cand,
183 		struct candidate_list *list)
184 {
185 	struct rb_node **p = &list->rb_tree.rb_node;
186 	struct rb_node *parent = NULL;
187 	struct gc_candidate *cur;
188 	int comp;
189 
190 	cand->list = list;
191 	while (*p) {
192 		parent = *p;
193 		cur = rb_entry(parent, struct gc_candidate, rb_node);
194 
195 		if (list->sort_by_ec)
196 			comp = cand->erase_count < cur->erase_count;
197 		else
198 			comp = cand->valid < cur->valid;
199 
200 		if (comp)
201 			p = &parent->rb_left;
202 		else
203 			p = &parent->rb_right;
204 	}
205 	rb_link_node(&cand->rb_node, parent, p);
206 	rb_insert_color(&cand->rb_node, &list->rb_tree);
207 
208 	if (list->count <= list->maxcount) {
209 		list->count++;
210 		return NULL;
211 	}
212 	cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
213 	rb_erase(&cand->rb_node, &list->rb_tree);
214 	cand->list = NULL;
215 	return cand;
216 }
217 
remove_from_list(struct gc_candidate * cand)218 static void remove_from_list(struct gc_candidate *cand)
219 {
220 	struct candidate_list *list = cand->list;
221 
222 	rb_erase(&cand->rb_node, &list->rb_tree);
223 	list->count--;
224 }
225 
free_candidate(struct super_block * sb,struct gc_candidate * cand)226 static void free_candidate(struct super_block *sb, struct gc_candidate *cand)
227 {
228 	struct logfs_super *super = logfs_super(sb);
229 
230 	btree_remove32(&super->s_cand_tree, cand->segno);
231 	kfree(cand);
232 }
233 
get_best_cand(struct super_block * sb,struct candidate_list * list,u32 * ec)234 u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec)
235 {
236 	struct gc_candidate *cand;
237 	u32 segno;
238 
239 	BUG_ON(list->count == 0);
240 
241 	cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
242 	remove_from_list(cand);
243 	segno = cand->segno;
244 	if (ec)
245 		*ec = cand->erase_count;
246 	free_candidate(sb, cand);
247 	return segno;
248 }
249 
250 /*
251  * We have several lists to manage segments with.  The reserve_list is used to
252  * deal with bad blocks.  We try to keep the best (lowest ec) segments on this
253  * list.
254  * The free_list contains free segments for normal usage.  It usually gets the
255  * second pick after the reserve_list.  But when the free_list is running short
256  * it is more important to keep the free_list full than to keep a reserve.
257  *
258  * Segments that are not free are put onto a per-level low_list.  If we have
259  * to run garbage collection, we pick a candidate from there.  All segments on
260  * those lists should have at least some free space so GC will make progress.
261  *
262  * And last we have the ec_list, which is used to pick segments for wear
263  * leveling.
264  *
265  * If all appropriate lists are full, we simply free the candidate and forget
266  * about that segment for a while.  We have better candidates for each purpose.
267  */
__add_candidate(struct super_block * sb,struct gc_candidate * cand)268 static void __add_candidate(struct super_block *sb, struct gc_candidate *cand)
269 {
270 	struct logfs_super *super = logfs_super(sb);
271 	u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;
272 
273 	if (cand->valid == 0) {
274 		/* 100% free segments */
275 		log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
276 				cand->segno, cand->erase_count,
277 				dev_ofs(sb, cand->segno, 0));
278 		cand = add_list(cand, &super->s_reserve_list);
279 		if (cand) {
280 			log_gc_noisy("add free segment %x (ec %x) at %llx\n",
281 					cand->segno, cand->erase_count,
282 					dev_ofs(sb, cand->segno, 0));
283 			cand = add_list(cand, &super->s_free_list);
284 		}
285 	} else {
286 		/* good candidates for Garbage Collection */
287 		if (cand->valid < full)
288 			cand = add_list(cand, &super->s_low_list[cand->dist]);
289 		/* good candidates for wear leveling,
290 		 * segments that were recently written get ignored */
291 		if (cand)
292 			cand = add_list(cand, &super->s_ec_list);
293 	}
294 	if (cand)
295 		free_candidate(sb, cand);
296 }
297 
add_candidate(struct super_block * sb,u32 segno,u32 valid,u32 ec,u8 dist)298 static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
299 		u8 dist)
300 {
301 	struct logfs_super *super = logfs_super(sb);
302 	struct gc_candidate *cand;
303 
304 	cand = kmalloc(sizeof(*cand), GFP_NOFS);
305 	if (!cand)
306 		return -ENOMEM;
307 
308 	cand->segno = segno;
309 	cand->valid = valid;
310 	cand->erase_count = ec;
311 	cand->dist = dist;
312 
313 	btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
314 	__add_candidate(sb, cand);
315 	return 0;
316 }
317 
remove_segment_from_lists(struct super_block * sb,u32 segno)318 static void remove_segment_from_lists(struct super_block *sb, u32 segno)
319 {
320 	struct logfs_super *super = logfs_super(sb);
321 	struct gc_candidate *cand;
322 
323 	cand = btree_lookup32(&super->s_cand_tree, segno);
324 	if (cand) {
325 		remove_from_list(cand);
326 		free_candidate(sb, cand);
327 	}
328 }
329 
scan_segment(struct super_block * sb,u32 segno)330 static void scan_segment(struct super_block *sb, u32 segno)
331 {
332 	u32 valid, ec = 0;
333 	gc_level_t gc_level = 0;
334 	u8 dist;
335 
336 	if (segment_is_reserved(sb, segno))
337 		return;
338 
339 	remove_segment_from_lists(sb, segno);
340 	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
341 	if (valid == RESERVED)
342 		return;
343 
344 	dist = root_distance(sb, gc_level);
345 	add_candidate(sb, segno, valid, ec, dist);
346 }
347 
first_in_list(struct candidate_list * list)348 static struct gc_candidate *first_in_list(struct candidate_list *list)
349 {
350 	if (list->count == 0)
351 		return NULL;
352 	return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
353 }
354 
355 /*
356  * Find the best segment for garbage collection.  Main criterion is
357  * the segment requiring the least effort to clean.  Secondary
358  * criterion is to GC on the lowest level available.
359  *
360  * So we search the least effort segment on the lowest level first,
361  * then move up and pick another segment iff is requires significantly
362  * less effort.  Hence the LOGFS_MAX_OBJECTSIZE in the comparison.
363  */
get_candidate(struct super_block * sb)364 static struct gc_candidate *get_candidate(struct super_block *sb)
365 {
366 	struct logfs_super *super = logfs_super(sb);
367 	int i, max_dist;
368 	struct gc_candidate *cand = NULL, *this;
369 
370 	max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS);
371 
372 	for (i = max_dist; i >= 0; i--) {
373 		this = first_in_list(&super->s_low_list[i]);
374 		if (!this)
375 			continue;
376 		if (!cand)
377 			cand = this;
378 		if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid)
379 			cand = this;
380 	}
381 	return cand;
382 }
383 
__logfs_gc_once(struct super_block * sb,struct gc_candidate * cand)384 static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
385 {
386 	struct logfs_super *super = logfs_super(sb);
387 	gc_level_t gc_level;
388 	u32 cleaned, valid, segno, ec;
389 	u8 dist;
390 
391 	if (!cand) {
392 		log_gc("GC attempted, but no candidate found\n");
393 		return 0;
394 	}
395 
396 	segno = cand->segno;
397 	dist = cand->dist;
398 	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
399 	free_candidate(sb, cand);
400 	log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n",
401 			segno, (u64)segno << super->s_segshift,
402 			dist, no_free_segments(sb), valid,
403 			super->s_free_bytes);
404 	cleaned = logfs_gc_segment(sb, segno);
405 	log_gc("GC segment #%02x complete - now %x valid\n", segno,
406 			valid - cleaned);
407 	BUG_ON(cleaned != valid);
408 	return 1;
409 }
410 
logfs_gc_once(struct super_block * sb)411 static int logfs_gc_once(struct super_block *sb)
412 {
413 	struct gc_candidate *cand;
414 
415 	cand = get_candidate(sb);
416 	if (cand)
417 		remove_from_list(cand);
418 	return __logfs_gc_once(sb, cand);
419 }
420 
421 /* returns 1 if a wrap occurs, 0 otherwise */
logfs_scan_some(struct super_block * sb)422 static int logfs_scan_some(struct super_block *sb)
423 {
424 	struct logfs_super *super = logfs_super(sb);
425 	u32 segno;
426 	int i, ret = 0;
427 
428 	segno = super->s_sweeper;
429 	for (i = SCAN_RATIO; i > 0; i--) {
430 		segno++;
431 		if (segno >= super->s_no_segs) {
432 			segno = 0;
433 			ret = 1;
434 			/* Break out of the loop.  We want to read a single
435 			 * block from the segment size on next invocation if
436 			 * SCAN_RATIO is set to match block size
437 			 */
438 			break;
439 		}
440 
441 		scan_segment(sb, segno);
442 	}
443 	super->s_sweeper = segno;
444 	return ret;
445 }
446 
447 /*
448  * In principle, this function should loop forever, looking for GC candidates
449  * and moving data.  LogFS is designed in such a way that this loop is
450  * guaranteed to terminate.
451  *
452  * Limiting the loop to some iterations serves purely to catch cases when
453  * these guarantees have failed.  An actual endless loop is an obvious bug
454  * and should be reported as such.
455  */
__logfs_gc_pass(struct super_block * sb,int target)456 static void __logfs_gc_pass(struct super_block *sb, int target)
457 {
458 	struct logfs_super *super = logfs_super(sb);
459 	struct logfs_block *block;
460 	int round, progress, last_progress = 0;
461 
462 	/*
463 	 * Doing too many changes to the segfile at once would result
464 	 * in a large number of aliases.  Write the journal before
465 	 * things get out of hand.
466 	 */
467 	if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
468 		logfs_write_anchor(sb);
469 
470 	if (no_free_segments(sb) >= target &&
471 			super->s_no_object_aliases < MAX_OBJ_ALIASES)
472 		return;
473 
474 	log_gc("__logfs_gc_pass(%x)\n", target);
475 	for (round = 0; round < SCAN_ROUNDS; ) {
476 		if (no_free_segments(sb) >= target)
477 			goto write_alias;
478 
479 		/* Sync in-memory state with on-medium state in case they
480 		 * diverged */
481 		logfs_write_anchor(sb);
482 		round += logfs_scan_some(sb);
483 		if (no_free_segments(sb) >= target)
484 			goto write_alias;
485 		progress = logfs_gc_once(sb);
486 		if (progress)
487 			last_progress = round;
488 		else if (round - last_progress > 2)
489 			break;
490 		continue;
491 
492 		/*
493 		 * The goto logic is nasty, I just don't know a better way to
494 		 * code it.  GC is supposed to ensure two things:
495 		 * 1. Enough free segments are available.
496 		 * 2. The number of aliases is bounded.
497 		 * When 1. is achieved, we take a look at 2. and write back
498 		 * some alias-containing blocks, if necessary.  However, after
499 		 * each such write we need to go back to 1., as writes can
500 		 * consume free segments.
501 		 */
502 write_alias:
503 		if (super->s_no_object_aliases < MAX_OBJ_ALIASES)
504 			return;
505 		if (list_empty(&super->s_object_alias)) {
506 			/* All aliases are still in btree */
507 			return;
508 		}
509 		log_gc("Write back one alias\n");
510 		block = list_entry(super->s_object_alias.next,
511 				struct logfs_block, alias_list);
512 		block->ops->write_block(block);
513 		/*
514 		 * To round off the nasty goto logic, we reset round here.  It
515 		 * is a safety-net for GC not making any progress and limited
516 		 * to something reasonably small.  If incremented it for every
517 		 * single alias, the loop could terminate rather quickly.
518 		 */
519 		round = 0;
520 	}
521 	LOGFS_BUG(sb);
522 }
523 
wl_ratelimit(struct super_block * sb,u64 * next_event)524 static int wl_ratelimit(struct super_block *sb, u64 *next_event)
525 {
526 	struct logfs_super *super = logfs_super(sb);
527 
528 	if (*next_event < super->s_gec) {
529 		*next_event = super->s_gec + WL_RATELIMIT;
530 		return 0;
531 	}
532 	return 1;
533 }
534 
logfs_wl_pass(struct super_block * sb)535 static void logfs_wl_pass(struct super_block *sb)
536 {
537 	struct logfs_super *super = logfs_super(sb);
538 	struct gc_candidate *wl_cand, *free_cand;
539 
540 	if (wl_ratelimit(sb, &super->s_wl_gec_ostore))
541 		return;
542 
543 	wl_cand = first_in_list(&super->s_ec_list);
544 	if (!wl_cand)
545 		return;
546 	free_cand = first_in_list(&super->s_free_list);
547 	if (!free_cand)
548 		return;
549 
550 	if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) {
551 		remove_from_list(wl_cand);
552 		__logfs_gc_once(sb, wl_cand);
553 	}
554 }
555 
556 /*
557  * The journal needs wear leveling as well.  But moving the journal is an
558  * expensive operation so we try to avoid it as much as possible.  And if we
559  * have to do it, we move the whole journal, not individual segments.
560  *
561  * Ratelimiting is not strictly necessary here, it mainly serves to avoid the
562  * calculations.  First we check whether moving the journal would be a
563  * significant improvement.  That means that a) the current journal segments
564  * have more wear than the future journal segments and b) the current journal
565  * segments have more wear than normal ostore segments.
566  * Rationale for b) is that we don't have to move the journal if it is aging
567  * less than the ostore, even if the reserve segments age even less (they are
568  * excluded from wear leveling, after all).
569  * Next we check that the superblocks have less wear than the journal.  Since
570  * moving the journal requires writing the superblocks, we have to protect the
571  * superblocks even more than the journal.
572  *
573  * Also we double the acceptable wear difference, compared to ostore wear
574  * leveling.  Journal data is read and rewritten rapidly, comparatively.  So
575  * soft errors have much less time to accumulate and we allow the journal to
576  * be a bit worse than the ostore.
577  */
logfs_journal_wl_pass(struct super_block * sb)578 static void logfs_journal_wl_pass(struct super_block *sb)
579 {
580 	struct logfs_super *super = logfs_super(sb);
581 	struct gc_candidate *cand;
582 	u32 min_journal_ec = -1, max_reserve_ec = 0;
583 	int i;
584 
585 	if (wl_ratelimit(sb, &super->s_wl_gec_journal))
586 		return;
587 
588 	if (super->s_reserve_list.count < super->s_no_journal_segs) {
589 		/* Reserve is not full enough to move complete journal */
590 		return;
591 	}
592 
593 	journal_for_each(i)
594 		if (super->s_journal_seg[i])
595 			min_journal_ec = min(min_journal_ec,
596 					super->s_journal_ec[i]);
597 	cand = rb_entry(rb_first(&super->s_free_list.rb_tree),
598 			struct gc_candidate, rb_node);
599 	max_reserve_ec = cand->erase_count;
600 	for (i = 0; i < 2; i++) {
601 		struct logfs_segment_entry se;
602 		u32 segno = seg_no(sb, super->s_sb_ofs[i]);
603 		u32 ec;
604 
605 		logfs_get_segment_entry(sb, segno, &se);
606 		ec = be32_to_cpu(se.ec_level) >> 4;
607 		max_reserve_ec = max(max_reserve_ec, ec);
608 	}
609 
610 	if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) {
611 		do_logfs_journal_wl_pass(sb);
612 	}
613 }
614 
logfs_gc_pass(struct super_block * sb)615 void logfs_gc_pass(struct super_block *sb)
616 {
617 	struct logfs_super *super = logfs_super(sb);
618 
619 	//BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex));
620 	/* Write journal before free space is getting saturated with dirty
621 	 * objects.
622 	 */
623 	if (super->s_dirty_used_bytes + super->s_dirty_free_bytes
624 			+ LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes)
625 		logfs_write_anchor(sb);
626 	__logfs_gc_pass(sb, super->s_total_levels);
627 	logfs_wl_pass(sb);
628 	logfs_journal_wl_pass(sb);
629 }
630 
check_area(struct super_block * sb,int i)631 static int check_area(struct super_block *sb, int i)
632 {
633 	struct logfs_super *super = logfs_super(sb);
634 	struct logfs_area *area = super->s_area[i];
635 	gc_level_t gc_level;
636 	u32 cleaned, valid, ec;
637 	u32 segno = area->a_segno;
638 	u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
639 
640 	if (!area->a_is_open)
641 		return 0;
642 
643 	if (super->s_devops->can_write_buf(sb, ofs) == 0)
644 		return 0;
645 
646 	printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs);
647 	/*
648 	 * The device cannot write back the write buffer.  Most likely the
649 	 * wbuf was already written out and the system crashed at some point
650 	 * before the journal commit happened.  In that case we wouldn't have
651 	 * to do anything.  But if the crash happened before the wbuf was
652 	 * written out correctly, we must GC this segment.  So assume the
653 	 * worst and always do the GC run.
654 	 */
655 	area->a_is_open = 0;
656 	valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
657 	cleaned = logfs_gc_segment(sb, segno);
658 	if (cleaned != valid)
659 		return -EIO;
660 	return 0;
661 }
662 
logfs_check_areas(struct super_block * sb)663 int logfs_check_areas(struct super_block *sb)
664 {
665 	int i, err;
666 
667 	for_each_area(i) {
668 		err = check_area(sb, i);
669 		if (err)
670 			return err;
671 	}
672 	return 0;
673 }
674 
logfs_init_candlist(struct candidate_list * list,int maxcount,int sort_by_ec)675 static void logfs_init_candlist(struct candidate_list *list, int maxcount,
676 		int sort_by_ec)
677 {
678 	list->count = 0;
679 	list->maxcount = maxcount;
680 	list->sort_by_ec = sort_by_ec;
681 	list->rb_tree = RB_ROOT;
682 }
683 
logfs_init_gc(struct super_block * sb)684 int logfs_init_gc(struct super_block *sb)
685 {
686 	struct logfs_super *super = logfs_super(sb);
687 	int i;
688 
689 	btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool);
690 	logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1);
691 	logfs_init_candlist(&super->s_reserve_list,
692 			super->s_bad_seg_reserve, 1);
693 	for_each_area(i)
694 		logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0);
695 	logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 1);
696 	return 0;
697 }
698 
logfs_cleanup_list(struct super_block * sb,struct candidate_list * list)699 static void logfs_cleanup_list(struct super_block *sb,
700 		struct candidate_list *list)
701 {
702 	struct gc_candidate *cand;
703 
704 	while (list->count) {
705 		cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate,
706 				rb_node);
707 		remove_from_list(cand);
708 		free_candidate(sb, cand);
709 	}
710 	BUG_ON(list->rb_tree.rb_node);
711 }
712 
logfs_cleanup_gc(struct super_block * sb)713 void logfs_cleanup_gc(struct super_block *sb)
714 {
715 	struct logfs_super *super = logfs_super(sb);
716 	int i;
717 
718 	if (!super->s_free_list.count)
719 		return;
720 
721 	/*
722 	 * FIXME: The btree may still contain a single empty node.  So we
723 	 * call the grim visitor to clean up that mess.  Btree code should
724 	 * do it for us, really.
725 	 */
726 	btree_grim_visitor32(&super->s_cand_tree, 0, NULL);
727 	logfs_cleanup_list(sb, &super->s_free_list);
728 	logfs_cleanup_list(sb, &super->s_reserve_list);
729 	for_each_area(i)
730 		logfs_cleanup_list(sb, &super->s_low_list[i]);
731 	logfs_cleanup_list(sb, &super->s_ec_list);
732 }
733