1 /*
2 * fs/logfs/gc.c - garbage collection code
3 *
4 * As should be obvious for Linux kernel code, license is GPLv2
5 *
6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7 */
8 #include "logfs.h"
9 #include <linux/sched.h>
10 #include <linux/slab.h>
11
12 /*
13 * Wear leveling needs to kick in when the difference between low erase
14 * counts and high erase counts gets too big. A good value for "too big"
15 * may be somewhat below 10% of maximum erase count for the device.
16 * Why not 397, to pick a nice round number with no specific meaning? :)
17 *
18 * WL_RATELIMIT is the minimum time between two wear level events. A huge
19 * number of segments may fulfil the requirements for wear leveling at the
20 * same time. If that happens we don't want to cause a latency from hell,
21 * but just gently pick one segment every so often and minimize overhead.
22 */
23 #define WL_DELTA 397
24 #define WL_RATELIMIT 100
25 #define MAX_OBJ_ALIASES 2600
26 #define SCAN_RATIO 512 /* number of scanned segments per gc'd segment */
27 #define LIST_SIZE 64 /* base size of candidate lists */
28 #define SCAN_ROUNDS 128 /* maximum number of complete medium scans */
29 #define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */
30
no_free_segments(struct super_block * sb)31 static int no_free_segments(struct super_block *sb)
32 {
33 struct logfs_super *super = logfs_super(sb);
34
35 return super->s_free_list.count;
36 }
37
38 /* journal has distance -1, top-most ifile layer distance 0 */
root_distance(struct super_block * sb,gc_level_t __gc_level)39 static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
40 {
41 struct logfs_super *super = logfs_super(sb);
42 u8 gc_level = (__force u8)__gc_level;
43
44 switch (gc_level) {
45 case 0: /* fall through */
46 case 1: /* fall through */
47 case 2: /* fall through */
48 case 3:
49 /* file data or indirect blocks */
50 return super->s_ifile_levels + super->s_iblock_levels - gc_level;
51 case 6: /* fall through */
52 case 7: /* fall through */
53 case 8: /* fall through */
54 case 9:
55 /* inode file data or indirect blocks */
56 return super->s_ifile_levels - (gc_level - 6);
57 default:
58 printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
59 gc_level);
60 WARN_ON(1);
61 return super->s_ifile_levels + super->s_iblock_levels;
62 }
63 }
64
segment_is_reserved(struct super_block * sb,u32 segno)65 static int segment_is_reserved(struct super_block *sb, u32 segno)
66 {
67 struct logfs_super *super = logfs_super(sb);
68 struct logfs_area *area;
69 void *reserved;
70 int i;
71
72 /* Some segments are reserved. Just pretend they were all valid */
73 reserved = btree_lookup32(&super->s_reserved_segments, segno);
74 if (reserved)
75 return 1;
76
77 /* Currently open segments */
78 for_each_area(i) {
79 area = super->s_area[i];
80 if (area->a_is_open && area->a_segno == segno)
81 return 1;
82 }
83
84 return 0;
85 }
86
logfs_mark_segment_bad(struct super_block * sb,u32 segno)87 static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
88 {
89 BUG();
90 }
91
92 /*
93 * Returns the bytes consumed by valid objects in this segment. Object headers
94 * are counted, the segment header is not.
95 */
logfs_valid_bytes(struct super_block * sb,u32 segno,u32 * ec,gc_level_t * gc_level)96 static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec,
97 gc_level_t *gc_level)
98 {
99 struct logfs_segment_entry se;
100 u32 ec_level;
101
102 logfs_get_segment_entry(sb, segno, &se);
103 if (se.ec_level == cpu_to_be32(BADSEG) ||
104 se.valid == cpu_to_be32(RESERVED))
105 return RESERVED;
106
107 ec_level = be32_to_cpu(se.ec_level);
108 *ec = ec_level >> 4;
109 *gc_level = GC_LEVEL(ec_level & 0xf);
110 return be32_to_cpu(se.valid);
111 }
112
logfs_cleanse_block(struct super_block * sb,u64 ofs,u64 ino,u64 bix,gc_level_t gc_level)113 static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
114 u64 bix, gc_level_t gc_level)
115 {
116 struct inode *inode;
117 int err, cookie;
118
119 inode = logfs_safe_iget(sb, ino, &cookie);
120 err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
121 BUG_ON(err);
122 logfs_safe_iput(inode, cookie);
123 }
124
logfs_gc_segment(struct super_block * sb,u32 segno)125 static u32 logfs_gc_segment(struct super_block *sb, u32 segno)
126 {
127 struct logfs_super *super = logfs_super(sb);
128 struct logfs_segment_header sh;
129 struct logfs_object_header oh;
130 u64 ofs, ino, bix;
131 u32 seg_ofs, logical_segno, cleaned = 0;
132 int err, len, valid;
133 gc_level_t gc_level;
134
135 LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);
136
137 btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
138 err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
139 BUG_ON(err);
140 gc_level = GC_LEVEL(sh.level);
141 logical_segno = be32_to_cpu(sh.segno);
142 if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
143 logfs_mark_segment_bad(sb, segno);
144 cleaned = -1;
145 goto out;
146 }
147
148 for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
149 seg_ofs + sizeof(oh) < super->s_segsize; ) {
150 ofs = dev_ofs(sb, logical_segno, seg_ofs);
151 err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
152 &oh);
153 BUG_ON(err);
154
155 if (!memchr_inv(&oh, 0xff, sizeof(oh)))
156 break;
157
158 if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
159 logfs_mark_segment_bad(sb, segno);
160 cleaned = super->s_segsize - 1;
161 goto out;
162 }
163
164 ino = be64_to_cpu(oh.ino);
165 bix = be64_to_cpu(oh.bix);
166 len = sizeof(oh) + be16_to_cpu(oh.len);
167 valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
168 if (valid == 1) {
169 logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
170 cleaned += len;
171 } else if (valid == 2) {
172 /* Will be invalid upon journal commit */
173 cleaned += len;
174 }
175 seg_ofs += len;
176 }
177 out:
178 btree_remove32(&super->s_reserved_segments, segno);
179 return cleaned;
180 }
181
add_list(struct gc_candidate * cand,struct candidate_list * list)182 static struct gc_candidate *add_list(struct gc_candidate *cand,
183 struct candidate_list *list)
184 {
185 struct rb_node **p = &list->rb_tree.rb_node;
186 struct rb_node *parent = NULL;
187 struct gc_candidate *cur;
188 int comp;
189
190 cand->list = list;
191 while (*p) {
192 parent = *p;
193 cur = rb_entry(parent, struct gc_candidate, rb_node);
194
195 if (list->sort_by_ec)
196 comp = cand->erase_count < cur->erase_count;
197 else
198 comp = cand->valid < cur->valid;
199
200 if (comp)
201 p = &parent->rb_left;
202 else
203 p = &parent->rb_right;
204 }
205 rb_link_node(&cand->rb_node, parent, p);
206 rb_insert_color(&cand->rb_node, &list->rb_tree);
207
208 if (list->count <= list->maxcount) {
209 list->count++;
210 return NULL;
211 }
212 cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
213 rb_erase(&cand->rb_node, &list->rb_tree);
214 cand->list = NULL;
215 return cand;
216 }
217
remove_from_list(struct gc_candidate * cand)218 static void remove_from_list(struct gc_candidate *cand)
219 {
220 struct candidate_list *list = cand->list;
221
222 rb_erase(&cand->rb_node, &list->rb_tree);
223 list->count--;
224 }
225
free_candidate(struct super_block * sb,struct gc_candidate * cand)226 static void free_candidate(struct super_block *sb, struct gc_candidate *cand)
227 {
228 struct logfs_super *super = logfs_super(sb);
229
230 btree_remove32(&super->s_cand_tree, cand->segno);
231 kfree(cand);
232 }
233
get_best_cand(struct super_block * sb,struct candidate_list * list,u32 * ec)234 u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec)
235 {
236 struct gc_candidate *cand;
237 u32 segno;
238
239 BUG_ON(list->count == 0);
240
241 cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
242 remove_from_list(cand);
243 segno = cand->segno;
244 if (ec)
245 *ec = cand->erase_count;
246 free_candidate(sb, cand);
247 return segno;
248 }
249
250 /*
251 * We have several lists to manage segments with. The reserve_list is used to
252 * deal with bad blocks. We try to keep the best (lowest ec) segments on this
253 * list.
254 * The free_list contains free segments for normal usage. It usually gets the
255 * second pick after the reserve_list. But when the free_list is running short
256 * it is more important to keep the free_list full than to keep a reserve.
257 *
258 * Segments that are not free are put onto a per-level low_list. If we have
259 * to run garbage collection, we pick a candidate from there. All segments on
260 * those lists should have at least some free space so GC will make progress.
261 *
262 * And last we have the ec_list, which is used to pick segments for wear
263 * leveling.
264 *
265 * If all appropriate lists are full, we simply free the candidate and forget
266 * about that segment for a while. We have better candidates for each purpose.
267 */
__add_candidate(struct super_block * sb,struct gc_candidate * cand)268 static void __add_candidate(struct super_block *sb, struct gc_candidate *cand)
269 {
270 struct logfs_super *super = logfs_super(sb);
271 u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;
272
273 if (cand->valid == 0) {
274 /* 100% free segments */
275 log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
276 cand->segno, cand->erase_count,
277 dev_ofs(sb, cand->segno, 0));
278 cand = add_list(cand, &super->s_reserve_list);
279 if (cand) {
280 log_gc_noisy("add free segment %x (ec %x) at %llx\n",
281 cand->segno, cand->erase_count,
282 dev_ofs(sb, cand->segno, 0));
283 cand = add_list(cand, &super->s_free_list);
284 }
285 } else {
286 /* good candidates for Garbage Collection */
287 if (cand->valid < full)
288 cand = add_list(cand, &super->s_low_list[cand->dist]);
289 /* good candidates for wear leveling,
290 * segments that were recently written get ignored */
291 if (cand)
292 cand = add_list(cand, &super->s_ec_list);
293 }
294 if (cand)
295 free_candidate(sb, cand);
296 }
297
add_candidate(struct super_block * sb,u32 segno,u32 valid,u32 ec,u8 dist)298 static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
299 u8 dist)
300 {
301 struct logfs_super *super = logfs_super(sb);
302 struct gc_candidate *cand;
303
304 cand = kmalloc(sizeof(*cand), GFP_NOFS);
305 if (!cand)
306 return -ENOMEM;
307
308 cand->segno = segno;
309 cand->valid = valid;
310 cand->erase_count = ec;
311 cand->dist = dist;
312
313 btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
314 __add_candidate(sb, cand);
315 return 0;
316 }
317
remove_segment_from_lists(struct super_block * sb,u32 segno)318 static void remove_segment_from_lists(struct super_block *sb, u32 segno)
319 {
320 struct logfs_super *super = logfs_super(sb);
321 struct gc_candidate *cand;
322
323 cand = btree_lookup32(&super->s_cand_tree, segno);
324 if (cand) {
325 remove_from_list(cand);
326 free_candidate(sb, cand);
327 }
328 }
329
scan_segment(struct super_block * sb,u32 segno)330 static void scan_segment(struct super_block *sb, u32 segno)
331 {
332 u32 valid, ec = 0;
333 gc_level_t gc_level = 0;
334 u8 dist;
335
336 if (segment_is_reserved(sb, segno))
337 return;
338
339 remove_segment_from_lists(sb, segno);
340 valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
341 if (valid == RESERVED)
342 return;
343
344 dist = root_distance(sb, gc_level);
345 add_candidate(sb, segno, valid, ec, dist);
346 }
347
first_in_list(struct candidate_list * list)348 static struct gc_candidate *first_in_list(struct candidate_list *list)
349 {
350 if (list->count == 0)
351 return NULL;
352 return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
353 }
354
355 /*
356 * Find the best segment for garbage collection. Main criterion is
357 * the segment requiring the least effort to clean. Secondary
358 * criterion is to GC on the lowest level available.
359 *
360 * So we search the least effort segment on the lowest level first,
361 * then move up and pick another segment iff is requires significantly
362 * less effort. Hence the LOGFS_MAX_OBJECTSIZE in the comparison.
363 */
get_candidate(struct super_block * sb)364 static struct gc_candidate *get_candidate(struct super_block *sb)
365 {
366 struct logfs_super *super = logfs_super(sb);
367 int i, max_dist;
368 struct gc_candidate *cand = NULL, *this;
369
370 max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS);
371
372 for (i = max_dist; i >= 0; i--) {
373 this = first_in_list(&super->s_low_list[i]);
374 if (!this)
375 continue;
376 if (!cand)
377 cand = this;
378 if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid)
379 cand = this;
380 }
381 return cand;
382 }
383
__logfs_gc_once(struct super_block * sb,struct gc_candidate * cand)384 static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
385 {
386 struct logfs_super *super = logfs_super(sb);
387 gc_level_t gc_level;
388 u32 cleaned, valid, segno, ec;
389 u8 dist;
390
391 if (!cand) {
392 log_gc("GC attempted, but no candidate found\n");
393 return 0;
394 }
395
396 segno = cand->segno;
397 dist = cand->dist;
398 valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
399 free_candidate(sb, cand);
400 log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n",
401 segno, (u64)segno << super->s_segshift,
402 dist, no_free_segments(sb), valid,
403 super->s_free_bytes);
404 cleaned = logfs_gc_segment(sb, segno);
405 log_gc("GC segment #%02x complete - now %x valid\n", segno,
406 valid - cleaned);
407 BUG_ON(cleaned != valid);
408 return 1;
409 }
410
logfs_gc_once(struct super_block * sb)411 static int logfs_gc_once(struct super_block *sb)
412 {
413 struct gc_candidate *cand;
414
415 cand = get_candidate(sb);
416 if (cand)
417 remove_from_list(cand);
418 return __logfs_gc_once(sb, cand);
419 }
420
421 /* returns 1 if a wrap occurs, 0 otherwise */
logfs_scan_some(struct super_block * sb)422 static int logfs_scan_some(struct super_block *sb)
423 {
424 struct logfs_super *super = logfs_super(sb);
425 u32 segno;
426 int i, ret = 0;
427
428 segno = super->s_sweeper;
429 for (i = SCAN_RATIO; i > 0; i--) {
430 segno++;
431 if (segno >= super->s_no_segs) {
432 segno = 0;
433 ret = 1;
434 /* Break out of the loop. We want to read a single
435 * block from the segment size on next invocation if
436 * SCAN_RATIO is set to match block size
437 */
438 break;
439 }
440
441 scan_segment(sb, segno);
442 }
443 super->s_sweeper = segno;
444 return ret;
445 }
446
447 /*
448 * In principle, this function should loop forever, looking for GC candidates
449 * and moving data. LogFS is designed in such a way that this loop is
450 * guaranteed to terminate.
451 *
452 * Limiting the loop to some iterations serves purely to catch cases when
453 * these guarantees have failed. An actual endless loop is an obvious bug
454 * and should be reported as such.
455 */
__logfs_gc_pass(struct super_block * sb,int target)456 static void __logfs_gc_pass(struct super_block *sb, int target)
457 {
458 struct logfs_super *super = logfs_super(sb);
459 struct logfs_block *block;
460 int round, progress, last_progress = 0;
461
462 /*
463 * Doing too many changes to the segfile at once would result
464 * in a large number of aliases. Write the journal before
465 * things get out of hand.
466 */
467 if (super->s_shadow_tree.no_shadowed_segments >= MAX_OBJ_ALIASES)
468 logfs_write_anchor(sb);
469
470 if (no_free_segments(sb) >= target &&
471 super->s_no_object_aliases < MAX_OBJ_ALIASES)
472 return;
473
474 log_gc("__logfs_gc_pass(%x)\n", target);
475 for (round = 0; round < SCAN_ROUNDS; ) {
476 if (no_free_segments(sb) >= target)
477 goto write_alias;
478
479 /* Sync in-memory state with on-medium state in case they
480 * diverged */
481 logfs_write_anchor(sb);
482 round += logfs_scan_some(sb);
483 if (no_free_segments(sb) >= target)
484 goto write_alias;
485 progress = logfs_gc_once(sb);
486 if (progress)
487 last_progress = round;
488 else if (round - last_progress > 2)
489 break;
490 continue;
491
492 /*
493 * The goto logic is nasty, I just don't know a better way to
494 * code it. GC is supposed to ensure two things:
495 * 1. Enough free segments are available.
496 * 2. The number of aliases is bounded.
497 * When 1. is achieved, we take a look at 2. and write back
498 * some alias-containing blocks, if necessary. However, after
499 * each such write we need to go back to 1., as writes can
500 * consume free segments.
501 */
502 write_alias:
503 if (super->s_no_object_aliases < MAX_OBJ_ALIASES)
504 return;
505 if (list_empty(&super->s_object_alias)) {
506 /* All aliases are still in btree */
507 return;
508 }
509 log_gc("Write back one alias\n");
510 block = list_entry(super->s_object_alias.next,
511 struct logfs_block, alias_list);
512 block->ops->write_block(block);
513 /*
514 * To round off the nasty goto logic, we reset round here. It
515 * is a safety-net for GC not making any progress and limited
516 * to something reasonably small. If incremented it for every
517 * single alias, the loop could terminate rather quickly.
518 */
519 round = 0;
520 }
521 LOGFS_BUG(sb);
522 }
523
wl_ratelimit(struct super_block * sb,u64 * next_event)524 static int wl_ratelimit(struct super_block *sb, u64 *next_event)
525 {
526 struct logfs_super *super = logfs_super(sb);
527
528 if (*next_event < super->s_gec) {
529 *next_event = super->s_gec + WL_RATELIMIT;
530 return 0;
531 }
532 return 1;
533 }
534
logfs_wl_pass(struct super_block * sb)535 static void logfs_wl_pass(struct super_block *sb)
536 {
537 struct logfs_super *super = logfs_super(sb);
538 struct gc_candidate *wl_cand, *free_cand;
539
540 if (wl_ratelimit(sb, &super->s_wl_gec_ostore))
541 return;
542
543 wl_cand = first_in_list(&super->s_ec_list);
544 if (!wl_cand)
545 return;
546 free_cand = first_in_list(&super->s_free_list);
547 if (!free_cand)
548 return;
549
550 if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) {
551 remove_from_list(wl_cand);
552 __logfs_gc_once(sb, wl_cand);
553 }
554 }
555
556 /*
557 * The journal needs wear leveling as well. But moving the journal is an
558 * expensive operation so we try to avoid it as much as possible. And if we
559 * have to do it, we move the whole journal, not individual segments.
560 *
561 * Ratelimiting is not strictly necessary here, it mainly serves to avoid the
562 * calculations. First we check whether moving the journal would be a
563 * significant improvement. That means that a) the current journal segments
564 * have more wear than the future journal segments and b) the current journal
565 * segments have more wear than normal ostore segments.
566 * Rationale for b) is that we don't have to move the journal if it is aging
567 * less than the ostore, even if the reserve segments age even less (they are
568 * excluded from wear leveling, after all).
569 * Next we check that the superblocks have less wear than the journal. Since
570 * moving the journal requires writing the superblocks, we have to protect the
571 * superblocks even more than the journal.
572 *
573 * Also we double the acceptable wear difference, compared to ostore wear
574 * leveling. Journal data is read and rewritten rapidly, comparatively. So
575 * soft errors have much less time to accumulate and we allow the journal to
576 * be a bit worse than the ostore.
577 */
logfs_journal_wl_pass(struct super_block * sb)578 static void logfs_journal_wl_pass(struct super_block *sb)
579 {
580 struct logfs_super *super = logfs_super(sb);
581 struct gc_candidate *cand;
582 u32 min_journal_ec = -1, max_reserve_ec = 0;
583 int i;
584
585 if (wl_ratelimit(sb, &super->s_wl_gec_journal))
586 return;
587
588 if (super->s_reserve_list.count < super->s_no_journal_segs) {
589 /* Reserve is not full enough to move complete journal */
590 return;
591 }
592
593 journal_for_each(i)
594 if (super->s_journal_seg[i])
595 min_journal_ec = min(min_journal_ec,
596 super->s_journal_ec[i]);
597 cand = rb_entry(rb_first(&super->s_free_list.rb_tree),
598 struct gc_candidate, rb_node);
599 max_reserve_ec = cand->erase_count;
600 for (i = 0; i < 2; i++) {
601 struct logfs_segment_entry se;
602 u32 segno = seg_no(sb, super->s_sb_ofs[i]);
603 u32 ec;
604
605 logfs_get_segment_entry(sb, segno, &se);
606 ec = be32_to_cpu(se.ec_level) >> 4;
607 max_reserve_ec = max(max_reserve_ec, ec);
608 }
609
610 if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) {
611 do_logfs_journal_wl_pass(sb);
612 }
613 }
614
logfs_gc_pass(struct super_block * sb)615 void logfs_gc_pass(struct super_block *sb)
616 {
617 struct logfs_super *super = logfs_super(sb);
618
619 //BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex));
620 /* Write journal before free space is getting saturated with dirty
621 * objects.
622 */
623 if (super->s_dirty_used_bytes + super->s_dirty_free_bytes
624 + LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes)
625 logfs_write_anchor(sb);
626 __logfs_gc_pass(sb, super->s_total_levels);
627 logfs_wl_pass(sb);
628 logfs_journal_wl_pass(sb);
629 }
630
check_area(struct super_block * sb,int i)631 static int check_area(struct super_block *sb, int i)
632 {
633 struct logfs_super *super = logfs_super(sb);
634 struct logfs_area *area = super->s_area[i];
635 gc_level_t gc_level;
636 u32 cleaned, valid, ec;
637 u32 segno = area->a_segno;
638 u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
639
640 if (!area->a_is_open)
641 return 0;
642
643 if (super->s_devops->can_write_buf(sb, ofs) == 0)
644 return 0;
645
646 printk(KERN_INFO"LogFS: Possibly incomplete write at %llx\n", ofs);
647 /*
648 * The device cannot write back the write buffer. Most likely the
649 * wbuf was already written out and the system crashed at some point
650 * before the journal commit happened. In that case we wouldn't have
651 * to do anything. But if the crash happened before the wbuf was
652 * written out correctly, we must GC this segment. So assume the
653 * worst and always do the GC run.
654 */
655 area->a_is_open = 0;
656 valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
657 cleaned = logfs_gc_segment(sb, segno);
658 if (cleaned != valid)
659 return -EIO;
660 return 0;
661 }
662
logfs_check_areas(struct super_block * sb)663 int logfs_check_areas(struct super_block *sb)
664 {
665 int i, err;
666
667 for_each_area(i) {
668 err = check_area(sb, i);
669 if (err)
670 return err;
671 }
672 return 0;
673 }
674
logfs_init_candlist(struct candidate_list * list,int maxcount,int sort_by_ec)675 static void logfs_init_candlist(struct candidate_list *list, int maxcount,
676 int sort_by_ec)
677 {
678 list->count = 0;
679 list->maxcount = maxcount;
680 list->sort_by_ec = sort_by_ec;
681 list->rb_tree = RB_ROOT;
682 }
683
logfs_init_gc(struct super_block * sb)684 int logfs_init_gc(struct super_block *sb)
685 {
686 struct logfs_super *super = logfs_super(sb);
687 int i;
688
689 btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool);
690 logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1);
691 logfs_init_candlist(&super->s_reserve_list,
692 super->s_bad_seg_reserve, 1);
693 for_each_area(i)
694 logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0);
695 logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 1);
696 return 0;
697 }
698
logfs_cleanup_list(struct super_block * sb,struct candidate_list * list)699 static void logfs_cleanup_list(struct super_block *sb,
700 struct candidate_list *list)
701 {
702 struct gc_candidate *cand;
703
704 while (list->count) {
705 cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate,
706 rb_node);
707 remove_from_list(cand);
708 free_candidate(sb, cand);
709 }
710 BUG_ON(list->rb_tree.rb_node);
711 }
712
logfs_cleanup_gc(struct super_block * sb)713 void logfs_cleanup_gc(struct super_block *sb)
714 {
715 struct logfs_super *super = logfs_super(sb);
716 int i;
717
718 if (!super->s_free_list.count)
719 return;
720
721 /*
722 * FIXME: The btree may still contain a single empty node. So we
723 * call the grim visitor to clean up that mess. Btree code should
724 * do it for us, really.
725 */
726 btree_grim_visitor32(&super->s_cand_tree, 0, NULL);
727 logfs_cleanup_list(sb, &super->s_free_list);
728 logfs_cleanup_list(sb, &super->s_reserve_list);
729 for_each_area(i)
730 logfs_cleanup_list(sb, &super->s_low_list[i]);
731 logfs_cleanup_list(sb, &super->s_ec_list);
732 }
733