1 /*
2 * Copyright (C) 2011 Red Hat, Inc.
3 *
4 * This file is released under the GPL.
5 */
6
7 #include "dm-space-map-common.h"
8 #include "dm-transaction-manager.h"
9 #include "dm-btree-internal.h"
10 #include "dm-persistent-data-internal.h"
11
12 #include <linux/bitops.h>
13 #include <linux/device-mapper.h>
14
15 #define DM_MSG_PREFIX "space map common"
16
17 /*----------------------------------------------------------------*/
18
19 /*
20 * Index validator.
21 */
22 #define INDEX_CSUM_XOR 160478
23
/*
 * prepare_for_write hook of the index validator.
 *
 * Stamps the block with its own location, then stores a checksum taken
 * from the 'padding' field onwards over block_size - sizeof(__le32)
 * bytes, seeded with INDEX_CSUM_XOR.
 */
static void index_prepare_for_write(struct dm_block_validator *v,
				    struct dm_block *b,
				    size_t block_size)
{
	struct disk_metadata_index *index_le = dm_block_data(b);
	const size_t csum_len = block_size - sizeof(__le32);

	index_le->blocknr = cpu_to_le64(dm_block_location(b));

	/* The checksum is only taken once blocknr has been filled in. */
	index_le->csum = cpu_to_le32(dm_bm_checksum(&index_le->padding,
						    csum_len,
						    INDEX_CSUM_XOR));
}
35
/*
 * check hook of the index validator.
 *
 * Returns 0 when the block is consistent, -ENOTBLK when the location
 * recorded in the block differs from where it was actually read, and
 * -EILSEQ when the stored checksum does not match the recomputed one.
 */
static int index_check(struct dm_block_validator *v,
		       struct dm_block *b,
		       size_t block_size)
{
	struct disk_metadata_index *index_le = dm_block_data(b);
	__le32 computed;

	if (le64_to_cpu(index_le->blocknr) != dm_block_location(b)) {
		DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu",
			    le64_to_cpu(index_le->blocknr), dm_block_location(b));
		return -ENOTBLK;
	}

	computed = cpu_to_le32(dm_bm_checksum(&index_le->padding,
					      block_size - sizeof(__le32),
					      INDEX_CSUM_XOR));
	if (computed == index_le->csum)
		return 0;

	DMERR_LIMIT("index_check failed: csum %u != wanted %u",
		    le32_to_cpu(computed), le32_to_cpu(index_le->csum));
	return -EILSEQ;
}
60
61 static struct dm_block_validator index_validator = {
62 .name = "index",
63 .prepare_for_write = index_prepare_for_write,
64 .check = index_check
65 };
66
67 /*----------------------------------------------------------------*/
68
69 /*
70 * Bitmap validator
71 */
72 #define BITMAP_CSUM_XOR 240779
73
/*
 * prepare_for_write hook of the bitmap validator.
 *
 * Stamps the bitmap header with the block's own location, then stores a
 * checksum taken from the 'not_used' field onwards over
 * block_size - sizeof(__le32) bytes, seeded with BITMAP_CSUM_XOR.
 */
static void dm_bitmap_prepare_for_write(struct dm_block_validator *v,
					struct dm_block *b,
					size_t block_size)
{
	struct disk_bitmap_header *hdr = dm_block_data(b);
	const size_t csum_len = block_size - sizeof(__le32);

	hdr->blocknr = cpu_to_le64(dm_block_location(b));

	/* The checksum is only taken once blocknr has been filled in. */
	hdr->csum = cpu_to_le32(dm_bm_checksum(&hdr->not_used,
					       csum_len,
					       BITMAP_CSUM_XOR));
}
85
/*
 * check hook of the bitmap validator.
 *
 * Returns 0 when the block is consistent, -ENOTBLK when the location
 * recorded in the header differs from where the block was actually read,
 * and -EILSEQ when the stored checksum does not match the recomputed one.
 */
static int dm_bitmap_check(struct dm_block_validator *v,
			   struct dm_block *b,
			   size_t block_size)
{
	struct disk_bitmap_header *hdr = dm_block_data(b);
	__le32 computed;

	if (le64_to_cpu(hdr->blocknr) != dm_block_location(b)) {
		DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu",
			    le64_to_cpu(hdr->blocknr), dm_block_location(b));
		return -ENOTBLK;
	}

	computed = cpu_to_le32(dm_bm_checksum(&hdr->not_used,
					      block_size - sizeof(__le32),
					      BITMAP_CSUM_XOR));
	if (computed == hdr->csum)
		return 0;

	DMERR_LIMIT("bitmap check failed: csum %u != wanted %u",
		    le32_to_cpu(computed), le32_to_cpu(hdr->csum));
	return -EILSEQ;
}
110
111 static struct dm_block_validator dm_sm_bitmap_validator = {
112 .name = "sm_bitmap",
113 .prepare_for_write = dm_bitmap_prepare_for_write,
114 .check = dm_bitmap_check,
115 };
116
117 /*----------------------------------------------------------------*/
118
119 #define ENTRIES_PER_WORD 32
120 #define ENTRIES_SHIFT 5
121
dm_bitmap_data(struct dm_block * b)122 static void *dm_bitmap_data(struct dm_block *b)
123 {
124 return dm_block_data(b) + sizeof(struct disk_bitmap_header);
125 }
126
127 #define WORD_MASK_HIGH 0xAAAAAAAAAAAAAAAAULL
128
dm_bitmap_word_used(void * addr,unsigned b)129 static unsigned dm_bitmap_word_used(void *addr, unsigned b)
130 {
131 __le64 *words_le = addr;
132 __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
133
134 uint64_t bits = le64_to_cpu(*w_le);
135 uint64_t mask = (bits + WORD_MASK_HIGH + 1) & WORD_MASK_HIGH;
136
137 return !(~bits & mask);
138 }
139
sm_lookup_bitmap(void * addr,unsigned b)140 static unsigned sm_lookup_bitmap(void *addr, unsigned b)
141 {
142 __le64 *words_le = addr;
143 __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
144 unsigned hi, lo;
145
146 b = (b & (ENTRIES_PER_WORD - 1)) << 1;
147 hi = !!test_bit_le(b, (void *) w_le);
148 lo = !!test_bit_le(b + 1, (void *) w_le);
149 return (hi << 1) | lo;
150 }
151
sm_set_bitmap(void * addr,unsigned b,unsigned val)152 static void sm_set_bitmap(void *addr, unsigned b, unsigned val)
153 {
154 __le64 *words_le = addr;
155 __le64 *w_le = words_le + (b >> ENTRIES_SHIFT);
156
157 b = (b & (ENTRIES_PER_WORD - 1)) << 1;
158
159 if (val & 2)
160 __set_bit_le(b, (void *) w_le);
161 else
162 __clear_bit_le(b, (void *) w_le);
163
164 if (val & 1)
165 __set_bit_le(b + 1, (void *) w_le);
166 else
167 __clear_bit_le(b + 1, (void *) w_le);
168 }
169
sm_find_free(void * addr,unsigned begin,unsigned end,unsigned * result)170 static int sm_find_free(void *addr, unsigned begin, unsigned end,
171 unsigned *result)
172 {
173 while (begin < end) {
174 if (!(begin & (ENTRIES_PER_WORD - 1)) &&
175 dm_bitmap_word_used(addr, begin)) {
176 begin += ENTRIES_PER_WORD;
177 continue;
178 }
179
180 if (!sm_lookup_bitmap(addr, begin)) {
181 *result = begin;
182 return 0;
183 }
184
185 begin++;
186 }
187
188 return -ENOSPC;
189 }
190
191 /*----------------------------------------------------------------*/
192
sm_ll_init(struct ll_disk * ll,struct dm_transaction_manager * tm)193 static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm)
194 {
195 memset(ll, 0, sizeof(struct ll_disk));
196
197 ll->tm = tm;
198
199 ll->bitmap_info.tm = tm;
200 ll->bitmap_info.levels = 1;
201
202 /*
203 * Because the new bitmap blocks are created via a shadow
204 * operation, the old entry has already had its reference count
205 * decremented and we don't need the btree to do any bookkeeping.
206 */
207 ll->bitmap_info.value_type.size = sizeof(struct disk_index_entry);
208 ll->bitmap_info.value_type.inc = NULL;
209 ll->bitmap_info.value_type.dec = NULL;
210 ll->bitmap_info.value_type.equal = NULL;
211
212 ll->ref_count_info.tm = tm;
213 ll->ref_count_info.levels = 1;
214 ll->ref_count_info.value_type.size = sizeof(uint32_t);
215 ll->ref_count_info.value_type.inc = NULL;
216 ll->ref_count_info.value_type.dec = NULL;
217 ll->ref_count_info.value_type.equal = NULL;
218
219 ll->block_size = dm_bm_block_size(dm_tm_get_bm(tm));
220
221 if (ll->block_size > (1 << 30)) {
222 DMERR("block size too big to hold bitmaps");
223 return -EINVAL;
224 }
225
226 ll->entries_per_block = (ll->block_size - sizeof(struct disk_bitmap_header)) *
227 ENTRIES_PER_BYTE;
228 ll->nr_blocks = 0;
229 ll->bitmap_root = 0;
230 ll->ref_count_root = 0;
231 ll->bitmap_index_changed = false;
232
233 return 0;
234 }
235
/*
 * Grows the space map by @extra_blocks blocks.
 *
 * Allocates one new bitmap block for every bitmap needed beyond those
 * covering the current size, and saves an index entry for each with
 * nr_free set to entries_per_block.  Returns -EINVAL ("space map too
 * large") when the required number of bitmaps exceeds ll->max_entries(),
 * a negative error from block allocation or save_ie, or 0 on success.
 */
int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
{
	int r;
	dm_block_t i;
	dm_block_t new_total = ll->nr_blocks + extra_blocks;
	unsigned first_new = dm_sector_div_up(ll->nr_blocks, ll->entries_per_block);
	unsigned nr_bitmaps = dm_sector_div_up(new_total, ll->entries_per_block);
	dm_block_t nr_indexes = dm_sector_div_up(new_total, ll->entries_per_block);

	if (nr_indexes > ll->max_entries(ll)) {
		DMERR("space map too large");
		return -EINVAL;
	}

	/*
	 * We need to set this before the dm_tm_new_block() call below.
	 */
	ll->nr_blocks = new_total;

	for (i = first_new; i < nr_bitmaps; i++) {
		struct dm_block *bitmap;
		struct disk_index_entry idx;

		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &bitmap);
		if (r < 0)
			return r;

		/* Only the location is needed; drop the lock straight away. */
		idx.blocknr = cpu_to_le64(dm_block_location(bitmap));
		dm_tm_unlock(ll->tm, bitmap);

		idx.nr_free = cpu_to_le32(ll->entries_per_block);
		idx.none_free_before = 0;

		r = ll->save_ie(ll, i, &idx);
		if (r < 0)
			return r;
	}

	return 0;
}
278
/*
 * Reads the raw 2-bit bitmap value (0..3) for block @b into *result.
 *
 * Returns -EINVAL when @b is not below ll->nr_blocks, a negative error
 * from loading the index entry or read-locking the bitmap, else 0.
 */
int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
{
	int r;
	uint32_t bit;
	dm_block_t index = b;
	struct disk_index_entry ie_disk;
	struct dm_block *bitmap;

	if (b >= ll->nr_blocks) {
		DMERR_LIMIT("metadata block out of bounds");
		return -EINVAL;
	}

	/* do_div() leaves the quotient in index and hands back the remainder. */
	bit = do_div(index, ll->entries_per_block);

	r = ll->load_ie(ll, index, &ie_disk);
	if (r < 0)
		return r;

	r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
			    &dm_sm_bitmap_validator, &bitmap);
	if (r < 0)
		return r;

	*result = sm_lookup_bitmap(dm_bitmap_data(bitmap), bit);
	dm_tm_unlock(ll->tm, bitmap);

	return 0;
}
307
/*
 * Looks block @b up in the reference count btree and, unless the lookup
 * returned a negative error, stores the CPU-endian count in *result.
 * The lookup's return value is passed through unchanged.
 */
static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
				      uint32_t *result)
{
	__le32 count_le;
	int r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root,
				&b, &count_le);

	if (r >= 0)
		*result = le32_to_cpu(count_le);

	return r;
}
322
/*
 * Returns the reference count of block @b in *result.
 *
 * Bitmap values other than 3 are the count itself; a bitmap value of 3
 * means the real count has to be fetched from the reference count btree.
 */
int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
{
	int r = sm_ll_lookup_bitmap(ll, b, result);

	/* *result is only examined when the bitmap lookup succeeded. */
	if (r || *result != 3)
		return r;

	return sm_ll_lookup_big_ref_count(ll, b, result);
}
335
/*
 * Finds the first block in [begin, end) whose bitmap entry is zero.
 *
 * Walks the bitmaps covering the range, skipping any whose index entry
 * reports nr_free == 0.  On success the block number is stored in
 * *result and 0 is returned.  Returns -ENOSPC when nothing is free, or a
 * negative error from load_ie / dm_tm_read_lock.
 */
int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
			  dm_block_t end, dm_block_t *result)
{
	int r;
	struct disk_index_entry ie_disk;
	dm_block_t i, index_begin = begin;
	dm_block_t index_end = dm_sector_div_up(end, ll->entries_per_block);

	/*
	 * FIXME: Use shifts
	 */
	begin = do_div(index_begin, ll->entries_per_block);
	end = do_div(end, ll->entries_per_block);
	if (!end)
		end = ll->entries_per_block;

	/* Only the first bitmap starts part way in; later ones start at 0. */
	for (i = index_begin; i < index_end; i++, begin = 0) {
		struct dm_block *bitmap;
		unsigned found;
		uint32_t bit_end;

		r = ll->load_ie(ll, i, &ie_disk);
		if (r < 0)
			return r;

		if (!le32_to_cpu(ie_disk.nr_free))
			continue;

		r = dm_tm_read_lock(ll->tm, le64_to_cpu(ie_disk.blocknr),
				    &dm_sm_bitmap_validator, &bitmap);
		if (r < 0)
			return r;

		/* Only the last bitmap in the range is cut short. */
		if (i == index_end - 1)
			bit_end = end;
		else
			bit_end = ll->entries_per_block;

		r = sm_find_free(dm_bitmap_data(bitmap),
				 max_t(unsigned, begin,
				       le32_to_cpu(ie_disk.none_free_before)),
				 bit_end, &found);
		dm_tm_unlock(ll->tm, bitmap);

		/*
		 * This might happen because we started searching
		 * part way through the bitmap.
		 */
		if (r == -ENOSPC)
			continue;

		*result = i * ll->entries_per_block + (dm_block_t) found;
		return 0;
	}

	return -ENOSPC;
}
391
/*
 * Finds a block that is free in @new_ll and also has a zero count in
 * @old_ll (blocks at or beyond old_ll->nr_blocks count as unused there).
 *
 * The block is returned through *b.  Note that the search in @new_ll
 * always runs up to new_ll->nr_blocks; the @end argument is not
 * consulted.  Returns 0 on success or the error from the underlying
 * search / lookup.
 */
int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll,
				 dm_block_t begin, dm_block_t end, dm_block_t *b)
{
	int r;
	uint32_t old_count;

	for (;;) {
		r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b);
		if (r)
			return r;

		/* double check this block wasn't used in the old transaction */
		if (*b >= old_ll->nr_blocks)
			return 0;

		r = sm_ll_lookup(old_ll, *b, &old_count);
		if (r)
			return r;

		if (!old_count)
			return 0;

		/* Still referenced by the old map: retry past it. */
		begin = *b + 1;
	}
}
418
419 /*----------------------------------------------------------------*/
420
/*
 * Sets the reference count of block @b to @ref_count.
 *
 * Counts up to 2 live in the bitmap directly; larger counts are recorded
 * as 3 in the bitmap with the real value kept in the reference count
 * btree.  *nr_allocations is set to 1 when the block goes from unused to
 * used, -1 for the reverse and 0 otherwise; ll->nr_allocated and the
 * index entry's nr_free / none_free_before are adjusted to match.
 *
 * Returns 0 on success or the error of the failing step.
 */
int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
		 uint32_t ref_count, int32_t *nr_allocations)
{
	int r;
	int inc;
	uint32_t bit, old;
	void *bm_le;
	struct dm_block *nb;
	struct disk_index_entry ie_disk;
	dm_block_t index = b;

	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ie_disk);
	if (r < 0)
		return r;

	r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ie_disk.blocknr),
			       &dm_sm_bitmap_validator, &nb, &inc);
	if (r < 0) {
		DMERR("dm_tm_shadow_block() failed");
		return r;
	}

	/* The index entry must point at the shadowed bitmap from now on. */
	ie_disk.blocknr = cpu_to_le64(dm_block_location(nb));
	bm_le = dm_bitmap_data(nb);

	/* A bitmap value of 3 means the real count is in the btree. */
	old = sm_lookup_bitmap(bm_le, bit);
	if (old > 2)
		r = sm_ll_lookup_big_ref_count(ll, b, &old);

	if (r) {
		dm_tm_unlock(ll->tm, nb);
		return r;
	}

	/* The bitmap is updated and released before touching the btree. */
	sm_set_bitmap(bm_le, bit, ref_count <= 2 ? ref_count : 3);
	dm_tm_unlock(ll->tm, nb);

	if (ref_count > 2) {
		__le32 le_rc = cpu_to_le32(ref_count);

		__dm_bless_for_disk(&le_rc);
		r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
				    &b, &le_rc, &ll->ref_count_root);
		if (r < 0) {
			DMERR("ref count insert failed");
			return r;
		}
	} else if (old > 2) {
		/* The count dropped back into bitmap range. */
		r = dm_btree_remove(&ll->ref_count_info,
				    ll->ref_count_root,
				    &b, &ll->ref_count_root);
		if (r)
			return r;
	}

	if (ref_count && !old) {
		/* unused -> used */
		*nr_allocations = 1;
		ll->nr_allocated++;
		le32_add_cpu(&ie_disk.nr_free, -1);
		if (le32_to_cpu(ie_disk.none_free_before) == bit)
			ie_disk.none_free_before = cpu_to_le32(bit + 1);

	} else if (old && !ref_count) {
		/* used -> unused */
		*nr_allocations = -1;
		ll->nr_allocated--;
		le32_add_cpu(&ie_disk.nr_free, 1);
		ie_disk.none_free_before = cpu_to_le32(min(le32_to_cpu(ie_disk.none_free_before), bit));
	} else {
		*nr_allocations = 0;
	}

	return ll->save_ie(ll, index, &ie_disk);
}
504
505 /*----------------------------------------------------------------*/
506
507 /*
508 * Holds useful intermediate results for the range based inc and dec
509 * operations.
510 */
511 struct inc_context {
512 struct disk_index_entry ie_disk;
513 struct dm_block *bitmap_block;
514 void *bitmap;
515
516 struct dm_block *overflow_leaf;
517 };
518
init_inc_context(struct inc_context * ic)519 static inline void init_inc_context(struct inc_context *ic)
520 {
521 ic->bitmap_block = NULL;
522 ic->bitmap = NULL;
523 ic->overflow_leaf = NULL;
524 }
525
exit_inc_context(struct ll_disk * ll,struct inc_context * ic)526 static inline void exit_inc_context(struct ll_disk *ll, struct inc_context *ic)
527 {
528 if (ic->bitmap_block)
529 dm_tm_unlock(ll->tm, ic->bitmap_block);
530 if (ic->overflow_leaf)
531 dm_tm_unlock(ll->tm, ic->overflow_leaf);
532 }
533
reset_inc_context(struct ll_disk * ll,struct inc_context * ic)534 static inline void reset_inc_context(struct ll_disk *ll, struct inc_context *ic)
535 {
536 exit_inc_context(ll, ic);
537 init_inc_context(ic);
538 }
539
540 /*
541 * Confirms a btree node contains a particular key at an index.
542 */
contains_key(struct btree_node * n,uint64_t key,int index)543 static bool contains_key(struct btree_node *n, uint64_t key, int index)
544 {
545 return index >= 0 &&
546 index < le32_to_cpu(n->header.nr_entries) &&
547 le64_to_cpu(n->keys[index]) == key;
548 }
549
/*
 * Slow path for incrementing a count held in the overflow btree: drops
 * everything the context holds, fetches the leaf for @b with
 * btree_get_overwrite_leaf() and bumps the stored value in place.
 *
 * On success the leaf stays locked in ic->overflow_leaf.  Returns
 * -EINVAL if the leaf has no entry for @b; the leaf is then still
 * recorded in @ic for the caller to release.
 */
static int __sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
	int r;
	int index;
	struct btree_node *leaf;
	__le32 *count_le;

	/*
	 * bitmap_block needs to be unlocked because getting the
	 * overflow_leaf may need to allocate, and thus use the space map.
	 */
	reset_inc_context(ll, ic);

	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	leaf = dm_block_data(ic->overflow_leaf);
	if (!contains_key(leaf, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	count_le = value_ptr(leaf, index);
	*count_le = cpu_to_le32(le32_to_cpu(*count_le) + 1);

	return 0;
}
582
/*
 * Increments the overflow btree count of block @b.
 *
 * If the leaf cached in @ic already holds the key the value is bumped in
 * place; otherwise the work is handed to __sm_ll_inc_overflow().
 */
static int sm_ll_inc_overflow(struct ll_disk *ll, dm_block_t b, struct inc_context *ic)
{
	struct btree_node *leaf;
	__le32 *count_le;
	int index;

	/* No cached leaf at all: take the slow path. */
	if (!ic->overflow_leaf)
		return __sm_ll_inc_overflow(ll, b, ic);

	/*
	 * Do we already have the correct overflow leaf?
	 */
	leaf = dm_block_data(ic->overflow_leaf);
	index = lower_bound(leaf, b);
	if (!contains_key(leaf, b, index))
		return __sm_ll_inc_overflow(ll, b, ic);

	count_le = value_ptr(leaf, index);
	*count_le = cpu_to_le32(le32_to_cpu(*count_le) + 1);

	return 0;
}
607
shadow_bitmap(struct ll_disk * ll,struct inc_context * ic)608 static inline int shadow_bitmap(struct ll_disk *ll, struct inc_context *ic)
609 {
610 int r, inc;
611 r = dm_tm_shadow_block(ll->tm, le64_to_cpu(ic->ie_disk.blocknr),
612 &dm_sm_bitmap_validator, &ic->bitmap_block, &inc);
613 if (r < 0) {
614 DMERR("dm_tm_shadow_block() failed");
615 return r;
616 }
617 ic->ie_disk.blocknr = cpu_to_le64(dm_block_location(ic->bitmap_block));
618 ic->bitmap = dm_bitmap_data(ic->bitmap_block);
619 return 0;
620 }
621
622 /*
623 * Once shadow_bitmap has been called, which always happens at the start of inc/dec,
624 * we can reopen the bitmap with a simple write lock, rather than re calling
625 * dm_tm_shadow_block().
626 */
ensure_bitmap(struct ll_disk * ll,struct inc_context * ic)627 static inline int ensure_bitmap(struct ll_disk *ll, struct inc_context *ic)
628 {
629 if (!ic->bitmap_block) {
630 int r = dm_bm_write_lock(dm_tm_get_bm(ll->tm), le64_to_cpu(ic->ie_disk.blocknr),
631 &dm_sm_bitmap_validator, &ic->bitmap_block);
632 if (r) {
633 DMERR("unable to re-get write lock for bitmap");
634 return r;
635 }
636 ic->bitmap = dm_bitmap_data(ic->bitmap_block);
637 }
638
639 return 0;
640 }
641
642 /*
643 * Loops round incrementing entries in a single bitmap.
644 */
sm_ll_inc_bitmap(struct ll_disk * ll,dm_block_t b,uint32_t bit,uint32_t bit_end,int32_t * nr_allocations,dm_block_t * new_b,struct inc_context * ic)645 static inline int sm_ll_inc_bitmap(struct ll_disk *ll, dm_block_t b,
646 uint32_t bit, uint32_t bit_end,
647 int32_t *nr_allocations, dm_block_t *new_b,
648 struct inc_context *ic)
649 {
650 int r;
651 __le32 le_rc;
652 uint32_t old;
653
654 for (; bit != bit_end; bit++, b++) {
655 /*
656 * We only need to drop the bitmap if we need to find a new btree
657 * leaf for the overflow. So if it was dropped last iteration,
658 * we now re-get it.
659 */
660 r = ensure_bitmap(ll, ic);
661 if (r)
662 return r;
663
664 old = sm_lookup_bitmap(ic->bitmap, bit);
665 switch (old) {
666 case 0:
667 /* inc bitmap, adjust nr_allocated */
668 sm_set_bitmap(ic->bitmap, bit, 1);
669 (*nr_allocations)++;
670 ll->nr_allocated++;
671 le32_add_cpu(&ic->ie_disk.nr_free, -1);
672 if (le32_to_cpu(ic->ie_disk.none_free_before) == bit)
673 ic->ie_disk.none_free_before = cpu_to_le32(bit + 1);
674 break;
675
676 case 1:
677 /* inc bitmap */
678 sm_set_bitmap(ic->bitmap, bit, 2);
679 break;
680
681 case 2:
682 /* inc bitmap and insert into overflow */
683 sm_set_bitmap(ic->bitmap, bit, 3);
684 reset_inc_context(ll, ic);
685
686 le_rc = cpu_to_le32(3);
687 __dm_bless_for_disk(&le_rc);
688 r = dm_btree_insert(&ll->ref_count_info, ll->ref_count_root,
689 &b, &le_rc, &ll->ref_count_root);
690 if (r < 0) {
691 DMERR("ref count insert failed");
692 return r;
693 }
694 break;
695
696 default:
697 /*
698 * inc within the overflow tree only.
699 */
700 r = sm_ll_inc_overflow(ll, b, ic);
701 if (r < 0)
702 return r;
703 }
704 }
705
706 *new_b = b;
707 return 0;
708 }
709
710 /*
711 * Finds a bitmap that contains entries in the block range, and increments
712 * them.
713 */
__sm_ll_inc(struct ll_disk * ll,dm_block_t b,dm_block_t e,int32_t * nr_allocations,dm_block_t * new_b)714 static int __sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
715 int32_t *nr_allocations, dm_block_t *new_b)
716 {
717 int r;
718 struct inc_context ic;
719 uint32_t bit, bit_end;
720 dm_block_t index = b;
721
722 init_inc_context(&ic);
723
724 bit = do_div(index, ll->entries_per_block);
725 r = ll->load_ie(ll, index, &ic.ie_disk);
726 if (r < 0)
727 return r;
728
729 r = shadow_bitmap(ll, &ic);
730 if (r)
731 return r;
732
733 bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);
734 r = sm_ll_inc_bitmap(ll, b, bit, bit_end, nr_allocations, new_b, &ic);
735
736 exit_inc_context(ll, &ic);
737
738 if (r)
739 return r;
740
741 return ll->save_ie(ll, index, &ic.ie_disk);
742 }
743
/*
 * Increments the reference count of every block in [b, e).
 *
 * *nr_allocations is reset to 0 and then accumulated by the per-bitmap
 * helper.  Returns 0 on success or the first non-zero result.
 */
int sm_ll_inc(struct ll_disk *ll, dm_block_t b, dm_block_t e,
	      int32_t *nr_allocations)
{
	int r = 0;

	*nr_allocations = 0;

	/* Each call handles one bitmap and advances b past what it did. */
	while (!r && b != e)
		r = __sm_ll_inc(ll, b, e, nr_allocations, &b);

	return r;
}
756
757 /*----------------------------------------------------------------*/
758
/*
 * Removes block @b from the reference count btree.  Everything the
 * context holds is released first, before dm_btree_remove() is called.
 */
static int __sm_ll_del_overflow(struct ll_disk *ll, dm_block_t b,
				struct inc_context *ic)
{
	int r;

	reset_inc_context(ll, ic);

	r = dm_btree_remove(&ll->ref_count_info, ll->ref_count_root,
			    &b, &ll->ref_count_root);
	return r;
}
766
/*
 * Slow path for decrementing a count held in the overflow btree.
 *
 * Releases the context, fetches the leaf for @b and reports the count
 * found there through *old_rc.  A count of exactly 3 is removed from the
 * btree altogether; any other value is decremented in place.  Returns
 * -EINVAL if the leaf has no entry for @b.
 */
static int __sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
				struct inc_context *ic, uint32_t *old_rc)
{
	int r;
	int index = -1;
	struct btree_node *leaf;
	__le32 *count_le;
	uint32_t count;

	reset_inc_context(ll, ic);
	r = btree_get_overwrite_leaf(&ll->ref_count_info, ll->ref_count_root,
				     b, &index, &ll->ref_count_root, &ic->overflow_leaf);
	if (r < 0)
		return r;

	leaf = dm_block_data(ic->overflow_leaf);
	if (!contains_key(leaf, b, index)) {
		DMERR("overflow btree is missing an entry");
		return -EINVAL;
	}

	count_le = value_ptr(leaf, index);
	count = le32_to_cpu(*count_le);
	*old_rc = count;

	/* Dropping to 2 moves the count back into the bitmap. */
	if (count == 3)
		return __sm_ll_del_overflow(ll, b, ic);

	*count_le = cpu_to_le32(count - 1);
	return 0;
}
801
/*
 * Decrements the overflow btree count of block @b, reporting the count
 * found there through *old_rc.
 *
 * If the leaf cached in @ic holds the key, a count above 3 is decremented
 * in place and anything else is removed from the btree.  Without a
 * usable cached leaf the work is handed to __sm_ll_dec_overflow().
 */
static int sm_ll_dec_overflow(struct ll_disk *ll, dm_block_t b,
			      struct inc_context *ic, uint32_t *old_rc)
{
	struct btree_node *leaf;
	__le32 *count_le;
	uint32_t count;
	int index;

	if (!ic->overflow_leaf)
		return __sm_ll_dec_overflow(ll, b, ic, old_rc);

	/*
	 * Do we already have the correct overflow leaf?
	 */
	leaf = dm_block_data(ic->overflow_leaf);
	index = lower_bound(leaf, b);
	if (!contains_key(leaf, b, index))
		return __sm_ll_dec_overflow(ll, b, ic, old_rc);

	count_le = value_ptr(leaf, index);
	count = le32_to_cpu(*count_le);
	*old_rc = count;

	if (count <= 3)
		return __sm_ll_del_overflow(ll, b, ic);

	*count_le = cpu_to_le32(count - 1);
	return 0;
}
834
835 /*
836 * Loops round incrementing entries in a single bitmap.
837 */
sm_ll_dec_bitmap(struct ll_disk * ll,dm_block_t b,uint32_t bit,uint32_t bit_end,struct inc_context * ic,int32_t * nr_allocations,dm_block_t * new_b)838 static inline int sm_ll_dec_bitmap(struct ll_disk *ll, dm_block_t b,
839 uint32_t bit, uint32_t bit_end,
840 struct inc_context *ic,
841 int32_t *nr_allocations, dm_block_t *new_b)
842 {
843 int r;
844 uint32_t old;
845
846 for (; bit != bit_end; bit++, b++) {
847 /*
848 * We only need to drop the bitmap if we need to find a new btree
849 * leaf for the overflow. So if it was dropped last iteration,
850 * we now re-get it.
851 */
852 r = ensure_bitmap(ll, ic);
853 if (r)
854 return r;
855
856 old = sm_lookup_bitmap(ic->bitmap, bit);
857 switch (old) {
858 case 0:
859 DMERR("unable to decrement block");
860 return -EINVAL;
861
862 case 1:
863 /* dec bitmap */
864 sm_set_bitmap(ic->bitmap, bit, 0);
865 (*nr_allocations)--;
866 ll->nr_allocated--;
867 le32_add_cpu(&ic->ie_disk.nr_free, 1);
868 ic->ie_disk.none_free_before =
869 cpu_to_le32(min(le32_to_cpu(ic->ie_disk.none_free_before), bit));
870 break;
871
872 case 2:
873 /* dec bitmap and insert into overflow */
874 sm_set_bitmap(ic->bitmap, bit, 1);
875 break;
876
877 case 3:
878 r = sm_ll_dec_overflow(ll, b, ic, &old);
879 if (r < 0)
880 return r;
881
882 if (old == 3) {
883 r = ensure_bitmap(ll, ic);
884 if (r)
885 return r;
886
887 sm_set_bitmap(ic->bitmap, bit, 2);
888 }
889 break;
890 }
891 }
892
893 *new_b = b;
894 return 0;
895 }
896
/*
 * Decrements the entries of [b, e) that fall inside the bitmap containing
 * @b.  *new_b tells the caller where to carry on.  The updated index
 * entry is saved once the bitmap has been processed successfully.
 */
static int __sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
		       int32_t *nr_allocations, dm_block_t *new_b)
{
	int r;
	struct inc_context ic;
	dm_block_t index = b;
	uint32_t bit, bit_end;

	init_inc_context(&ic);

	bit = do_div(index, ll->entries_per_block);
	r = ll->load_ie(ll, index, &ic.ie_disk);
	if (r < 0)
		return r;

	r = shadow_bitmap(ll, &ic);
	if (r)
		return r;

	/* Stop at the end of the range or of this bitmap, whichever is first. */
	bit_end = min(bit + (e - b), (dm_block_t) ll->entries_per_block);

	r = sm_ll_dec_bitmap(ll, b, bit, bit_end, &ic, nr_allocations, new_b);
	exit_inc_context(ll, &ic);

	return r ? r : ll->save_ie(ll, index, &ic.ie_disk);
}
925
/*
 * Decrements the reference count of every block in [b, e).
 *
 * *nr_allocations is reset to 0 and then accumulated by the per-bitmap
 * helper.  Returns 0 on success or the first non-zero result.
 */
int sm_ll_dec(struct ll_disk *ll, dm_block_t b, dm_block_t e,
	      int32_t *nr_allocations)
{
	int r = 0;

	*nr_allocations = 0;

	/* Each call handles one bitmap and advances b past what it did. */
	while (!r && b != e)
		r = __sm_ll_dec(ll, b, e, nr_allocations, &b);

	return r;
}
938
939 /*----------------------------------------------------------------*/
940
sm_ll_commit(struct ll_disk * ll)941 int sm_ll_commit(struct ll_disk *ll)
942 {
943 int r = 0;
944
945 if (ll->bitmap_index_changed) {
946 r = ll->commit(ll);
947 if (!r)
948 ll->bitmap_index_changed = false;
949 }
950
951 return r;
952 }
953
954 /*----------------------------------------------------------------*/
955
/*
 * load_ie hook for the metadata space map: copies entry @index out of
 * the in-core index (ll->mi_le).  Always returns 0; @index is not range
 * checked here.
 */
static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index,
			       struct disk_index_entry *ie)
{
	memcpy(ie, &ll->mi_le.index[index], sizeof(*ie));

	return 0;
}
962
/*
 * save_ie hook for the metadata space map: stores @ie as entry @index of
 * the in-core index (ll->mi_le) and flags the index as changed so that
 * sm_ll_commit() will run the commit hook.  Always returns 0.
 */
static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index,
			       struct disk_index_entry *ie)
{
	memcpy(&ll->mi_le.index[index], ie, sizeof(*ie));
	ll->bitmap_index_changed = true;

	return 0;
}
970
metadata_ll_init_index(struct ll_disk * ll)971 static int metadata_ll_init_index(struct ll_disk *ll)
972 {
973 int r;
974 struct dm_block *b;
975
976 r = dm_tm_new_block(ll->tm, &index_validator, &b);
977 if (r < 0)
978 return r;
979
980 ll->bitmap_root = dm_block_location(b);
981
982 dm_tm_unlock(ll->tm, b);
983
984 return 0;
985 }
986
metadata_ll_open(struct ll_disk * ll)987 static int metadata_ll_open(struct ll_disk *ll)
988 {
989 int r;
990 struct dm_block *block;
991
992 r = dm_tm_read_lock(ll->tm, ll->bitmap_root,
993 &index_validator, &block);
994 if (r)
995 return r;
996
997 memcpy(&ll->mi_le, dm_block_data(block), sizeof(ll->mi_le));
998 dm_tm_unlock(ll->tm, block);
999
1000 return 0;
1001 }
1002
metadata_ll_max_entries(struct ll_disk * ll)1003 static dm_block_t metadata_ll_max_entries(struct ll_disk *ll)
1004 {
1005 return MAX_METADATA_BITMAPS;
1006 }
1007
metadata_ll_commit(struct ll_disk * ll)1008 static int metadata_ll_commit(struct ll_disk *ll)
1009 {
1010 int r, inc;
1011 struct dm_block *b;
1012
1013 r = dm_tm_shadow_block(ll->tm, ll->bitmap_root, &index_validator, &b, &inc);
1014 if (r)
1015 return r;
1016
1017 memcpy(dm_block_data(b), &ll->mi_le, sizeof(ll->mi_le));
1018 ll->bitmap_root = dm_block_location(b);
1019
1020 dm_tm_unlock(ll->tm, b);
1021
1022 return 0;
1023 }
1024
sm_ll_new_metadata(struct ll_disk * ll,struct dm_transaction_manager * tm)1025 int sm_ll_new_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm)
1026 {
1027 int r;
1028
1029 r = sm_ll_init(ll, tm);
1030 if (r < 0)
1031 return r;
1032
1033 ll->load_ie = metadata_ll_load_ie;
1034 ll->save_ie = metadata_ll_save_ie;
1035 ll->init_index = metadata_ll_init_index;
1036 ll->open_index = metadata_ll_open;
1037 ll->max_entries = metadata_ll_max_entries;
1038 ll->commit = metadata_ll_commit;
1039
1040 ll->nr_blocks = 0;
1041 ll->nr_allocated = 0;
1042
1043 r = ll->init_index(ll);
1044 if (r < 0)
1045 return r;
1046
1047 r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1048 if (r < 0)
1049 return r;
1050
1051 return 0;
1052 }
1053
/*
 * Sets up @ll from an existing metadata space map root.
 *
 * @root_le / @len describe a buffer holding a struct disk_sm_root.
 * Returns -ENOMEM if the buffer is too small, otherwise the result of
 * initialisation and of opening the index.
 */
int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct disk_sm_root root;

	if (len < sizeof(root)) {
		DMERR("sm_metadata root too small");
		return -ENOMEM;
	}

	/*
	 * We don't know the alignment of the root_le buffer, so need to
	 * copy into a new structure.
	 */
	memcpy(&root, root_le, sizeof(root));

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->load_ie     = metadata_ll_load_ie;
	ll->save_ie     = metadata_ll_save_ie;
	ll->init_index  = metadata_ll_init_index;
	ll->open_index  = metadata_ll_open;
	ll->max_entries = metadata_ll_max_entries;
	ll->commit      = metadata_ll_commit;

	ll->nr_blocks      = le64_to_cpu(root.nr_blocks);
	ll->nr_allocated   = le64_to_cpu(root.nr_allocated);
	ll->bitmap_root    = le64_to_cpu(root.bitmap_root);
	ll->ref_count_root = le64_to_cpu(root.ref_count_root);

	return ll->open_index(ll);
}
1089
1090 /*----------------------------------------------------------------*/
1091
ie_cache_writeback(struct ll_disk * ll,struct ie_cache * iec)1092 static inline int ie_cache_writeback(struct ll_disk *ll, struct ie_cache *iec)
1093 {
1094 iec->dirty = false;
1095 __dm_bless_for_disk(iec->ie);
1096 return dm_btree_insert(&ll->bitmap_info, ll->bitmap_root,
1097 &iec->index, &iec->ie, &ll->bitmap_root);
1098 }
1099
/*
 * Maps a bitmap index to its slot in the index entry cache, using
 * dm_hash_block() with IE_CACHE_MASK.
 */
static inline unsigned hash_index(dm_block_t index)
{
	unsigned slot = dm_hash_block(index, IE_CACHE_MASK);

	return slot;
}
1104
/*
 * load_ie hook for the disk space map.
 *
 * Serves the entry from the cache slot for @index when it holds that
 * index.  Otherwise a dirty occupant of the slot is written back first,
 * the entry is looked up in the bitmap index btree and, if the lookup
 * returned 0, the slot is refilled with it as a clean entry.
 */
static int disk_ll_load_ie(struct ll_disk *ll, dm_block_t index,
			   struct disk_index_entry *ie)
{
	int r;
	struct ie_cache *slot = ll->ie_cache + hash_index(index);

	/* Cache hit. */
	if (slot->valid && slot->index == index) {
		memcpy(ie, &slot->ie, sizeof(*ie));
		return 0;
	}

	/* The slot belongs to another index: flush it before reuse. */
	if (slot->valid && slot->dirty) {
		r = ie_cache_writeback(ll, slot);
		if (r)
			return r;
	}

	r = dm_btree_lookup(&ll->bitmap_info, ll->bitmap_root, &index, ie);
	if (r)
		return r;

	slot->valid = true;
	slot->dirty = false;
	slot->index = index;
	memcpy(&slot->ie, ie, sizeof(*ie));

	return 0;
}
1135
/*
 * save_ie hook for the disk space map.
 *
 * Stores @ie in the cache slot for @index and marks the slot dirty; the
 * btree itself is only touched when a dirty entry for a different index
 * has to be written back to make room.  The bitmap index is always
 * flagged as changed, even if that write-back then fails.
 */
static int disk_ll_save_ie(struct ll_disk *ll, dm_block_t index,
			   struct disk_index_entry *ie)
{
	struct ie_cache *slot = ll->ie_cache + hash_index(index);

	ll->bitmap_index_changed = true;

	/* Evict a dirty entry that belongs to some other index. */
	if (slot->valid && slot->index != index && slot->dirty) {
		int r = ie_cache_writeback(ll, slot);

		if (r)
			return r;
	}

	slot->valid = true;
	slot->dirty = true;
	slot->index = index;
	memcpy(&slot->ie, ie, sizeof(*ie));

	return 0;
}
1164
disk_ll_init_index(struct ll_disk * ll)1165 static int disk_ll_init_index(struct ll_disk *ll)
1166 {
1167 unsigned i;
1168 for (i = 0; i < IE_CACHE_SIZE; i++) {
1169 struct ie_cache *iec = ll->ie_cache + i;
1170 iec->valid = false;
1171 iec->dirty = false;
1172 }
1173 return dm_btree_empty(&ll->bitmap_info, &ll->bitmap_root);
1174 }
1175
/*
 * open_index hook for the disk space map: there is nothing to load up
 * front, so this always succeeds.
 */
static int disk_ll_open(struct ll_disk *ll)
{
	(void) ll;

	return 0;
}
1180
disk_ll_max_entries(struct ll_disk * ll)1181 static dm_block_t disk_ll_max_entries(struct ll_disk *ll)
1182 {
1183 return -1ULL;
1184 }
1185
disk_ll_commit(struct ll_disk * ll)1186 static int disk_ll_commit(struct ll_disk *ll)
1187 {
1188 int r = 0;
1189 unsigned i;
1190
1191 for (i = 0; i < IE_CACHE_SIZE; i++) {
1192 struct ie_cache *iec = ll->ie_cache + i;
1193 if (iec->valid && iec->dirty)
1194 r = ie_cache_writeback(ll, iec);
1195 }
1196
1197 return r;
1198 }
1199
sm_ll_new_disk(struct ll_disk * ll,struct dm_transaction_manager * tm)1200 int sm_ll_new_disk(struct ll_disk *ll, struct dm_transaction_manager *tm)
1201 {
1202 int r;
1203
1204 r = sm_ll_init(ll, tm);
1205 if (r < 0)
1206 return r;
1207
1208 ll->load_ie = disk_ll_load_ie;
1209 ll->save_ie = disk_ll_save_ie;
1210 ll->init_index = disk_ll_init_index;
1211 ll->open_index = disk_ll_open;
1212 ll->max_entries = disk_ll_max_entries;
1213 ll->commit = disk_ll_commit;
1214
1215 ll->nr_blocks = 0;
1216 ll->nr_allocated = 0;
1217
1218 r = ll->init_index(ll);
1219 if (r < 0)
1220 return r;
1221
1222 r = dm_btree_empty(&ll->ref_count_info, &ll->ref_count_root);
1223 if (r < 0)
1224 return r;
1225
1226 return 0;
1227 }
1228
/*
 * Sets up @ll from an existing disk space map root.
 *
 * @root_le / @len describe a buffer holding a struct disk_sm_root.
 * Returns -ENOMEM if the buffer is too small, otherwise the result of
 * initialisation and of opening the index.
 */
int sm_ll_open_disk(struct ll_disk *ll, struct dm_transaction_manager *tm,
		    void *root_le, size_t len)
{
	int r;
	struct disk_sm_root smr;

	if (len < sizeof(struct disk_sm_root)) {
		DMERR("sm_metadata root too small");
		return -ENOMEM;
	}

	/*
	 * We don't know the alignment of the root_le buffer, so need to
	 * copy into a new structure rather than reading the 64-bit fields
	 * through a cast pointer (same approach as sm_ll_open_metadata()).
	 */
	memcpy(&smr, root_le, sizeof(smr));

	r = sm_ll_init(ll, tm);
	if (r < 0)
		return r;

	ll->load_ie = disk_ll_load_ie;
	ll->save_ie = disk_ll_save_ie;
	ll->init_index = disk_ll_init_index;
	ll->open_index = disk_ll_open;
	ll->max_entries = disk_ll_max_entries;
	ll->commit = disk_ll_commit;

	ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
	ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
	ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
	ll->ref_count_root = le64_to_cpu(smr.ref_count_root);

	return ll->open_index(ll);
}
1258
1259 /*----------------------------------------------------------------*/
1260