1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 *
4 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
5 *
6 * This code builds two trees of free clusters extents.
7 * Trees are sorted by start of extent and by length of extent.
8 * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees.
9 * In extreme case code reads on-disk bitmap to find free clusters.
10 *
11 */
12
13 #include <linux/buffer_head.h>
14 #include <linux/fs.h>
15 #include <linux/kernel.h>
16
17 #include "ntfs.h"
18 #include "ntfs_fs.h"
19
20 /*
21 * Maximum number of extents in tree.
22 */
23 #define NTFS_MAX_WND_EXTENTS (32u * 1024u)
24
25 struct rb_node_key {
26 struct rb_node node;
27 size_t key;
28 };
29
30 struct e_node {
31 struct rb_node_key start; /* Tree sorted by start. */
32 struct rb_node_key count; /* Tree sorted by len. */
33 };
34
35 static int wnd_rescan(struct wnd_bitmap *wnd);
36 static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw);
37 static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits);
38
39 static struct kmem_cache *ntfs_enode_cachep;
40
ntfs3_init_bitmap(void)41 int __init ntfs3_init_bitmap(void)
42 {
43 ntfs_enode_cachep =
44 kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0,
45 SLAB_RECLAIM_ACCOUNT, NULL);
46 return ntfs_enode_cachep ? 0 : -ENOMEM;
47 }
48
ntfs3_exit_bitmap(void)49 void ntfs3_exit_bitmap(void)
50 {
51 kmem_cache_destroy(ntfs_enode_cachep);
52 }
53
54 /*
55 * wnd_scan
56 *
57 * b_pos + b_len - biggest fragment.
58 * Scan range [wpos wbits) window @buf.
59 *
60 * Return: -1 if not found.
61 */
wnd_scan(const ulong * buf,size_t wbit,u32 wpos,u32 wend,size_t to_alloc,size_t * prev_tail,size_t * b_pos,size_t * b_len)62 static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend,
63 size_t to_alloc, size_t *prev_tail, size_t *b_pos,
64 size_t *b_len)
65 {
66 while (wpos < wend) {
67 size_t free_len;
68 u32 free_bits, end;
69 u32 used = find_next_zero_bit(buf, wend, wpos);
70
71 if (used >= wend) {
72 if (*b_len < *prev_tail) {
73 *b_pos = wbit - *prev_tail;
74 *b_len = *prev_tail;
75 }
76
77 *prev_tail = 0;
78 return -1;
79 }
80
81 if (used > wpos) {
82 wpos = used;
83 if (*b_len < *prev_tail) {
84 *b_pos = wbit - *prev_tail;
85 *b_len = *prev_tail;
86 }
87
88 *prev_tail = 0;
89 }
90
91 /*
92 * Now we have a fragment [wpos, wend) staring with 0.
93 */
94 end = wpos + to_alloc - *prev_tail;
95 free_bits = find_next_bit(buf, min(end, wend), wpos);
96
97 free_len = *prev_tail + free_bits - wpos;
98
99 if (*b_len < free_len) {
100 *b_pos = wbit + wpos - *prev_tail;
101 *b_len = free_len;
102 }
103
104 if (free_len >= to_alloc)
105 return wbit + wpos - *prev_tail;
106
107 if (free_bits >= wend) {
108 *prev_tail += free_bits - wpos;
109 return -1;
110 }
111
112 wpos = free_bits + 1;
113
114 *prev_tail = 0;
115 }
116
117 return -1;
118 }
119
120 /*
121 * wnd_close - Frees all resources.
122 */
wnd_close(struct wnd_bitmap * wnd)123 void wnd_close(struct wnd_bitmap *wnd)
124 {
125 struct rb_node *node, *next;
126
127 kfree(wnd->free_bits);
128 run_close(&wnd->run);
129
130 node = rb_first(&wnd->start_tree);
131
132 while (node) {
133 next = rb_next(node);
134 rb_erase(node, &wnd->start_tree);
135 kmem_cache_free(ntfs_enode_cachep,
136 rb_entry(node, struct e_node, start.node));
137 node = next;
138 }
139 }
140
rb_lookup(struct rb_root * root,size_t v)141 static struct rb_node *rb_lookup(struct rb_root *root, size_t v)
142 {
143 struct rb_node **p = &root->rb_node;
144 struct rb_node *r = NULL;
145
146 while (*p) {
147 struct rb_node_key *k;
148
149 k = rb_entry(*p, struct rb_node_key, node);
150 if (v < k->key) {
151 p = &(*p)->rb_left;
152 } else if (v > k->key) {
153 r = &k->node;
154 p = &(*p)->rb_right;
155 } else {
156 return &k->node;
157 }
158 }
159
160 return r;
161 }
162
163 /*
164 * rb_insert_count - Helper function to insert special kind of 'count' tree.
165 */
rb_insert_count(struct rb_root * root,struct e_node * e)166 static inline bool rb_insert_count(struct rb_root *root, struct e_node *e)
167 {
168 struct rb_node **p = &root->rb_node;
169 struct rb_node *parent = NULL;
170 size_t e_ckey = e->count.key;
171 size_t e_skey = e->start.key;
172
173 while (*p) {
174 struct e_node *k =
175 rb_entry(parent = *p, struct e_node, count.node);
176
177 if (e_ckey > k->count.key) {
178 p = &(*p)->rb_left;
179 } else if (e_ckey < k->count.key) {
180 p = &(*p)->rb_right;
181 } else if (e_skey < k->start.key) {
182 p = &(*p)->rb_left;
183 } else if (e_skey > k->start.key) {
184 p = &(*p)->rb_right;
185 } else {
186 WARN_ON(1);
187 return false;
188 }
189 }
190
191 rb_link_node(&e->count.node, parent, p);
192 rb_insert_color(&e->count.node, root);
193 return true;
194 }
195
196 /*
197 * rb_insert_start - Helper function to insert special kind of 'count' tree.
198 */
rb_insert_start(struct rb_root * root,struct e_node * e)199 static inline bool rb_insert_start(struct rb_root *root, struct e_node *e)
200 {
201 struct rb_node **p = &root->rb_node;
202 struct rb_node *parent = NULL;
203 size_t e_skey = e->start.key;
204
205 while (*p) {
206 struct e_node *k;
207
208 parent = *p;
209
210 k = rb_entry(parent, struct e_node, start.node);
211 if (e_skey < k->start.key) {
212 p = &(*p)->rb_left;
213 } else if (e_skey > k->start.key) {
214 p = &(*p)->rb_right;
215 } else {
216 WARN_ON(1);
217 return false;
218 }
219 }
220
221 rb_link_node(&e->start.node, parent, p);
222 rb_insert_color(&e->start.node, root);
223 return true;
224 }
225
226 /*
227 * wnd_add_free_ext - Adds a new extent of free space.
228 * @build: 1 when building tree.
229 */
wnd_add_free_ext(struct wnd_bitmap * wnd,size_t bit,size_t len,bool build)230 static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len,
231 bool build)
232 {
233 struct e_node *e, *e0 = NULL;
234 size_t ib, end_in = bit + len;
235 struct rb_node *n;
236
237 if (build) {
238 /* Use extent_min to filter too short extents. */
239 if (wnd->count >= NTFS_MAX_WND_EXTENTS &&
240 len <= wnd->extent_min) {
241 wnd->uptodated = -1;
242 return;
243 }
244 } else {
245 /* Try to find extent before 'bit'. */
246 n = rb_lookup(&wnd->start_tree, bit);
247
248 if (!n) {
249 n = rb_first(&wnd->start_tree);
250 } else {
251 e = rb_entry(n, struct e_node, start.node);
252 n = rb_next(n);
253 if (e->start.key + e->count.key == bit) {
254 /* Remove left. */
255 bit = e->start.key;
256 len += e->count.key;
257 rb_erase(&e->start.node, &wnd->start_tree);
258 rb_erase(&e->count.node, &wnd->count_tree);
259 wnd->count -= 1;
260 e0 = e;
261 }
262 }
263
264 while (n) {
265 size_t next_end;
266
267 e = rb_entry(n, struct e_node, start.node);
268 next_end = e->start.key + e->count.key;
269 if (e->start.key > end_in)
270 break;
271
272 /* Remove right. */
273 n = rb_next(n);
274 len += next_end - end_in;
275 end_in = next_end;
276 rb_erase(&e->start.node, &wnd->start_tree);
277 rb_erase(&e->count.node, &wnd->count_tree);
278 wnd->count -= 1;
279
280 if (!e0)
281 e0 = e;
282 else
283 kmem_cache_free(ntfs_enode_cachep, e);
284 }
285
286 if (wnd->uptodated != 1) {
287 /* Check bits before 'bit'. */
288 ib = wnd->zone_bit == wnd->zone_end ||
289 bit < wnd->zone_end
290 ? 0
291 : wnd->zone_end;
292
293 while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) {
294 bit -= 1;
295 len += 1;
296 }
297
298 /* Check bits after 'end_in'. */
299 ib = wnd->zone_bit == wnd->zone_end ||
300 end_in > wnd->zone_bit
301 ? wnd->nbits
302 : wnd->zone_bit;
303
304 while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) {
305 end_in += 1;
306 len += 1;
307 }
308 }
309 }
310 /* Insert new fragment. */
311 if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
312 if (e0)
313 kmem_cache_free(ntfs_enode_cachep, e0);
314
315 wnd->uptodated = -1;
316
317 /* Compare with smallest fragment. */
318 n = rb_last(&wnd->count_tree);
319 e = rb_entry(n, struct e_node, count.node);
320 if (len <= e->count.key)
321 goto out; /* Do not insert small fragments. */
322
323 if (build) {
324 struct e_node *e2;
325
326 n = rb_prev(n);
327 e2 = rb_entry(n, struct e_node, count.node);
328 /* Smallest fragment will be 'e2->count.key'. */
329 wnd->extent_min = e2->count.key;
330 }
331
332 /* Replace smallest fragment by new one. */
333 rb_erase(&e->start.node, &wnd->start_tree);
334 rb_erase(&e->count.node, &wnd->count_tree);
335 wnd->count -= 1;
336 } else {
337 e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
338 if (!e) {
339 wnd->uptodated = -1;
340 goto out;
341 }
342
343 if (build && len <= wnd->extent_min)
344 wnd->extent_min = len;
345 }
346 e->start.key = bit;
347 e->count.key = len;
348 if (len > wnd->extent_max)
349 wnd->extent_max = len;
350
351 rb_insert_start(&wnd->start_tree, e);
352 rb_insert_count(&wnd->count_tree, e);
353 wnd->count += 1;
354
355 out:;
356 }
357
358 /*
359 * wnd_remove_free_ext - Remove a run from the cached free space.
360 */
wnd_remove_free_ext(struct wnd_bitmap * wnd,size_t bit,size_t len)361 static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len)
362 {
363 struct rb_node *n, *n3;
364 struct e_node *e, *e3;
365 size_t end_in = bit + len;
366 size_t end3, end, new_key, new_len, max_new_len;
367
368 /* Try to find extent before 'bit'. */
369 n = rb_lookup(&wnd->start_tree, bit);
370
371 if (!n)
372 return;
373
374 e = rb_entry(n, struct e_node, start.node);
375 end = e->start.key + e->count.key;
376
377 new_key = new_len = 0;
378 len = e->count.key;
379
380 /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n'. */
381 if (e->start.key > bit)
382 ;
383 else if (end_in <= end) {
384 /* Range [bit,end_in) inside 'e'. */
385 new_key = end_in;
386 new_len = end - end_in;
387 len = bit - e->start.key;
388 } else if (bit > end) {
389 bool bmax = false;
390
391 n3 = rb_next(n);
392
393 while (n3) {
394 e3 = rb_entry(n3, struct e_node, start.node);
395 if (e3->start.key >= end_in)
396 break;
397
398 if (e3->count.key == wnd->extent_max)
399 bmax = true;
400
401 end3 = e3->start.key + e3->count.key;
402 if (end3 > end_in) {
403 e3->start.key = end_in;
404 rb_erase(&e3->count.node, &wnd->count_tree);
405 e3->count.key = end3 - end_in;
406 rb_insert_count(&wnd->count_tree, e3);
407 break;
408 }
409
410 n3 = rb_next(n3);
411 rb_erase(&e3->start.node, &wnd->start_tree);
412 rb_erase(&e3->count.node, &wnd->count_tree);
413 wnd->count -= 1;
414 kmem_cache_free(ntfs_enode_cachep, e3);
415 }
416 if (!bmax)
417 return;
418 n3 = rb_first(&wnd->count_tree);
419 wnd->extent_max =
420 n3 ? rb_entry(n3, struct e_node, count.node)->count.key
421 : 0;
422 return;
423 }
424
425 if (e->count.key != wnd->extent_max) {
426 ;
427 } else if (rb_prev(&e->count.node)) {
428 ;
429 } else {
430 n3 = rb_next(&e->count.node);
431 max_new_len = max(len, new_len);
432 if (!n3) {
433 wnd->extent_max = max_new_len;
434 } else {
435 e3 = rb_entry(n3, struct e_node, count.node);
436 wnd->extent_max = max(e3->count.key, max_new_len);
437 }
438 }
439
440 if (!len) {
441 if (new_len) {
442 e->start.key = new_key;
443 rb_erase(&e->count.node, &wnd->count_tree);
444 e->count.key = new_len;
445 rb_insert_count(&wnd->count_tree, e);
446 } else {
447 rb_erase(&e->start.node, &wnd->start_tree);
448 rb_erase(&e->count.node, &wnd->count_tree);
449 wnd->count -= 1;
450 kmem_cache_free(ntfs_enode_cachep, e);
451 }
452 goto out;
453 }
454 rb_erase(&e->count.node, &wnd->count_tree);
455 e->count.key = len;
456 rb_insert_count(&wnd->count_tree, e);
457
458 if (!new_len)
459 goto out;
460
461 if (wnd->count >= NTFS_MAX_WND_EXTENTS) {
462 wnd->uptodated = -1;
463
464 /* Get minimal extent. */
465 e = rb_entry(rb_last(&wnd->count_tree), struct e_node,
466 count.node);
467 if (e->count.key > new_len)
468 goto out;
469
470 /* Replace minimum. */
471 rb_erase(&e->start.node, &wnd->start_tree);
472 rb_erase(&e->count.node, &wnd->count_tree);
473 wnd->count -= 1;
474 } else {
475 e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC);
476 if (!e)
477 wnd->uptodated = -1;
478 }
479
480 if (e) {
481 e->start.key = new_key;
482 e->count.key = new_len;
483 rb_insert_start(&wnd->start_tree, e);
484 rb_insert_count(&wnd->count_tree, e);
485 wnd->count += 1;
486 }
487
488 out:
489 if (!wnd->count && 1 != wnd->uptodated)
490 wnd_rescan(wnd);
491 }
492
493 /*
494 * wnd_rescan - Scan all bitmap. Used while initialization.
495 */
wnd_rescan(struct wnd_bitmap * wnd)496 static int wnd_rescan(struct wnd_bitmap *wnd)
497 {
498 int err = 0;
499 size_t prev_tail = 0;
500 struct super_block *sb = wnd->sb;
501 struct ntfs_sb_info *sbi = sb->s_fs_info;
502 u64 lbo, len = 0;
503 u32 blocksize = sb->s_blocksize;
504 u8 cluster_bits = sbi->cluster_bits;
505 u32 wbits = 8 * sb->s_blocksize;
506 u32 used, frb;
507 const ulong *buf;
508 size_t wpos, wbit, iw, vbo;
509 struct buffer_head *bh = NULL;
510 CLST lcn, clen;
511
512 wnd->uptodated = 0;
513 wnd->extent_max = 0;
514 wnd->extent_min = MINUS_ONE_T;
515 wnd->total_zeroes = 0;
516
517 vbo = 0;
518
519 for (iw = 0; iw < wnd->nwnd; iw++) {
520 if (iw + 1 == wnd->nwnd)
521 wbits = wnd->bits_last;
522
523 if (wnd->inited) {
524 if (!wnd->free_bits[iw]) {
525 /* All ones. */
526 if (prev_tail) {
527 wnd_add_free_ext(wnd,
528 vbo * 8 - prev_tail,
529 prev_tail, true);
530 prev_tail = 0;
531 }
532 goto next_wnd;
533 }
534 if (wbits == wnd->free_bits[iw]) {
535 /* All zeroes. */
536 prev_tail += wbits;
537 wnd->total_zeroes += wbits;
538 goto next_wnd;
539 }
540 }
541
542 if (!len) {
543 u32 off = vbo & sbi->cluster_mask;
544
545 if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits,
546 &lcn, &clen, NULL)) {
547 err = -ENOENT;
548 goto out;
549 }
550
551 lbo = ((u64)lcn << cluster_bits) + off;
552 len = ((u64)clen << cluster_bits) - off;
553 }
554
555 bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
556 if (!bh) {
557 err = -EIO;
558 goto out;
559 }
560
561 buf = (ulong *)bh->b_data;
562
563 used = bitmap_weight(buf, wbits);
564 if (used < wbits) {
565 frb = wbits - used;
566 wnd->free_bits[iw] = frb;
567 wnd->total_zeroes += frb;
568 }
569
570 wpos = 0;
571 wbit = vbo * 8;
572
573 if (wbit + wbits > wnd->nbits)
574 wbits = wnd->nbits - wbit;
575
576 do {
577 used = find_next_zero_bit(buf, wbits, wpos);
578
579 if (used > wpos && prev_tail) {
580 wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
581 prev_tail, true);
582 prev_tail = 0;
583 }
584
585 wpos = used;
586
587 if (wpos >= wbits) {
588 /* No free blocks. */
589 prev_tail = 0;
590 break;
591 }
592
593 frb = find_next_bit(buf, wbits, wpos);
594 if (frb >= wbits) {
595 /* Keep last free block. */
596 prev_tail += frb - wpos;
597 break;
598 }
599
600 wnd_add_free_ext(wnd, wbit + wpos - prev_tail,
601 frb + prev_tail - wpos, true);
602
603 /* Skip free block and first '1'. */
604 wpos = frb + 1;
605 /* Reset previous tail. */
606 prev_tail = 0;
607 } while (wpos < wbits);
608
609 next_wnd:
610
611 if (bh)
612 put_bh(bh);
613 bh = NULL;
614
615 vbo += blocksize;
616 if (len) {
617 len -= blocksize;
618 lbo += blocksize;
619 }
620 }
621
622 /* Add last block. */
623 if (prev_tail)
624 wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true);
625
626 /*
627 * Before init cycle wnd->uptodated was 0.
628 * If any errors or limits occurs while initialization then
629 * wnd->uptodated will be -1.
630 * If 'uptodated' is still 0 then Tree is really updated.
631 */
632 if (!wnd->uptodated)
633 wnd->uptodated = 1;
634
635 if (wnd->zone_bit != wnd->zone_end) {
636 size_t zlen = wnd->zone_end - wnd->zone_bit;
637
638 wnd->zone_end = wnd->zone_bit;
639 wnd_zone_set(wnd, wnd->zone_bit, zlen);
640 }
641
642 out:
643 return err;
644 }
645
wnd_init(struct wnd_bitmap * wnd,struct super_block * sb,size_t nbits)646 int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits)
647 {
648 int err;
649 u32 blocksize = sb->s_blocksize;
650 u32 wbits = blocksize * 8;
651
652 init_rwsem(&wnd->rw_lock);
653
654 wnd->sb = sb;
655 wnd->nbits = nbits;
656 wnd->total_zeroes = nbits;
657 wnd->extent_max = MINUS_ONE_T;
658 wnd->zone_bit = wnd->zone_end = 0;
659 wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits));
660 wnd->bits_last = nbits & (wbits - 1);
661 if (!wnd->bits_last)
662 wnd->bits_last = wbits;
663
664 wnd->free_bits = kcalloc(wnd->nwnd, sizeof(u16), GFP_NOFS | __GFP_NOWARN);
665 if (!wnd->free_bits)
666 return -ENOMEM;
667
668 err = wnd_rescan(wnd);
669 if (err)
670 return err;
671
672 wnd->inited = true;
673
674 return 0;
675 }
676
677 /*
678 * wnd_map - Call sb_bread for requested window.
679 */
wnd_map(struct wnd_bitmap * wnd,size_t iw)680 static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw)
681 {
682 size_t vbo;
683 CLST lcn, clen;
684 struct super_block *sb = wnd->sb;
685 struct ntfs_sb_info *sbi;
686 struct buffer_head *bh;
687 u64 lbo;
688
689 sbi = sb->s_fs_info;
690 vbo = (u64)iw << sb->s_blocksize_bits;
691
692 if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen,
693 NULL)) {
694 return ERR_PTR(-ENOENT);
695 }
696
697 lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask);
698
699 bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits);
700 if (!bh)
701 return ERR_PTR(-EIO);
702
703 return bh;
704 }
705
706 /*
707 * wnd_set_free - Mark the bits range from bit to bit + bits as free.
708 */
wnd_set_free(struct wnd_bitmap * wnd,size_t bit,size_t bits)709 int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
710 {
711 int err = 0;
712 struct super_block *sb = wnd->sb;
713 size_t bits0 = bits;
714 u32 wbits = 8 * sb->s_blocksize;
715 size_t iw = bit >> (sb->s_blocksize_bits + 3);
716 u32 wbit = bit & (wbits - 1);
717 struct buffer_head *bh;
718
719 while (iw < wnd->nwnd && bits) {
720 u32 tail, op;
721 ulong *buf;
722
723 if (iw + 1 == wnd->nwnd)
724 wbits = wnd->bits_last;
725
726 tail = wbits - wbit;
727 op = min_t(u32, tail, bits);
728
729 bh = wnd_map(wnd, iw);
730 if (IS_ERR(bh)) {
731 err = PTR_ERR(bh);
732 break;
733 }
734
735 buf = (ulong *)bh->b_data;
736
737 lock_buffer(bh);
738
739 __bitmap_clear(buf, wbit, op);
740
741 wnd->free_bits[iw] += op;
742
743 set_buffer_uptodate(bh);
744 mark_buffer_dirty(bh);
745 unlock_buffer(bh);
746 put_bh(bh);
747
748 wnd->total_zeroes += op;
749 bits -= op;
750 wbit = 0;
751 iw += 1;
752 }
753
754 wnd_add_free_ext(wnd, bit, bits0, false);
755
756 return err;
757 }
758
759 /*
760 * wnd_set_used - Mark the bits range from bit to bit + bits as used.
761 */
wnd_set_used(struct wnd_bitmap * wnd,size_t bit,size_t bits)762 int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
763 {
764 int err = 0;
765 struct super_block *sb = wnd->sb;
766 size_t bits0 = bits;
767 size_t iw = bit >> (sb->s_blocksize_bits + 3);
768 u32 wbits = 8 * sb->s_blocksize;
769 u32 wbit = bit & (wbits - 1);
770 struct buffer_head *bh;
771
772 while (iw < wnd->nwnd && bits) {
773 u32 tail, op;
774 ulong *buf;
775
776 if (unlikely(iw + 1 == wnd->nwnd))
777 wbits = wnd->bits_last;
778
779 tail = wbits - wbit;
780 op = min_t(u32, tail, bits);
781
782 bh = wnd_map(wnd, iw);
783 if (IS_ERR(bh)) {
784 err = PTR_ERR(bh);
785 break;
786 }
787 buf = (ulong *)bh->b_data;
788
789 lock_buffer(bh);
790
791 __bitmap_set(buf, wbit, op);
792 wnd->free_bits[iw] -= op;
793
794 set_buffer_uptodate(bh);
795 mark_buffer_dirty(bh);
796 unlock_buffer(bh);
797 put_bh(bh);
798
799 wnd->total_zeroes -= op;
800 bits -= op;
801 wbit = 0;
802 iw += 1;
803 }
804
805 if (!RB_EMPTY_ROOT(&wnd->start_tree))
806 wnd_remove_free_ext(wnd, bit, bits0);
807
808 return err;
809 }
810
811 /*
812 * wnd_is_free_hlp
813 *
814 * Return: True if all clusters [bit, bit+bits) are free (bitmap only).
815 */
wnd_is_free_hlp(struct wnd_bitmap * wnd,size_t bit,size_t bits)816 static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits)
817 {
818 struct super_block *sb = wnd->sb;
819 size_t iw = bit >> (sb->s_blocksize_bits + 3);
820 u32 wbits = 8 * sb->s_blocksize;
821 u32 wbit = bit & (wbits - 1);
822
823 while (iw < wnd->nwnd && bits) {
824 u32 tail, op;
825
826 if (unlikely(iw + 1 == wnd->nwnd))
827 wbits = wnd->bits_last;
828
829 tail = wbits - wbit;
830 op = min_t(u32, tail, bits);
831
832 if (wbits != wnd->free_bits[iw]) {
833 bool ret;
834 struct buffer_head *bh = wnd_map(wnd, iw);
835
836 if (IS_ERR(bh))
837 return false;
838
839 ret = are_bits_clear((ulong *)bh->b_data, wbit, op);
840
841 put_bh(bh);
842 if (!ret)
843 return false;
844 }
845
846 bits -= op;
847 wbit = 0;
848 iw += 1;
849 }
850
851 return true;
852 }
853
854 /*
855 * wnd_is_free
856 *
857 * Return: True if all clusters [bit, bit+bits) are free.
858 */
wnd_is_free(struct wnd_bitmap * wnd,size_t bit,size_t bits)859 bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits)
860 {
861 bool ret;
862 struct rb_node *n;
863 size_t end;
864 struct e_node *e;
865
866 if (RB_EMPTY_ROOT(&wnd->start_tree))
867 goto use_wnd;
868
869 n = rb_lookup(&wnd->start_tree, bit);
870 if (!n)
871 goto use_wnd;
872
873 e = rb_entry(n, struct e_node, start.node);
874
875 end = e->start.key + e->count.key;
876
877 if (bit < end && bit + bits <= end)
878 return true;
879
880 use_wnd:
881 ret = wnd_is_free_hlp(wnd, bit, bits);
882
883 return ret;
884 }
885
886 /*
887 * wnd_is_used
888 *
889 * Return: True if all clusters [bit, bit+bits) are used.
890 */
wnd_is_used(struct wnd_bitmap * wnd,size_t bit,size_t bits)891 bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits)
892 {
893 bool ret = false;
894 struct super_block *sb = wnd->sb;
895 size_t iw = bit >> (sb->s_blocksize_bits + 3);
896 u32 wbits = 8 * sb->s_blocksize;
897 u32 wbit = bit & (wbits - 1);
898 size_t end;
899 struct rb_node *n;
900 struct e_node *e;
901
902 if (RB_EMPTY_ROOT(&wnd->start_tree))
903 goto use_wnd;
904
905 end = bit + bits;
906 n = rb_lookup(&wnd->start_tree, end - 1);
907 if (!n)
908 goto use_wnd;
909
910 e = rb_entry(n, struct e_node, start.node);
911 if (e->start.key + e->count.key > bit)
912 return false;
913
914 use_wnd:
915 while (iw < wnd->nwnd && bits) {
916 u32 tail, op;
917
918 if (unlikely(iw + 1 == wnd->nwnd))
919 wbits = wnd->bits_last;
920
921 tail = wbits - wbit;
922 op = min_t(u32, tail, bits);
923
924 if (wnd->free_bits[iw]) {
925 bool ret;
926 struct buffer_head *bh = wnd_map(wnd, iw);
927
928 if (IS_ERR(bh))
929 goto out;
930
931 ret = are_bits_set((ulong *)bh->b_data, wbit, op);
932 put_bh(bh);
933 if (!ret)
934 goto out;
935 }
936
937 bits -= op;
938 wbit = 0;
939 iw += 1;
940 }
941 ret = true;
942
943 out:
944 return ret;
945 }
946
947 /*
948 * wnd_find - Look for free space.
949 *
950 * - flags - BITMAP_FIND_XXX flags
951 *
952 * Return: 0 if not found.
953 */
wnd_find(struct wnd_bitmap * wnd,size_t to_alloc,size_t hint,size_t flags,size_t * allocated)954 size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint,
955 size_t flags, size_t *allocated)
956 {
957 struct super_block *sb;
958 u32 wbits, wpos, wzbit, wzend;
959 size_t fnd, max_alloc, b_len, b_pos;
960 size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend;
961 size_t to_alloc0 = to_alloc;
962 const ulong *buf;
963 const struct e_node *e;
964 const struct rb_node *pr, *cr;
965 u8 log2_bits;
966 bool fbits_valid;
967 struct buffer_head *bh;
968
969 /* Fast checking for available free space. */
970 if (flags & BITMAP_FIND_FULL) {
971 size_t zeroes = wnd_zeroes(wnd);
972
973 zeroes -= wnd->zone_end - wnd->zone_bit;
974 if (zeroes < to_alloc0)
975 goto no_space;
976
977 if (to_alloc0 > wnd->extent_max)
978 goto no_space;
979 } else {
980 if (to_alloc > wnd->extent_max)
981 to_alloc = wnd->extent_max;
982 }
983
984 if (wnd->zone_bit <= hint && hint < wnd->zone_end)
985 hint = wnd->zone_end;
986
987 max_alloc = wnd->nbits;
988 b_len = b_pos = 0;
989
990 if (hint >= max_alloc)
991 hint = 0;
992
993 if (RB_EMPTY_ROOT(&wnd->start_tree)) {
994 if (wnd->uptodated == 1) {
995 /* Extents tree is updated -> No free space. */
996 goto no_space;
997 }
998 goto scan_bitmap;
999 }
1000
1001 e = NULL;
1002 if (!hint)
1003 goto allocate_biggest;
1004
1005 /* Use hint: Enumerate extents by start >= hint. */
1006 pr = NULL;
1007 cr = wnd->start_tree.rb_node;
1008
1009 for (;;) {
1010 e = rb_entry(cr, struct e_node, start.node);
1011
1012 if (e->start.key == hint)
1013 break;
1014
1015 if (e->start.key < hint) {
1016 pr = cr;
1017 cr = cr->rb_right;
1018 if (!cr)
1019 break;
1020 continue;
1021 }
1022
1023 cr = cr->rb_left;
1024 if (!cr) {
1025 e = pr ? rb_entry(pr, struct e_node, start.node) : NULL;
1026 break;
1027 }
1028 }
1029
1030 if (!e)
1031 goto allocate_biggest;
1032
1033 if (e->start.key + e->count.key > hint) {
1034 /* We have found extension with 'hint' inside. */
1035 size_t len = e->start.key + e->count.key - hint;
1036
1037 if (len >= to_alloc && hint + to_alloc <= max_alloc) {
1038 fnd = hint;
1039 goto found;
1040 }
1041
1042 if (!(flags & BITMAP_FIND_FULL)) {
1043 if (len > to_alloc)
1044 len = to_alloc;
1045
1046 if (hint + len <= max_alloc) {
1047 fnd = hint;
1048 to_alloc = len;
1049 goto found;
1050 }
1051 }
1052 }
1053
1054 allocate_biggest:
1055 /* Allocate from biggest free extent. */
1056 e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node);
1057 if (e->count.key != wnd->extent_max)
1058 wnd->extent_max = e->count.key;
1059
1060 if (e->count.key < max_alloc) {
1061 if (e->count.key >= to_alloc) {
1062 ;
1063 } else if (flags & BITMAP_FIND_FULL) {
1064 if (e->count.key < to_alloc0) {
1065 /* Biggest free block is less then requested. */
1066 goto no_space;
1067 }
1068 to_alloc = e->count.key;
1069 } else if (-1 != wnd->uptodated) {
1070 to_alloc = e->count.key;
1071 } else {
1072 /* Check if we can use more bits. */
1073 size_t op, max_check;
1074 struct rb_root start_tree;
1075
1076 memcpy(&start_tree, &wnd->start_tree,
1077 sizeof(struct rb_root));
1078 memset(&wnd->start_tree, 0, sizeof(struct rb_root));
1079
1080 max_check = e->start.key + to_alloc;
1081 if (max_check > max_alloc)
1082 max_check = max_alloc;
1083 for (op = e->start.key + e->count.key; op < max_check;
1084 op++) {
1085 if (!wnd_is_free(wnd, op, 1))
1086 break;
1087 }
1088 memcpy(&wnd->start_tree, &start_tree,
1089 sizeof(struct rb_root));
1090 to_alloc = op - e->start.key;
1091 }
1092
1093 /* Prepare to return. */
1094 fnd = e->start.key;
1095 if (e->start.key + to_alloc > max_alloc)
1096 to_alloc = max_alloc - e->start.key;
1097 goto found;
1098 }
1099
1100 if (wnd->uptodated == 1) {
1101 /* Extents tree is updated -> no free space. */
1102 goto no_space;
1103 }
1104
1105 b_len = e->count.key;
1106 b_pos = e->start.key;
1107
1108 scan_bitmap:
1109 sb = wnd->sb;
1110 log2_bits = sb->s_blocksize_bits + 3;
1111
1112 /* At most two ranges [hint, max_alloc) + [0, hint). */
1113 Again:
1114
1115 /* TODO: Optimize request for case nbits > wbits. */
1116 iw = hint >> log2_bits;
1117 wbits = sb->s_blocksize * 8;
1118 wpos = hint & (wbits - 1);
1119 prev_tail = 0;
1120 fbits_valid = true;
1121
1122 if (max_alloc == wnd->nbits) {
1123 nwnd = wnd->nwnd;
1124 } else {
1125 size_t t = max_alloc + wbits - 1;
1126
1127 nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd;
1128 }
1129
1130 /* Enumerate all windows. */
1131 for (; iw < nwnd; iw++) {
1132 wbit = iw << log2_bits;
1133
1134 if (!wnd->free_bits[iw]) {
1135 if (prev_tail > b_len) {
1136 b_pos = wbit - prev_tail;
1137 b_len = prev_tail;
1138 }
1139
1140 /* Skip full used window. */
1141 prev_tail = 0;
1142 wpos = 0;
1143 continue;
1144 }
1145
1146 if (unlikely(iw + 1 == nwnd)) {
1147 if (max_alloc == wnd->nbits) {
1148 wbits = wnd->bits_last;
1149 } else {
1150 size_t t = max_alloc & (wbits - 1);
1151
1152 if (t) {
1153 wbits = t;
1154 fbits_valid = false;
1155 }
1156 }
1157 }
1158
1159 if (wnd->zone_end > wnd->zone_bit) {
1160 ebit = wbit + wbits;
1161 zbit = max(wnd->zone_bit, wbit);
1162 zend = min(wnd->zone_end, ebit);
1163
1164 /* Here we have a window [wbit, ebit) and zone [zbit, zend). */
1165 if (zend <= zbit) {
1166 /* Zone does not overlap window. */
1167 } else {
1168 wzbit = zbit - wbit;
1169 wzend = zend - wbit;
1170
1171 /* Zone overlaps window. */
1172 if (wnd->free_bits[iw] == wzend - wzbit) {
1173 prev_tail = 0;
1174 wpos = 0;
1175 continue;
1176 }
1177
1178 /* Scan two ranges window: [wbit, zbit) and [zend, ebit). */
1179 bh = wnd_map(wnd, iw);
1180
1181 if (IS_ERR(bh)) {
1182 /* TODO: Error */
1183 prev_tail = 0;
1184 wpos = 0;
1185 continue;
1186 }
1187
1188 buf = (ulong *)bh->b_data;
1189
1190 /* Scan range [wbit, zbit). */
1191 if (wpos < wzbit) {
1192 /* Scan range [wpos, zbit). */
1193 fnd = wnd_scan(buf, wbit, wpos, wzbit,
1194 to_alloc, &prev_tail,
1195 &b_pos, &b_len);
1196 if (fnd != MINUS_ONE_T) {
1197 put_bh(bh);
1198 goto found;
1199 }
1200 }
1201
1202 prev_tail = 0;
1203
1204 /* Scan range [zend, ebit). */
1205 if (wzend < wbits) {
1206 fnd = wnd_scan(buf, wbit,
1207 max(wzend, wpos), wbits,
1208 to_alloc, &prev_tail,
1209 &b_pos, &b_len);
1210 if (fnd != MINUS_ONE_T) {
1211 put_bh(bh);
1212 goto found;
1213 }
1214 }
1215
1216 wpos = 0;
1217 put_bh(bh);
1218 continue;
1219 }
1220 }
1221
1222 /* Current window does not overlap zone. */
1223 if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) {
1224 /* Window is empty. */
1225 if (prev_tail + wbits >= to_alloc) {
1226 fnd = wbit + wpos - prev_tail;
1227 goto found;
1228 }
1229
1230 /* Increase 'prev_tail' and process next window. */
1231 prev_tail += wbits;
1232 wpos = 0;
1233 continue;
1234 }
1235
1236 /* Read window. */
1237 bh = wnd_map(wnd, iw);
1238 if (IS_ERR(bh)) {
1239 // TODO: Error.
1240 prev_tail = 0;
1241 wpos = 0;
1242 continue;
1243 }
1244
1245 buf = (ulong *)bh->b_data;
1246
1247 /* Scan range [wpos, eBits). */
1248 fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail,
1249 &b_pos, &b_len);
1250 put_bh(bh);
1251 if (fnd != MINUS_ONE_T)
1252 goto found;
1253 }
1254
1255 if (b_len < prev_tail) {
1256 /* The last fragment. */
1257 b_len = prev_tail;
1258 b_pos = max_alloc - prev_tail;
1259 }
1260
1261 if (hint) {
1262 /*
1263 * We have scanned range [hint max_alloc).
1264 * Prepare to scan range [0 hint + to_alloc).
1265 */
1266 size_t nextmax = hint + to_alloc;
1267
1268 if (likely(nextmax >= hint) && nextmax < max_alloc)
1269 max_alloc = nextmax;
1270 hint = 0;
1271 goto Again;
1272 }
1273
1274 if (!b_len)
1275 goto no_space;
1276
1277 wnd->extent_max = b_len;
1278
1279 if (flags & BITMAP_FIND_FULL)
1280 goto no_space;
1281
1282 fnd = b_pos;
1283 to_alloc = b_len;
1284
1285 found:
1286 if (flags & BITMAP_FIND_MARK_AS_USED) {
1287 /* TODO: Optimize remove extent (pass 'e'?). */
1288 if (wnd_set_used(wnd, fnd, to_alloc))
1289 goto no_space;
1290 } else if (wnd->extent_max != MINUS_ONE_T &&
1291 to_alloc > wnd->extent_max) {
1292 wnd->extent_max = to_alloc;
1293 }
1294
1295 *allocated = fnd;
1296 return to_alloc;
1297
1298 no_space:
1299 return 0;
1300 }
1301
1302 /*
1303 * wnd_extend - Extend bitmap ($MFT bitmap).
1304 */
wnd_extend(struct wnd_bitmap * wnd,size_t new_bits)1305 int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
1306 {
1307 int err;
1308 struct super_block *sb = wnd->sb;
1309 struct ntfs_sb_info *sbi = sb->s_fs_info;
1310 u32 blocksize = sb->s_blocksize;
1311 u32 wbits = blocksize * 8;
1312 u32 b0, new_last;
1313 size_t bits, iw, new_wnd;
1314 size_t old_bits = wnd->nbits;
1315 u16 *new_free;
1316
1317 if (new_bits <= old_bits)
1318 return -EINVAL;
1319
1320 /* Align to 8 byte boundary. */
1321 new_wnd = bytes_to_block(sb, bitmap_size(new_bits));
1322 new_last = new_bits & (wbits - 1);
1323 if (!new_last)
1324 new_last = wbits;
1325
1326 if (new_wnd != wnd->nwnd) {
1327 new_free = kmalloc(new_wnd * sizeof(u16), GFP_NOFS);
1328 if (!new_free)
1329 return -ENOMEM;
1330
1331 memcpy(new_free, wnd->free_bits, wnd->nwnd * sizeof(short));
1332 memset(new_free + wnd->nwnd, 0,
1333 (new_wnd - wnd->nwnd) * sizeof(short));
1334 kfree(wnd->free_bits);
1335 wnd->free_bits = new_free;
1336 }
1337
1338 /* Zero bits [old_bits,new_bits). */
1339 bits = new_bits - old_bits;
1340 b0 = old_bits & (wbits - 1);
1341
1342 for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) {
1343 u32 op;
1344 size_t frb;
1345 u64 vbo, lbo, bytes;
1346 struct buffer_head *bh;
1347 ulong *buf;
1348
1349 if (iw + 1 == new_wnd)
1350 wbits = new_last;
1351
1352 op = b0 + bits > wbits ? wbits - b0 : bits;
1353 vbo = (u64)iw * blocksize;
1354
1355 err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes);
1356 if (err)
1357 break;
1358
1359 bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits);
1360 if (!bh)
1361 return -EIO;
1362
1363 lock_buffer(bh);
1364 buf = (ulong *)bh->b_data;
1365
1366 __bitmap_clear(buf, b0, blocksize * 8 - b0);
1367 frb = wbits - bitmap_weight(buf, wbits);
1368 wnd->total_zeroes += frb - wnd->free_bits[iw];
1369 wnd->free_bits[iw] = frb;
1370
1371 set_buffer_uptodate(bh);
1372 mark_buffer_dirty(bh);
1373 unlock_buffer(bh);
1374 /* err = sync_dirty_buffer(bh); */
1375
1376 b0 = 0;
1377 bits -= op;
1378 }
1379
1380 wnd->nbits = new_bits;
1381 wnd->nwnd = new_wnd;
1382 wnd->bits_last = new_last;
1383
1384 wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false);
1385
1386 return 0;
1387 }
1388
wnd_zone_set(struct wnd_bitmap * wnd,size_t lcn,size_t len)1389 void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len)
1390 {
1391 size_t zlen = wnd->zone_end - wnd->zone_bit;
1392
1393 if (zlen)
1394 wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false);
1395
1396 if (!RB_EMPTY_ROOT(&wnd->start_tree) && len)
1397 wnd_remove_free_ext(wnd, lcn, len);
1398
1399 wnd->zone_bit = lcn;
1400 wnd->zone_end = lcn + len;
1401 }
1402
ntfs_trim_fs(struct ntfs_sb_info * sbi,struct fstrim_range * range)1403 int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range)
1404 {
1405 int err = 0;
1406 struct super_block *sb = sbi->sb;
1407 struct wnd_bitmap *wnd = &sbi->used.bitmap;
1408 u32 wbits = 8 * sb->s_blocksize;
1409 CLST len = 0, lcn = 0, done = 0;
1410 CLST minlen = bytes_to_cluster(sbi, range->minlen);
1411 CLST lcn_from = bytes_to_cluster(sbi, range->start);
1412 size_t iw = lcn_from >> (sb->s_blocksize_bits + 3);
1413 u32 wbit = lcn_from & (wbits - 1);
1414 const ulong *buf;
1415 CLST lcn_to;
1416
1417 if (!minlen)
1418 minlen = 1;
1419
1420 if (range->len == (u64)-1)
1421 lcn_to = wnd->nbits;
1422 else
1423 lcn_to = bytes_to_cluster(sbi, range->start + range->len);
1424
1425 down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
1426
1427 for (; iw < wnd->nwnd; iw++, wbit = 0) {
1428 CLST lcn_wnd = iw * wbits;
1429 struct buffer_head *bh;
1430
1431 if (lcn_wnd > lcn_to)
1432 break;
1433
1434 if (!wnd->free_bits[iw])
1435 continue;
1436
1437 if (iw + 1 == wnd->nwnd)
1438 wbits = wnd->bits_last;
1439
1440 if (lcn_wnd + wbits > lcn_to)
1441 wbits = lcn_to - lcn_wnd;
1442
1443 bh = wnd_map(wnd, iw);
1444 if (IS_ERR(bh)) {
1445 err = PTR_ERR(bh);
1446 break;
1447 }
1448
1449 buf = (ulong *)bh->b_data;
1450
1451 for (; wbit < wbits; wbit++) {
1452 if (!test_bit(wbit, buf)) {
1453 if (!len)
1454 lcn = lcn_wnd + wbit;
1455 len += 1;
1456 continue;
1457 }
1458 if (len >= minlen) {
1459 err = ntfs_discard(sbi, lcn, len);
1460 if (err)
1461 goto out;
1462 done += len;
1463 }
1464 len = 0;
1465 }
1466 put_bh(bh);
1467 }
1468
1469 /* Process the last fragment. */
1470 if (len >= minlen) {
1471 err = ntfs_discard(sbi, lcn, len);
1472 if (err)
1473 goto out;
1474 done += len;
1475 }
1476
1477 out:
1478 range->len = (u64)done << sbi->cluster_bits;
1479
1480 up_read(&wnd->rw_lock);
1481
1482 return err;
1483 }
1484