1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include <linux/blkdev.h>
22 #include <linux/sort.h>
23 #include <linux/rcupdate.h>
24 #include <linux/kthread.h>
25 #include <linux/slab.h>
26 #include "compat.h"
27 #include "hash.h"
28 #include "ctree.h"
29 #include "disk-io.h"
30 #include "print-tree.h"
31 #include "transaction.h"
32 #include "volumes.h"
33 #include "locking.h"
34 #include "free-space-cache.h"
35 
36 /* control flags for do_chunk_alloc's force field
37  * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
38  * if we really need one.
39  *
40  * CHUNK_ALLOC_FORCE means it must try to allocate one
41  *
42  * CHUNK_ALLOC_LIMITED means to only try and allocate one
43  * if we have very few chunks already allocated.  This is
44  * used as part of the clustering code to help make sure
45  * we have a good pool of storage to cluster in, without
46  * filling the FS with empty chunks
47  *
48  */
49 enum {
50 	CHUNK_ALLOC_NO_FORCE = 0,
51 	CHUNK_ALLOC_FORCE = 1,
52 	CHUNK_ALLOC_LIMITED = 2,
53 };
54 
55 static int update_block_group(struct btrfs_trans_handle *trans,
56 			      struct btrfs_root *root,
57 			      u64 bytenr, u64 num_bytes, int alloc);
58 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
59 				struct btrfs_root *root,
60 				u64 bytenr, u64 num_bytes, u64 parent,
61 				u64 root_objectid, u64 owner_objectid,
62 				u64 owner_offset, int refs_to_drop,
63 				struct btrfs_delayed_extent_op *extra_op);
64 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
65 				    struct extent_buffer *leaf,
66 				    struct btrfs_extent_item *ei);
67 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
68 				      struct btrfs_root *root,
69 				      u64 parent, u64 root_objectid,
70 				      u64 flags, u64 owner, u64 offset,
71 				      struct btrfs_key *ins, int ref_mod);
72 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
73 				     struct btrfs_root *root,
74 				     u64 parent, u64 root_objectid,
75 				     u64 flags, struct btrfs_disk_key *key,
76 				     int level, struct btrfs_key *ins);
77 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
78 			  struct btrfs_root *extent_root, u64 alloc_bytes,
79 			  u64 flags, int force);
80 static int find_next_key(struct btrfs_path *path, int level,
81 			 struct btrfs_key *key);
82 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
83 			    int dump_block_groups);
84 
85 static noinline int
block_group_cache_done(struct btrfs_block_group_cache * cache)86 block_group_cache_done(struct btrfs_block_group_cache *cache)
87 {
88 	smp_mb();
89 	return cache->cached == BTRFS_CACHE_FINISHED;
90 }
91 
block_group_bits(struct btrfs_block_group_cache * cache,u64 bits)92 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
93 {
94 	return (cache->flags & bits) == bits;
95 }
96 
btrfs_get_block_group(struct btrfs_block_group_cache * cache)97 void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
98 {
99 	atomic_inc(&cache->count);
100 }
101 
btrfs_put_block_group(struct btrfs_block_group_cache * cache)102 void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
103 {
104 	if (atomic_dec_and_test(&cache->count)) {
105 		WARN_ON(cache->pinned > 0);
106 		WARN_ON(cache->reserved > 0);
107 		WARN_ON(cache->reserved_pinned > 0);
108 		kfree(cache);
109 	}
110 }
111 
112 /*
113  * this adds the block group to the fs_info rb tree for the block group
114  * cache
115  */
btrfs_add_block_group_cache(struct btrfs_fs_info * info,struct btrfs_block_group_cache * block_group)116 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
117 				struct btrfs_block_group_cache *block_group)
118 {
119 	struct rb_node **p;
120 	struct rb_node *parent = NULL;
121 	struct btrfs_block_group_cache *cache;
122 
123 	spin_lock(&info->block_group_cache_lock);
124 	p = &info->block_group_cache_tree.rb_node;
125 
126 	while (*p) {
127 		parent = *p;
128 		cache = rb_entry(parent, struct btrfs_block_group_cache,
129 				 cache_node);
130 		if (block_group->key.objectid < cache->key.objectid) {
131 			p = &(*p)->rb_left;
132 		} else if (block_group->key.objectid > cache->key.objectid) {
133 			p = &(*p)->rb_right;
134 		} else {
135 			spin_unlock(&info->block_group_cache_lock);
136 			return -EEXIST;
137 		}
138 	}
139 
140 	rb_link_node(&block_group->cache_node, parent, p);
141 	rb_insert_color(&block_group->cache_node,
142 			&info->block_group_cache_tree);
143 	spin_unlock(&info->block_group_cache_lock);
144 
145 	return 0;
146 }
147 
148 /*
149  * This will return the block group at or after bytenr if contains is 0, else
150  * it will return the block group that contains the bytenr
151  */
152 static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info * info,u64 bytenr,int contains)153 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
154 			      int contains)
155 {
156 	struct btrfs_block_group_cache *cache, *ret = NULL;
157 	struct rb_node *n;
158 	u64 end, start;
159 
160 	spin_lock(&info->block_group_cache_lock);
161 	n = info->block_group_cache_tree.rb_node;
162 
163 	while (n) {
164 		cache = rb_entry(n, struct btrfs_block_group_cache,
165 				 cache_node);
166 		end = cache->key.objectid + cache->key.offset - 1;
167 		start = cache->key.objectid;
168 
169 		if (bytenr < start) {
170 			if (!contains && (!ret || start < ret->key.objectid))
171 				ret = cache;
172 			n = n->rb_left;
173 		} else if (bytenr > start) {
174 			if (contains && bytenr <= end) {
175 				ret = cache;
176 				break;
177 			}
178 			n = n->rb_right;
179 		} else {
180 			ret = cache;
181 			break;
182 		}
183 	}
184 	if (ret)
185 		btrfs_get_block_group(ret);
186 	spin_unlock(&info->block_group_cache_lock);
187 
188 	return ret;
189 }
190 
add_excluded_extent(struct btrfs_root * root,u64 start,u64 num_bytes)191 static int add_excluded_extent(struct btrfs_root *root,
192 			       u64 start, u64 num_bytes)
193 {
194 	u64 end = start + num_bytes - 1;
195 	set_extent_bits(&root->fs_info->freed_extents[0],
196 			start, end, EXTENT_UPTODATE, GFP_NOFS);
197 	set_extent_bits(&root->fs_info->freed_extents[1],
198 			start, end, EXTENT_UPTODATE, GFP_NOFS);
199 	return 0;
200 }
201 
free_excluded_extents(struct btrfs_root * root,struct btrfs_block_group_cache * cache)202 static void free_excluded_extents(struct btrfs_root *root,
203 				  struct btrfs_block_group_cache *cache)
204 {
205 	u64 start, end;
206 
207 	start = cache->key.objectid;
208 	end = start + cache->key.offset - 1;
209 
210 	clear_extent_bits(&root->fs_info->freed_extents[0],
211 			  start, end, EXTENT_UPTODATE, GFP_NOFS);
212 	clear_extent_bits(&root->fs_info->freed_extents[1],
213 			  start, end, EXTENT_UPTODATE, GFP_NOFS);
214 }
215 
exclude_super_stripes(struct btrfs_root * root,struct btrfs_block_group_cache * cache)216 static int exclude_super_stripes(struct btrfs_root *root,
217 				 struct btrfs_block_group_cache *cache)
218 {
219 	u64 bytenr;
220 	u64 *logical;
221 	int stripe_len;
222 	int i, nr, ret;
223 
224 	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
225 		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
226 		cache->bytes_super += stripe_len;
227 		ret = add_excluded_extent(root, cache->key.objectid,
228 					  stripe_len);
229 		BUG_ON(ret);
230 	}
231 
232 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
233 		bytenr = btrfs_sb_offset(i);
234 		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
235 				       cache->key.objectid, bytenr,
236 				       0, &logical, &nr, &stripe_len);
237 		BUG_ON(ret);
238 
239 		while (nr--) {
240 			cache->bytes_super += stripe_len;
241 			ret = add_excluded_extent(root, logical[nr],
242 						  stripe_len);
243 			BUG_ON(ret);
244 		}
245 
246 		kfree(logical);
247 	}
248 	return 0;
249 }
250 
251 static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache * cache)252 get_caching_control(struct btrfs_block_group_cache *cache)
253 {
254 	struct btrfs_caching_control *ctl;
255 
256 	spin_lock(&cache->lock);
257 	if (cache->cached != BTRFS_CACHE_STARTED) {
258 		spin_unlock(&cache->lock);
259 		return NULL;
260 	}
261 
262 	/* We're loading it the fast way, so we don't have a caching_ctl. */
263 	if (!cache->caching_ctl) {
264 		spin_unlock(&cache->lock);
265 		return NULL;
266 	}
267 
268 	ctl = cache->caching_ctl;
269 	atomic_inc(&ctl->count);
270 	spin_unlock(&cache->lock);
271 	return ctl;
272 }
273 
put_caching_control(struct btrfs_caching_control * ctl)274 static void put_caching_control(struct btrfs_caching_control *ctl)
275 {
276 	if (atomic_dec_and_test(&ctl->count))
277 		kfree(ctl);
278 }
279 
280 /*
281  * this is only called by cache_block_group, since we could have freed extents
282  * we need to check the pinned_extents for any extents that can't be used yet
283  * since their free space will be released as soon as the transaction commits.
284  */
add_new_free_space(struct btrfs_block_group_cache * block_group,struct btrfs_fs_info * info,u64 start,u64 end)285 static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
286 			      struct btrfs_fs_info *info, u64 start, u64 end)
287 {
288 	u64 extent_start, extent_end, size, total_added = 0;
289 	int ret;
290 
291 	while (start < end) {
292 		ret = find_first_extent_bit(info->pinned_extents, start,
293 					    &extent_start, &extent_end,
294 					    EXTENT_DIRTY | EXTENT_UPTODATE);
295 		if (ret)
296 			break;
297 
298 		if (extent_start <= start) {
299 			start = extent_end + 1;
300 		} else if (extent_start > start && extent_start < end) {
301 			size = extent_start - start;
302 			total_added += size;
303 			ret = btrfs_add_free_space(block_group, start,
304 						   size);
305 			BUG_ON(ret);
306 			start = extent_end + 1;
307 		} else {
308 			break;
309 		}
310 	}
311 
312 	if (start < end) {
313 		size = end - start;
314 		total_added += size;
315 		ret = btrfs_add_free_space(block_group, start, size);
316 		BUG_ON(ret);
317 	}
318 
319 	return total_added;
320 }
321 
caching_kthread(void * data)322 static int caching_kthread(void *data)
323 {
324 	struct btrfs_block_group_cache *block_group = data;
325 	struct btrfs_fs_info *fs_info = block_group->fs_info;
326 	struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
327 	struct btrfs_root *extent_root = fs_info->extent_root;
328 	struct btrfs_path *path;
329 	struct extent_buffer *leaf;
330 	struct btrfs_key key;
331 	u64 total_found = 0;
332 	u64 last = 0;
333 	u32 nritems;
334 	int ret = 0;
335 
336 	path = btrfs_alloc_path();
337 	if (!path)
338 		return -ENOMEM;
339 
340 	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
341 
342 	/*
343 	 * We don't want to deadlock with somebody trying to allocate a new
344 	 * extent for the extent root while also trying to search the extent
345 	 * root to add free space.  So we skip locking and search the commit
346 	 * root, since its read-only
347 	 */
348 	path->skip_locking = 1;
349 	path->search_commit_root = 1;
350 	path->reada = 2;
351 
352 	key.objectid = last;
353 	key.offset = 0;
354 	key.type = BTRFS_EXTENT_ITEM_KEY;
355 again:
356 	mutex_lock(&caching_ctl->mutex);
357 	/* need to make sure the commit_root doesn't disappear */
358 	down_read(&fs_info->extent_commit_sem);
359 
360 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
361 	if (ret < 0)
362 		goto err;
363 
364 	leaf = path->nodes[0];
365 	nritems = btrfs_header_nritems(leaf);
366 
367 	while (1) {
368 		smp_mb();
369 		if (fs_info->closing > 1) {
370 			last = (u64)-1;
371 			break;
372 		}
373 
374 		if (path->slots[0] < nritems) {
375 			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
376 		} else {
377 			ret = find_next_key(path, 0, &key);
378 			if (ret)
379 				break;
380 
381 			caching_ctl->progress = last;
382 			btrfs_release_path(extent_root, path);
383 			up_read(&fs_info->extent_commit_sem);
384 			mutex_unlock(&caching_ctl->mutex);
385 			if (btrfs_transaction_in_commit(fs_info))
386 				schedule_timeout(1);
387 			else
388 				cond_resched();
389 			goto again;
390 		}
391 
392 		if (key.objectid < block_group->key.objectid) {
393 			path->slots[0]++;
394 			continue;
395 		}
396 
397 		if (key.objectid >= block_group->key.objectid +
398 		    block_group->key.offset)
399 			break;
400 
401 		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
402 			total_found += add_new_free_space(block_group,
403 							  fs_info, last,
404 							  key.objectid);
405 			last = key.objectid + key.offset;
406 
407 			if (total_found > (1024 * 1024 * 2)) {
408 				total_found = 0;
409 				wake_up(&caching_ctl->wait);
410 			}
411 		}
412 		path->slots[0]++;
413 	}
414 	ret = 0;
415 
416 	total_found += add_new_free_space(block_group, fs_info, last,
417 					  block_group->key.objectid +
418 					  block_group->key.offset);
419 	caching_ctl->progress = (u64)-1;
420 
421 	spin_lock(&block_group->lock);
422 	block_group->caching_ctl = NULL;
423 	block_group->cached = BTRFS_CACHE_FINISHED;
424 	spin_unlock(&block_group->lock);
425 
426 err:
427 	btrfs_free_path(path);
428 	up_read(&fs_info->extent_commit_sem);
429 
430 	free_excluded_extents(extent_root, block_group);
431 
432 	mutex_unlock(&caching_ctl->mutex);
433 	wake_up(&caching_ctl->wait);
434 
435 	put_caching_control(caching_ctl);
436 	atomic_dec(&block_group->space_info->caching_threads);
437 	btrfs_put_block_group(block_group);
438 
439 	return 0;
440 }
441 
cache_block_group(struct btrfs_block_group_cache * cache,struct btrfs_trans_handle * trans,struct btrfs_root * root,int load_cache_only)442 static int cache_block_group(struct btrfs_block_group_cache *cache,
443 			     struct btrfs_trans_handle *trans,
444 			     struct btrfs_root *root,
445 			     int load_cache_only)
446 {
447 	struct btrfs_fs_info *fs_info = cache->fs_info;
448 	struct btrfs_caching_control *caching_ctl;
449 	struct task_struct *tsk;
450 	int ret = 0;
451 
452 	smp_mb();
453 	if (cache->cached != BTRFS_CACHE_NO)
454 		return 0;
455 
456 	/*
457 	 * We can't do the read from on-disk cache during a commit since we need
458 	 * to have the normal tree locking.  Also if we are currently trying to
459 	 * allocate blocks for the tree root we can't do the fast caching since
460 	 * we likely hold important locks.
461 	 */
462 	if (trans && (!trans->transaction->in_commit) &&
463 	    (root && root != root->fs_info->tree_root)) {
464 		spin_lock(&cache->lock);
465 		if (cache->cached != BTRFS_CACHE_NO) {
466 			spin_unlock(&cache->lock);
467 			return 0;
468 		}
469 		cache->cached = BTRFS_CACHE_STARTED;
470 		spin_unlock(&cache->lock);
471 
472 		ret = load_free_space_cache(fs_info, cache);
473 
474 		spin_lock(&cache->lock);
475 		if (ret == 1) {
476 			cache->cached = BTRFS_CACHE_FINISHED;
477 			cache->last_byte_to_unpin = (u64)-1;
478 		} else {
479 			cache->cached = BTRFS_CACHE_NO;
480 		}
481 		spin_unlock(&cache->lock);
482 		if (ret == 1) {
483 			free_excluded_extents(fs_info->extent_root, cache);
484 			return 0;
485 		}
486 	}
487 
488 	if (load_cache_only)
489 		return 0;
490 
491 	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
492 	BUG_ON(!caching_ctl);
493 
494 	INIT_LIST_HEAD(&caching_ctl->list);
495 	mutex_init(&caching_ctl->mutex);
496 	init_waitqueue_head(&caching_ctl->wait);
497 	caching_ctl->block_group = cache;
498 	caching_ctl->progress = cache->key.objectid;
499 	/* one for caching kthread, one for caching block group list */
500 	atomic_set(&caching_ctl->count, 2);
501 
502 	spin_lock(&cache->lock);
503 	if (cache->cached != BTRFS_CACHE_NO) {
504 		spin_unlock(&cache->lock);
505 		kfree(caching_ctl);
506 		return 0;
507 	}
508 	cache->caching_ctl = caching_ctl;
509 	cache->cached = BTRFS_CACHE_STARTED;
510 	spin_unlock(&cache->lock);
511 
512 	down_write(&fs_info->extent_commit_sem);
513 	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
514 	up_write(&fs_info->extent_commit_sem);
515 
516 	atomic_inc(&cache->space_info->caching_threads);
517 	btrfs_get_block_group(cache);
518 
519 	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
520 			  cache->key.objectid);
521 	if (IS_ERR(tsk)) {
522 		ret = PTR_ERR(tsk);
523 		printk(KERN_ERR "error running thread %d\n", ret);
524 		BUG();
525 	}
526 
527 	return ret;
528 }
529 
530 /*
531  * return the block group that starts at or after bytenr
532  */
533 static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info * info,u64 bytenr)534 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
535 {
536 	struct btrfs_block_group_cache *cache;
537 
538 	cache = block_group_cache_tree_search(info, bytenr, 0);
539 
540 	return cache;
541 }
542 
543 /*
544  * return the block group that contains the given bytenr
545  */
btrfs_lookup_block_group(struct btrfs_fs_info * info,u64 bytenr)546 struct btrfs_block_group_cache *btrfs_lookup_block_group(
547 						 struct btrfs_fs_info *info,
548 						 u64 bytenr)
549 {
550 	struct btrfs_block_group_cache *cache;
551 
552 	cache = block_group_cache_tree_search(info, bytenr, 1);
553 
554 	return cache;
555 }
556 
__find_space_info(struct btrfs_fs_info * info,u64 flags)557 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
558 						  u64 flags)
559 {
560 	struct list_head *head = &info->space_info;
561 	struct btrfs_space_info *found;
562 
563 	flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
564 		 BTRFS_BLOCK_GROUP_METADATA;
565 
566 	rcu_read_lock();
567 	list_for_each_entry_rcu(found, head, list) {
568 		if (found->flags & flags) {
569 			rcu_read_unlock();
570 			return found;
571 		}
572 	}
573 	rcu_read_unlock();
574 	return NULL;
575 }
576 
577 /*
578  * after adding space to the filesystem, we need to clear the full flags
579  * on all the space infos.
580  */
btrfs_clear_space_info_full(struct btrfs_fs_info * info)581 void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
582 {
583 	struct list_head *head = &info->space_info;
584 	struct btrfs_space_info *found;
585 
586 	rcu_read_lock();
587 	list_for_each_entry_rcu(found, head, list)
588 		found->full = 0;
589 	rcu_read_unlock();
590 }
591 
div_factor(u64 num,int factor)592 static u64 div_factor(u64 num, int factor)
593 {
594 	if (factor == 10)
595 		return num;
596 	num *= factor;
597 	do_div(num, 10);
598 	return num;
599 }
600 
div_factor_fine(u64 num,int factor)601 static u64 div_factor_fine(u64 num, int factor)
602 {
603 	if (factor == 100)
604 		return num;
605 	num *= factor;
606 	do_div(num, 100);
607 	return num;
608 }
609 
btrfs_find_block_group(struct btrfs_root * root,u64 search_start,u64 search_hint,int owner)610 u64 btrfs_find_block_group(struct btrfs_root *root,
611 			   u64 search_start, u64 search_hint, int owner)
612 {
613 	struct btrfs_block_group_cache *cache;
614 	u64 used;
615 	u64 last = max(search_hint, search_start);
616 	u64 group_start = 0;
617 	int full_search = 0;
618 	int factor = 9;
619 	int wrapped = 0;
620 again:
621 	while (1) {
622 		cache = btrfs_lookup_first_block_group(root->fs_info, last);
623 		if (!cache)
624 			break;
625 
626 		spin_lock(&cache->lock);
627 		last = cache->key.objectid + cache->key.offset;
628 		used = btrfs_block_group_used(&cache->item);
629 
630 		if ((full_search || !cache->ro) &&
631 		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
632 			if (used + cache->pinned + cache->reserved <
633 			    div_factor(cache->key.offset, factor)) {
634 				group_start = cache->key.objectid;
635 				spin_unlock(&cache->lock);
636 				btrfs_put_block_group(cache);
637 				goto found;
638 			}
639 		}
640 		spin_unlock(&cache->lock);
641 		btrfs_put_block_group(cache);
642 		cond_resched();
643 	}
644 	if (!wrapped) {
645 		last = search_start;
646 		wrapped = 1;
647 		goto again;
648 	}
649 	if (!full_search && factor < 10) {
650 		last = search_start;
651 		full_search = 1;
652 		factor = 10;
653 		goto again;
654 	}
655 found:
656 	return group_start;
657 }
658 
659 /* simple helper to search for an existing extent at a given offset */
btrfs_lookup_extent(struct btrfs_root * root,u64 start,u64 len)660 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
661 {
662 	int ret;
663 	struct btrfs_key key;
664 	struct btrfs_path *path;
665 
666 	path = btrfs_alloc_path();
667 	BUG_ON(!path);
668 	key.objectid = start;
669 	key.offset = len;
670 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
671 	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
672 				0, 0);
673 	btrfs_free_path(path);
674 	return ret;
675 }
676 
677 /*
678  * helper function to lookup reference count and flags of extent.
679  *
680  * the head node for delayed ref is used to store the sum of all the
681  * reference count modifications queued up in the rbtree. the head
682  * node may also store the extent flags to set. This way you can check
683  * to see what the reference count and extent flags would be if all of
684  * the delayed refs are not processed.
685  */
btrfs_lookup_extent_info(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u64 num_bytes,u64 * refs,u64 * flags)686 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
687 			     struct btrfs_root *root, u64 bytenr,
688 			     u64 num_bytes, u64 *refs, u64 *flags)
689 {
690 	struct btrfs_delayed_ref_head *head;
691 	struct btrfs_delayed_ref_root *delayed_refs;
692 	struct btrfs_path *path;
693 	struct btrfs_extent_item *ei;
694 	struct extent_buffer *leaf;
695 	struct btrfs_key key;
696 	u32 item_size;
697 	u64 num_refs;
698 	u64 extent_flags;
699 	int ret;
700 
701 	path = btrfs_alloc_path();
702 	if (!path)
703 		return -ENOMEM;
704 
705 	key.objectid = bytenr;
706 	key.type = BTRFS_EXTENT_ITEM_KEY;
707 	key.offset = num_bytes;
708 	if (!trans) {
709 		path->skip_locking = 1;
710 		path->search_commit_root = 1;
711 	}
712 again:
713 	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
714 				&key, path, 0, 0);
715 	if (ret < 0)
716 		goto out_free;
717 
718 	if (ret == 0) {
719 		leaf = path->nodes[0];
720 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
721 		if (item_size >= sizeof(*ei)) {
722 			ei = btrfs_item_ptr(leaf, path->slots[0],
723 					    struct btrfs_extent_item);
724 			num_refs = btrfs_extent_refs(leaf, ei);
725 			extent_flags = btrfs_extent_flags(leaf, ei);
726 		} else {
727 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
728 			struct btrfs_extent_item_v0 *ei0;
729 			BUG_ON(item_size != sizeof(*ei0));
730 			ei0 = btrfs_item_ptr(leaf, path->slots[0],
731 					     struct btrfs_extent_item_v0);
732 			num_refs = btrfs_extent_refs_v0(leaf, ei0);
733 			/* FIXME: this isn't correct for data */
734 			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
735 #else
736 			BUG();
737 #endif
738 		}
739 		BUG_ON(num_refs == 0);
740 	} else {
741 		num_refs = 0;
742 		extent_flags = 0;
743 		ret = 0;
744 	}
745 
746 	if (!trans)
747 		goto out;
748 
749 	delayed_refs = &trans->transaction->delayed_refs;
750 	spin_lock(&delayed_refs->lock);
751 	head = btrfs_find_delayed_ref_head(trans, bytenr);
752 	if (head) {
753 		if (!mutex_trylock(&head->mutex)) {
754 			atomic_inc(&head->node.refs);
755 			spin_unlock(&delayed_refs->lock);
756 
757 			btrfs_release_path(root->fs_info->extent_root, path);
758 
759 			mutex_lock(&head->mutex);
760 			mutex_unlock(&head->mutex);
761 			btrfs_put_delayed_ref(&head->node);
762 			goto again;
763 		}
764 		if (head->extent_op && head->extent_op->update_flags)
765 			extent_flags |= head->extent_op->flags_to_set;
766 		else
767 			BUG_ON(num_refs == 0);
768 
769 		num_refs += head->node.ref_mod;
770 		mutex_unlock(&head->mutex);
771 	}
772 	spin_unlock(&delayed_refs->lock);
773 out:
774 	WARN_ON(num_refs == 0);
775 	if (refs)
776 		*refs = num_refs;
777 	if (flags)
778 		*flags = extent_flags;
779 out_free:
780 	btrfs_free_path(path);
781 	return ret;
782 }
783 
784 /*
785  * Back reference rules.  Back refs have three main goals:
786  *
787  * 1) differentiate between all holders of references to an extent so that
788  *    when a reference is dropped we can make sure it was a valid reference
789  *    before freeing the extent.
790  *
791  * 2) Provide enough information to quickly find the holders of an extent
792  *    if we notice a given block is corrupted or bad.
793  *
794  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
795  *    maintenance.  This is actually the same as #2, but with a slightly
796  *    different use case.
797  *
798  * There are two kinds of back refs. The implicit back refs is optimized
799  * for pointers in non-shared tree blocks. For a given pointer in a block,
800  * back refs of this kind provide information about the block's owner tree
801  * and the pointer's key. These information allow us to find the block by
802  * b-tree searching. The full back refs is for pointers in tree blocks not
803  * referenced by their owner trees. The location of tree block is recorded
804  * in the back refs. Actually the full back refs is generic, and can be
805  * used in all cases the implicit back refs is used. The major shortcoming
806  * of the full back refs is its overhead. Every time a tree block gets
807  * COWed, we have to update back refs entry for all pointers in it.
808  *
809  * For a newly allocated tree block, we use implicit back refs for
810  * pointers in it. This means most tree related operations only involve
811  * implicit back refs. For a tree block created in old transaction, the
812  * only way to drop a reference to it is COW it. So we can detect the
813  * event that tree block loses its owner tree's reference and do the
814  * back refs conversion.
815  *
816  * When a tree block is COW'd through a tree, there are four cases:
817  *
818  * The reference count of the block is one and the tree is the block's
819  * owner tree. Nothing to do in this case.
820  *
821  * The reference count of the block is one and the tree is not the
822  * block's owner tree. In this case, full back refs is used for pointers
823  * in the block. Remove these full back refs, add implicit back refs for
824  * every pointers in the new block.
825  *
826  * The reference count of the block is greater than one and the tree is
827  * the block's owner tree. In this case, implicit back refs is used for
828  * pointers in the block. Add full back refs for every pointers in the
829  * block, increase lower level extents' reference counts. The original
830  * implicit back refs are entailed to the new block.
831  *
832  * The reference count of the block is greater than one and the tree is
833  * not the block's owner tree. Add implicit back refs for every pointer in
834  * the new block, increase lower level extents' reference count.
835  *
836  * Back Reference Key composing:
837  *
838  * The key objectid corresponds to the first byte in the extent,
839  * The key type is used to differentiate between types of back refs.
840  * There are different meanings of the key offset for different types
841  * of back refs.
842  *
843  * File extents can be referenced by:
844  *
845  * - multiple snapshots, subvolumes, or different generations in one subvol
846  * - different files inside a single subvolume
847  * - different offsets inside a file (bookend extents in file.c)
848  *
849  * The extent ref structure for the implicit back refs has fields for:
850  *
851  * - Objectid of the subvolume root
852  * - objectid of the file holding the reference
853  * - original offset in the file
854  * - how many bookend extents
855  *
856  * The key offset for the implicit back refs is hash of the first
857  * three fields.
858  *
859  * The extent ref structure for the full back refs has field for:
860  *
861  * - number of pointers in the tree leaf
862  *
863  * The key offset for the implicit back refs is the first byte of
864  * the tree leaf
865  *
866  * When a file extent is allocated, The implicit back refs is used.
867  * the fields are filled in:
868  *
869  *     (root_key.objectid, inode objectid, offset in file, 1)
870  *
871  * When a file extent is removed file truncation, we find the
872  * corresponding implicit back refs and check the following fields:
873  *
874  *     (btrfs_header_owner(leaf), inode objectid, offset in file)
875  *
876  * Btree extents can be referenced by:
877  *
878  * - Different subvolumes
879  *
880  * Both the implicit back refs and the full back refs for tree blocks
881  * only consist of key. The key offset for the implicit back refs is
882  * objectid of block's owner tree. The key offset for the full back refs
883  * is the first byte of parent block.
884  *
885  * When implicit back refs is used, information about the lowest key and
886  * level of the tree block are required. These information are stored in
887  * tree block info structure.
888  */
889 
890 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
convert_extent_item_v0(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 owner,u32 extra_size)891 static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
892 				  struct btrfs_root *root,
893 				  struct btrfs_path *path,
894 				  u64 owner, u32 extra_size)
895 {
896 	struct btrfs_extent_item *item;
897 	struct btrfs_extent_item_v0 *ei0;
898 	struct btrfs_extent_ref_v0 *ref0;
899 	struct btrfs_tree_block_info *bi;
900 	struct extent_buffer *leaf;
901 	struct btrfs_key key;
902 	struct btrfs_key found_key;
903 	u32 new_size = sizeof(*item);
904 	u64 refs;
905 	int ret;
906 
907 	leaf = path->nodes[0];
908 	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));
909 
910 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
911 	ei0 = btrfs_item_ptr(leaf, path->slots[0],
912 			     struct btrfs_extent_item_v0);
913 	refs = btrfs_extent_refs_v0(leaf, ei0);
914 
915 	if (owner == (u64)-1) {
916 		while (1) {
917 			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
918 				ret = btrfs_next_leaf(root, path);
919 				if (ret < 0)
920 					return ret;
921 				BUG_ON(ret > 0);
922 				leaf = path->nodes[0];
923 			}
924 			btrfs_item_key_to_cpu(leaf, &found_key,
925 					      path->slots[0]);
926 			BUG_ON(key.objectid != found_key.objectid);
927 			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
928 				path->slots[0]++;
929 				continue;
930 			}
931 			ref0 = btrfs_item_ptr(leaf, path->slots[0],
932 					      struct btrfs_extent_ref_v0);
933 			owner = btrfs_ref_objectid_v0(leaf, ref0);
934 			break;
935 		}
936 	}
937 	btrfs_release_path(root, path);
938 
939 	if (owner < BTRFS_FIRST_FREE_OBJECTID)
940 		new_size += sizeof(*bi);
941 
942 	new_size -= sizeof(*ei0);
943 	ret = btrfs_search_slot(trans, root, &key, path,
944 				new_size + extra_size, 1);
945 	if (ret < 0)
946 		return ret;
947 	BUG_ON(ret);
948 
949 	ret = btrfs_extend_item(trans, root, path, new_size);
950 	BUG_ON(ret);
951 
952 	leaf = path->nodes[0];
953 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
954 	btrfs_set_extent_refs(leaf, item, refs);
955 	/* FIXME: get real generation */
956 	btrfs_set_extent_generation(leaf, item, 0);
957 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
958 		btrfs_set_extent_flags(leaf, item,
959 				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
960 				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
961 		bi = (struct btrfs_tree_block_info *)(item + 1);
962 		/* FIXME: get first key of the block */
963 		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
964 		btrfs_set_tree_block_level(leaf, bi, (int)owner);
965 	} else {
966 		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
967 	}
968 	btrfs_mark_buffer_dirty(leaf);
969 	return 0;
970 }
971 #endif
972 
hash_extent_data_ref(u64 root_objectid,u64 owner,u64 offset)973 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
974 {
975 	u32 high_crc = ~(u32)0;
976 	u32 low_crc = ~(u32)0;
977 	__le64 lenum;
978 
979 	lenum = cpu_to_le64(root_objectid);
980 	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
981 	lenum = cpu_to_le64(owner);
982 	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
983 	lenum = cpu_to_le64(offset);
984 	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
985 
986 	return ((u64)high_crc << 31) ^ (u64)low_crc;
987 }
988 
hash_extent_data_ref_item(struct extent_buffer * leaf,struct btrfs_extent_data_ref * ref)989 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
990 				     struct btrfs_extent_data_ref *ref)
991 {
992 	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
993 				    btrfs_extent_data_ref_objectid(leaf, ref),
994 				    btrfs_extent_data_ref_offset(leaf, ref));
995 }
996 
match_extent_data_ref(struct extent_buffer * leaf,struct btrfs_extent_data_ref * ref,u64 root_objectid,u64 owner,u64 offset)997 static int match_extent_data_ref(struct extent_buffer *leaf,
998 				 struct btrfs_extent_data_ref *ref,
999 				 u64 root_objectid, u64 owner, u64 offset)
1000 {
1001 	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
1002 	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
1003 	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
1004 		return 0;
1005 	return 1;
1006 }
1007 
lookup_extent_data_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid,u64 owner,u64 offset)1008 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
1009 					   struct btrfs_root *root,
1010 					   struct btrfs_path *path,
1011 					   u64 bytenr, u64 parent,
1012 					   u64 root_objectid,
1013 					   u64 owner, u64 offset)
1014 {
1015 	struct btrfs_key key;
1016 	struct btrfs_extent_data_ref *ref;
1017 	struct extent_buffer *leaf;
1018 	u32 nritems;
1019 	int ret;
1020 	int recow;
1021 	int err = -ENOENT;
1022 
1023 	key.objectid = bytenr;
1024 	if (parent) {
1025 		key.type = BTRFS_SHARED_DATA_REF_KEY;
1026 		key.offset = parent;
1027 	} else {
1028 		key.type = BTRFS_EXTENT_DATA_REF_KEY;
1029 		key.offset = hash_extent_data_ref(root_objectid,
1030 						  owner, offset);
1031 	}
1032 again:
1033 	recow = 0;
1034 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1035 	if (ret < 0) {
1036 		err = ret;
1037 		goto fail;
1038 	}
1039 
1040 	if (parent) {
1041 		if (!ret)
1042 			return 0;
1043 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1044 		key.type = BTRFS_EXTENT_REF_V0_KEY;
1045 		btrfs_release_path(root, path);
1046 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1047 		if (ret < 0) {
1048 			err = ret;
1049 			goto fail;
1050 		}
1051 		if (!ret)
1052 			return 0;
1053 #endif
1054 		goto fail;
1055 	}
1056 
1057 	leaf = path->nodes[0];
1058 	nritems = btrfs_header_nritems(leaf);
1059 	while (1) {
1060 		if (path->slots[0] >= nritems) {
1061 			ret = btrfs_next_leaf(root, path);
1062 			if (ret < 0)
1063 				err = ret;
1064 			if (ret)
1065 				goto fail;
1066 
1067 			leaf = path->nodes[0];
1068 			nritems = btrfs_header_nritems(leaf);
1069 			recow = 1;
1070 		}
1071 
1072 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1073 		if (key.objectid != bytenr ||
1074 		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
1075 			goto fail;
1076 
1077 		ref = btrfs_item_ptr(leaf, path->slots[0],
1078 				     struct btrfs_extent_data_ref);
1079 
1080 		if (match_extent_data_ref(leaf, ref, root_objectid,
1081 					  owner, offset)) {
1082 			if (recow) {
1083 				btrfs_release_path(root, path);
1084 				goto again;
1085 			}
1086 			err = 0;
1087 			break;
1088 		}
1089 		path->slots[0]++;
1090 	}
1091 fail:
1092 	return err;
1093 }
1094 
insert_extent_data_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add)1095 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
1096 					   struct btrfs_root *root,
1097 					   struct btrfs_path *path,
1098 					   u64 bytenr, u64 parent,
1099 					   u64 root_objectid, u64 owner,
1100 					   u64 offset, int refs_to_add)
1101 {
1102 	struct btrfs_key key;
1103 	struct extent_buffer *leaf;
1104 	u32 size;
1105 	u32 num_refs;
1106 	int ret;
1107 
1108 	key.objectid = bytenr;
1109 	if (parent) {
1110 		key.type = BTRFS_SHARED_DATA_REF_KEY;
1111 		key.offset = parent;
1112 		size = sizeof(struct btrfs_shared_data_ref);
1113 	} else {
1114 		key.type = BTRFS_EXTENT_DATA_REF_KEY;
1115 		key.offset = hash_extent_data_ref(root_objectid,
1116 						  owner, offset);
1117 		size = sizeof(struct btrfs_extent_data_ref);
1118 	}
1119 
1120 	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
1121 	if (ret && ret != -EEXIST)
1122 		goto fail;
1123 
1124 	leaf = path->nodes[0];
1125 	if (parent) {
1126 		struct btrfs_shared_data_ref *ref;
1127 		ref = btrfs_item_ptr(leaf, path->slots[0],
1128 				     struct btrfs_shared_data_ref);
1129 		if (ret == 0) {
1130 			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
1131 		} else {
1132 			num_refs = btrfs_shared_data_ref_count(leaf, ref);
1133 			num_refs += refs_to_add;
1134 			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
1135 		}
1136 	} else {
1137 		struct btrfs_extent_data_ref *ref;
1138 		while (ret == -EEXIST) {
1139 			ref = btrfs_item_ptr(leaf, path->slots[0],
1140 					     struct btrfs_extent_data_ref);
1141 			if (match_extent_data_ref(leaf, ref, root_objectid,
1142 						  owner, offset))
1143 				break;
1144 			btrfs_release_path(root, path);
1145 			key.offset++;
1146 			ret = btrfs_insert_empty_item(trans, root, path, &key,
1147 						      size);
1148 			if (ret && ret != -EEXIST)
1149 				goto fail;
1150 
1151 			leaf = path->nodes[0];
1152 		}
1153 		ref = btrfs_item_ptr(leaf, path->slots[0],
1154 				     struct btrfs_extent_data_ref);
1155 		if (ret == 0) {
1156 			btrfs_set_extent_data_ref_root(leaf, ref,
1157 						       root_objectid);
1158 			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
1159 			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
1160 			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
1161 		} else {
1162 			num_refs = btrfs_extent_data_ref_count(leaf, ref);
1163 			num_refs += refs_to_add;
1164 			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
1165 		}
1166 	}
1167 	btrfs_mark_buffer_dirty(leaf);
1168 	ret = 0;
1169 fail:
1170 	btrfs_release_path(root, path);
1171 	return ret;
1172 }
1173 
remove_extent_data_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,int refs_to_drop)1174 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
1175 					   struct btrfs_root *root,
1176 					   struct btrfs_path *path,
1177 					   int refs_to_drop)
1178 {
1179 	struct btrfs_key key;
1180 	struct btrfs_extent_data_ref *ref1 = NULL;
1181 	struct btrfs_shared_data_ref *ref2 = NULL;
1182 	struct extent_buffer *leaf;
1183 	u32 num_refs = 0;
1184 	int ret = 0;
1185 
1186 	leaf = path->nodes[0];
1187 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1188 
1189 	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1190 		ref1 = btrfs_item_ptr(leaf, path->slots[0],
1191 				      struct btrfs_extent_data_ref);
1192 		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1193 	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1194 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
1195 				      struct btrfs_shared_data_ref);
1196 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1197 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1198 	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
1199 		struct btrfs_extent_ref_v0 *ref0;
1200 		ref0 = btrfs_item_ptr(leaf, path->slots[0],
1201 				      struct btrfs_extent_ref_v0);
1202 		num_refs = btrfs_ref_count_v0(leaf, ref0);
1203 #endif
1204 	} else {
1205 		BUG();
1206 	}
1207 
1208 	BUG_ON(num_refs < refs_to_drop);
1209 	num_refs -= refs_to_drop;
1210 
1211 	if (num_refs == 0) {
1212 		ret = btrfs_del_item(trans, root, path);
1213 	} else {
1214 		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
1215 			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
1216 		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
1217 			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
1218 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1219 		else {
1220 			struct btrfs_extent_ref_v0 *ref0;
1221 			ref0 = btrfs_item_ptr(leaf, path->slots[0],
1222 					struct btrfs_extent_ref_v0);
1223 			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
1224 		}
1225 #endif
1226 		btrfs_mark_buffer_dirty(leaf);
1227 	}
1228 	return ret;
1229 }
1230 
extent_data_ref_count(struct btrfs_root * root,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref)1231 static noinline u32 extent_data_ref_count(struct btrfs_root *root,
1232 					  struct btrfs_path *path,
1233 					  struct btrfs_extent_inline_ref *iref)
1234 {
1235 	struct btrfs_key key;
1236 	struct extent_buffer *leaf;
1237 	struct btrfs_extent_data_ref *ref1;
1238 	struct btrfs_shared_data_ref *ref2;
1239 	u32 num_refs = 0;
1240 
1241 	leaf = path->nodes[0];
1242 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1243 	if (iref) {
1244 		if (btrfs_extent_inline_ref_type(leaf, iref) ==
1245 		    BTRFS_EXTENT_DATA_REF_KEY) {
1246 			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
1247 			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1248 		} else {
1249 			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
1250 			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1251 		}
1252 	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
1253 		ref1 = btrfs_item_ptr(leaf, path->slots[0],
1254 				      struct btrfs_extent_data_ref);
1255 		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
1256 	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
1257 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
1258 				      struct btrfs_shared_data_ref);
1259 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
1260 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1261 	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
1262 		struct btrfs_extent_ref_v0 *ref0;
1263 		ref0 = btrfs_item_ptr(leaf, path->slots[0],
1264 				      struct btrfs_extent_ref_v0);
1265 		num_refs = btrfs_ref_count_v0(leaf, ref0);
1266 #endif
1267 	} else {
1268 		WARN_ON(1);
1269 	}
1270 	return num_refs;
1271 }
1272 
lookup_tree_block_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid)1273 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
1274 					  struct btrfs_root *root,
1275 					  struct btrfs_path *path,
1276 					  u64 bytenr, u64 parent,
1277 					  u64 root_objectid)
1278 {
1279 	struct btrfs_key key;
1280 	int ret;
1281 
1282 	key.objectid = bytenr;
1283 	if (parent) {
1284 		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1285 		key.offset = parent;
1286 	} else {
1287 		key.type = BTRFS_TREE_BLOCK_REF_KEY;
1288 		key.offset = root_objectid;
1289 	}
1290 
1291 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1292 	if (ret > 0)
1293 		ret = -ENOENT;
1294 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1295 	if (ret == -ENOENT && parent) {
1296 		btrfs_release_path(root, path);
1297 		key.type = BTRFS_EXTENT_REF_V0_KEY;
1298 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1299 		if (ret > 0)
1300 			ret = -ENOENT;
1301 	}
1302 #endif
1303 	return ret;
1304 }
1305 
insert_tree_block_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid)1306 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
1307 					  struct btrfs_root *root,
1308 					  struct btrfs_path *path,
1309 					  u64 bytenr, u64 parent,
1310 					  u64 root_objectid)
1311 {
1312 	struct btrfs_key key;
1313 	int ret;
1314 
1315 	key.objectid = bytenr;
1316 	if (parent) {
1317 		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
1318 		key.offset = parent;
1319 	} else {
1320 		key.type = BTRFS_TREE_BLOCK_REF_KEY;
1321 		key.offset = root_objectid;
1322 	}
1323 
1324 	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
1325 	btrfs_release_path(root, path);
1326 	return ret;
1327 }
1328 
extent_ref_type(u64 parent,u64 owner)1329 static inline int extent_ref_type(u64 parent, u64 owner)
1330 {
1331 	int type;
1332 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1333 		if (parent > 0)
1334 			type = BTRFS_SHARED_BLOCK_REF_KEY;
1335 		else
1336 			type = BTRFS_TREE_BLOCK_REF_KEY;
1337 	} else {
1338 		if (parent > 0)
1339 			type = BTRFS_SHARED_DATA_REF_KEY;
1340 		else
1341 			type = BTRFS_EXTENT_DATA_REF_KEY;
1342 	}
1343 	return type;
1344 }
1345 
find_next_key(struct btrfs_path * path,int level,struct btrfs_key * key)1346 static int find_next_key(struct btrfs_path *path, int level,
1347 			 struct btrfs_key *key)
1348 
1349 {
1350 	for (; level < BTRFS_MAX_LEVEL; level++) {
1351 		if (!path->nodes[level])
1352 			break;
1353 		if (path->slots[level] + 1 >=
1354 		    btrfs_header_nritems(path->nodes[level]))
1355 			continue;
1356 		if (level == 0)
1357 			btrfs_item_key_to_cpu(path->nodes[level], key,
1358 					      path->slots[level] + 1);
1359 		else
1360 			btrfs_node_key_to_cpu(path->nodes[level], key,
1361 					      path->slots[level] + 1);
1362 		return 0;
1363 	}
1364 	return 1;
1365 }
1366 
1367 /*
1368  * look for inline back ref. if back ref is found, *ref_ret is set
1369  * to the address of inline back ref, and 0 is returned.
1370  *
1371  * if back ref isn't found, *ref_ret is set to the address where it
1372  * should be inserted, and -ENOENT is returned.
1373  *
1374  * if insert is true and there are too many inline back refs, the path
1375  * points to the extent item, and -EAGAIN is returned.
1376  *
1377  * NOTE: inline back refs are ordered in the same way that back ref
1378  *	 items in the tree are ordered.
1379  */
1380 static noinline_for_stack
lookup_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_extent_inline_ref ** ref_ret,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset,int insert)1381 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
1382 				 struct btrfs_root *root,
1383 				 struct btrfs_path *path,
1384 				 struct btrfs_extent_inline_ref **ref_ret,
1385 				 u64 bytenr, u64 num_bytes,
1386 				 u64 parent, u64 root_objectid,
1387 				 u64 owner, u64 offset, int insert)
1388 {
1389 	struct btrfs_key key;
1390 	struct extent_buffer *leaf;
1391 	struct btrfs_extent_item *ei;
1392 	struct btrfs_extent_inline_ref *iref;
1393 	u64 flags;
1394 	u64 item_size;
1395 	unsigned long ptr;
1396 	unsigned long end;
1397 	int extra_size;
1398 	int type;
1399 	int want;
1400 	int ret;
1401 	int err = 0;
1402 
1403 	key.objectid = bytenr;
1404 	key.type = BTRFS_EXTENT_ITEM_KEY;
1405 	key.offset = num_bytes;
1406 
1407 	want = extent_ref_type(parent, owner);
1408 	if (insert) {
1409 		extra_size = btrfs_extent_inline_ref_size(want);
1410 		path->keep_locks = 1;
1411 	} else
1412 		extra_size = -1;
1413 	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
1414 	if (ret < 0) {
1415 		err = ret;
1416 		goto out;
1417 	}
1418 	BUG_ON(ret);
1419 
1420 	leaf = path->nodes[0];
1421 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1422 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1423 	if (item_size < sizeof(*ei)) {
1424 		if (!insert) {
1425 			err = -ENOENT;
1426 			goto out;
1427 		}
1428 		ret = convert_extent_item_v0(trans, root, path, owner,
1429 					     extra_size);
1430 		if (ret < 0) {
1431 			err = ret;
1432 			goto out;
1433 		}
1434 		leaf = path->nodes[0];
1435 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1436 	}
1437 #endif
1438 	BUG_ON(item_size < sizeof(*ei));
1439 
1440 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1441 	flags = btrfs_extent_flags(leaf, ei);
1442 
1443 	ptr = (unsigned long)(ei + 1);
1444 	end = (unsigned long)ei + item_size;
1445 
1446 	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
1447 		ptr += sizeof(struct btrfs_tree_block_info);
1448 		BUG_ON(ptr > end);
1449 	} else {
1450 		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
1451 	}
1452 
1453 	err = -ENOENT;
1454 	while (1) {
1455 		if (ptr >= end) {
1456 			WARN_ON(ptr > end);
1457 			break;
1458 		}
1459 		iref = (struct btrfs_extent_inline_ref *)ptr;
1460 		type = btrfs_extent_inline_ref_type(leaf, iref);
1461 		if (want < type)
1462 			break;
1463 		if (want > type) {
1464 			ptr += btrfs_extent_inline_ref_size(type);
1465 			continue;
1466 		}
1467 
1468 		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1469 			struct btrfs_extent_data_ref *dref;
1470 			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1471 			if (match_extent_data_ref(leaf, dref, root_objectid,
1472 						  owner, offset)) {
1473 				err = 0;
1474 				break;
1475 			}
1476 			if (hash_extent_data_ref_item(leaf, dref) <
1477 			    hash_extent_data_ref(root_objectid, owner, offset))
1478 				break;
1479 		} else {
1480 			u64 ref_offset;
1481 			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
1482 			if (parent > 0) {
1483 				if (parent == ref_offset) {
1484 					err = 0;
1485 					break;
1486 				}
1487 				if (ref_offset < parent)
1488 					break;
1489 			} else {
1490 				if (root_objectid == ref_offset) {
1491 					err = 0;
1492 					break;
1493 				}
1494 				if (ref_offset < root_objectid)
1495 					break;
1496 			}
1497 		}
1498 		ptr += btrfs_extent_inline_ref_size(type);
1499 	}
1500 	if (err == -ENOENT && insert) {
1501 		if (item_size + extra_size >=
1502 		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
1503 			err = -EAGAIN;
1504 			goto out;
1505 		}
1506 		/*
1507 		 * To add new inline back ref, we have to make sure
1508 		 * there is no corresponding back ref item.
1509 		 * For simplicity, we just do not add new inline back
1510 		 * ref if there is any kind of item for this block
1511 		 */
1512 		if (find_next_key(path, 0, &key) == 0 &&
1513 		    key.objectid == bytenr &&
1514 		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
1515 			err = -EAGAIN;
1516 			goto out;
1517 		}
1518 	}
1519 	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
1520 out:
1521 	if (insert) {
1522 		path->keep_locks = 0;
1523 		btrfs_unlock_up_safe(path, 1);
1524 	}
1525 	return err;
1526 }
1527 
1528 /*
1529  * helper to add new inline back ref
1530  */
1531 static noinline_for_stack
setup_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add,struct btrfs_delayed_extent_op * extent_op)1532 int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
1533 				struct btrfs_root *root,
1534 				struct btrfs_path *path,
1535 				struct btrfs_extent_inline_ref *iref,
1536 				u64 parent, u64 root_objectid,
1537 				u64 owner, u64 offset, int refs_to_add,
1538 				struct btrfs_delayed_extent_op *extent_op)
1539 {
1540 	struct extent_buffer *leaf;
1541 	struct btrfs_extent_item *ei;
1542 	unsigned long ptr;
1543 	unsigned long end;
1544 	unsigned long item_offset;
1545 	u64 refs;
1546 	int size;
1547 	int type;
1548 	int ret;
1549 
1550 	leaf = path->nodes[0];
1551 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1552 	item_offset = (unsigned long)iref - (unsigned long)ei;
1553 
1554 	type = extent_ref_type(parent, owner);
1555 	size = btrfs_extent_inline_ref_size(type);
1556 
1557 	ret = btrfs_extend_item(trans, root, path, size);
1558 	BUG_ON(ret);
1559 
1560 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1561 	refs = btrfs_extent_refs(leaf, ei);
1562 	refs += refs_to_add;
1563 	btrfs_set_extent_refs(leaf, ei, refs);
1564 	if (extent_op)
1565 		__run_delayed_extent_op(extent_op, leaf, ei);
1566 
1567 	ptr = (unsigned long)ei + item_offset;
1568 	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
1569 	if (ptr < end - size)
1570 		memmove_extent_buffer(leaf, ptr + size, ptr,
1571 				      end - size - ptr);
1572 
1573 	iref = (struct btrfs_extent_inline_ref *)ptr;
1574 	btrfs_set_extent_inline_ref_type(leaf, iref, type);
1575 	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1576 		struct btrfs_extent_data_ref *dref;
1577 		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1578 		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
1579 		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
1580 		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
1581 		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
1582 	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1583 		struct btrfs_shared_data_ref *sref;
1584 		sref = (struct btrfs_shared_data_ref *)(iref + 1);
1585 		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
1586 		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1587 	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
1588 		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
1589 	} else {
1590 		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
1591 	}
1592 	btrfs_mark_buffer_dirty(leaf);
1593 	return 0;
1594 }
1595 
lookup_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_extent_inline_ref ** ref_ret,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset)1596 static int lookup_extent_backref(struct btrfs_trans_handle *trans,
1597 				 struct btrfs_root *root,
1598 				 struct btrfs_path *path,
1599 				 struct btrfs_extent_inline_ref **ref_ret,
1600 				 u64 bytenr, u64 num_bytes, u64 parent,
1601 				 u64 root_objectid, u64 owner, u64 offset)
1602 {
1603 	int ret;
1604 
1605 	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
1606 					   bytenr, num_bytes, parent,
1607 					   root_objectid, owner, offset, 0);
1608 	if (ret != -ENOENT)
1609 		return ret;
1610 
1611 	btrfs_release_path(root, path);
1612 	*ref_ret = NULL;
1613 
1614 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1615 		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
1616 					    root_objectid);
1617 	} else {
1618 		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
1619 					     root_objectid, owner, offset);
1620 	}
1621 	return ret;
1622 }
1623 
1624 /*
1625  * helper to update/remove inline back ref
1626  */
1627 static noinline_for_stack
update_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref,int refs_to_mod,struct btrfs_delayed_extent_op * extent_op)1628 int update_inline_extent_backref(struct btrfs_trans_handle *trans,
1629 				 struct btrfs_root *root,
1630 				 struct btrfs_path *path,
1631 				 struct btrfs_extent_inline_ref *iref,
1632 				 int refs_to_mod,
1633 				 struct btrfs_delayed_extent_op *extent_op)
1634 {
1635 	struct extent_buffer *leaf;
1636 	struct btrfs_extent_item *ei;
1637 	struct btrfs_extent_data_ref *dref = NULL;
1638 	struct btrfs_shared_data_ref *sref = NULL;
1639 	unsigned long ptr;
1640 	unsigned long end;
1641 	u32 item_size;
1642 	int size;
1643 	int type;
1644 	int ret;
1645 	u64 refs;
1646 
1647 	leaf = path->nodes[0];
1648 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1649 	refs = btrfs_extent_refs(leaf, ei);
1650 	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
1651 	refs += refs_to_mod;
1652 	btrfs_set_extent_refs(leaf, ei, refs);
1653 	if (extent_op)
1654 		__run_delayed_extent_op(extent_op, leaf, ei);
1655 
1656 	type = btrfs_extent_inline_ref_type(leaf, iref);
1657 
1658 	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
1659 		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
1660 		refs = btrfs_extent_data_ref_count(leaf, dref);
1661 	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
1662 		sref = (struct btrfs_shared_data_ref *)(iref + 1);
1663 		refs = btrfs_shared_data_ref_count(leaf, sref);
1664 	} else {
1665 		refs = 1;
1666 		BUG_ON(refs_to_mod != -1);
1667 	}
1668 
1669 	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
1670 	refs += refs_to_mod;
1671 
1672 	if (refs > 0) {
1673 		if (type == BTRFS_EXTENT_DATA_REF_KEY)
1674 			btrfs_set_extent_data_ref_count(leaf, dref, refs);
1675 		else
1676 			btrfs_set_shared_data_ref_count(leaf, sref, refs);
1677 	} else {
1678 		size =  btrfs_extent_inline_ref_size(type);
1679 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1680 		ptr = (unsigned long)iref;
1681 		end = (unsigned long)ei + item_size;
1682 		if (ptr + size < end)
1683 			memmove_extent_buffer(leaf, ptr, ptr + size,
1684 					      end - ptr - size);
1685 		item_size -= size;
1686 		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
1687 		BUG_ON(ret);
1688 	}
1689 	btrfs_mark_buffer_dirty(leaf);
1690 	return 0;
1691 }
1692 
1693 static noinline_for_stack
insert_inline_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add,struct btrfs_delayed_extent_op * extent_op)1694 int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
1695 				 struct btrfs_root *root,
1696 				 struct btrfs_path *path,
1697 				 u64 bytenr, u64 num_bytes, u64 parent,
1698 				 u64 root_objectid, u64 owner,
1699 				 u64 offset, int refs_to_add,
1700 				 struct btrfs_delayed_extent_op *extent_op)
1701 {
1702 	struct btrfs_extent_inline_ref *iref;
1703 	int ret;
1704 
1705 	ret = lookup_inline_extent_backref(trans, root, path, &iref,
1706 					   bytenr, num_bytes, parent,
1707 					   root_objectid, owner, offset, 1);
1708 	if (ret == 0) {
1709 		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
1710 		ret = update_inline_extent_backref(trans, root, path, iref,
1711 						   refs_to_add, extent_op);
1712 	} else if (ret == -ENOENT) {
1713 		ret = setup_inline_extent_backref(trans, root, path, iref,
1714 						  parent, root_objectid,
1715 						  owner, offset, refs_to_add,
1716 						  extent_op);
1717 	}
1718 	return ret;
1719 }
1720 
insert_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 bytenr,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add)1721 static int insert_extent_backref(struct btrfs_trans_handle *trans,
1722 				 struct btrfs_root *root,
1723 				 struct btrfs_path *path,
1724 				 u64 bytenr, u64 parent, u64 root_objectid,
1725 				 u64 owner, u64 offset, int refs_to_add)
1726 {
1727 	int ret;
1728 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1729 		BUG_ON(refs_to_add != 1);
1730 		ret = insert_tree_block_ref(trans, root, path, bytenr,
1731 					    parent, root_objectid);
1732 	} else {
1733 		ret = insert_extent_data_ref(trans, root, path, bytenr,
1734 					     parent, root_objectid,
1735 					     owner, offset, refs_to_add);
1736 	}
1737 	return ret;
1738 }
1739 
remove_extent_backref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_extent_inline_ref * iref,int refs_to_drop,int is_data)1740 static int remove_extent_backref(struct btrfs_trans_handle *trans,
1741 				 struct btrfs_root *root,
1742 				 struct btrfs_path *path,
1743 				 struct btrfs_extent_inline_ref *iref,
1744 				 int refs_to_drop, int is_data)
1745 {
1746 	int ret;
1747 
1748 	BUG_ON(!is_data && refs_to_drop != 1);
1749 	if (iref) {
1750 		ret = update_inline_extent_backref(trans, root, path, iref,
1751 						   -refs_to_drop, NULL);
1752 	} else if (is_data) {
1753 		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
1754 	} else {
1755 		ret = btrfs_del_item(trans, root, path);
1756 	}
1757 	return ret;
1758 }
1759 
btrfs_issue_discard(struct block_device * bdev,u64 start,u64 len)1760 static int btrfs_issue_discard(struct block_device *bdev,
1761 				u64 start, u64 len)
1762 {
1763 	return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
1764 }
1765 
btrfs_discard_extent(struct btrfs_root * root,u64 bytenr,u64 num_bytes,u64 * actual_bytes)1766 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
1767 				u64 num_bytes, u64 *actual_bytes)
1768 {
1769 	int ret;
1770 	u64 discarded_bytes = 0;
1771 	struct btrfs_multi_bio *multi = NULL;
1772 
1773 
1774 	/* Tell the block device(s) that the sectors can be discarded */
1775 	ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
1776 			      bytenr, &num_bytes, &multi, 0);
1777 	if (!ret) {
1778 		struct btrfs_bio_stripe *stripe = multi->stripes;
1779 		int i;
1780 
1781 
1782 		for (i = 0; i < multi->num_stripes; i++, stripe++) {
1783 			ret = btrfs_issue_discard(stripe->dev->bdev,
1784 						  stripe->physical,
1785 						  stripe->length);
1786 			if (!ret)
1787 				discarded_bytes += stripe->length;
1788 			else if (ret != -EOPNOTSUPP)
1789 				break;
1790 		}
1791 		kfree(multi);
1792 	}
1793 	if (discarded_bytes && ret == -EOPNOTSUPP)
1794 		ret = 0;
1795 
1796 	if (actual_bytes)
1797 		*actual_bytes = discarded_bytes;
1798 
1799 
1800 	return ret;
1801 }
1802 
btrfs_inc_extent_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset)1803 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1804 			 struct btrfs_root *root,
1805 			 u64 bytenr, u64 num_bytes, u64 parent,
1806 			 u64 root_objectid, u64 owner, u64 offset)
1807 {
1808 	int ret;
1809 	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
1810 	       root_objectid == BTRFS_TREE_LOG_OBJECTID);
1811 
1812 	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
1813 		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
1814 					parent, root_objectid, (int)owner,
1815 					BTRFS_ADD_DELAYED_REF, NULL);
1816 	} else {
1817 		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
1818 					parent, root_objectid, owner, offset,
1819 					BTRFS_ADD_DELAYED_REF, NULL);
1820 	}
1821 	return ret;
1822 }
1823 
__btrfs_inc_extent_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset,int refs_to_add,struct btrfs_delayed_extent_op * extent_op)1824 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1825 				  struct btrfs_root *root,
1826 				  u64 bytenr, u64 num_bytes,
1827 				  u64 parent, u64 root_objectid,
1828 				  u64 owner, u64 offset, int refs_to_add,
1829 				  struct btrfs_delayed_extent_op *extent_op)
1830 {
1831 	struct btrfs_path *path;
1832 	struct extent_buffer *leaf;
1833 	struct btrfs_extent_item *item;
1834 	u64 refs;
1835 	int ret;
1836 	int err = 0;
1837 
1838 	path = btrfs_alloc_path();
1839 	if (!path)
1840 		return -ENOMEM;
1841 
1842 	path->reada = 1;
1843 	path->leave_spinning = 1;
1844 	/* this will setup the path even if it fails to insert the back ref */
1845 	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
1846 					   path, bytenr, num_bytes, parent,
1847 					   root_objectid, owner, offset,
1848 					   refs_to_add, extent_op);
1849 	if (ret == 0)
1850 		goto out;
1851 
1852 	if (ret != -EAGAIN) {
1853 		err = ret;
1854 		goto out;
1855 	}
1856 
1857 	leaf = path->nodes[0];
1858 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1859 	refs = btrfs_extent_refs(leaf, item);
1860 	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
1861 	if (extent_op)
1862 		__run_delayed_extent_op(extent_op, leaf, item);
1863 
1864 	btrfs_mark_buffer_dirty(leaf);
1865 	btrfs_release_path(root->fs_info->extent_root, path);
1866 
1867 	path->reada = 1;
1868 	path->leave_spinning = 1;
1869 
1870 	/* now insert the actual backref */
1871 	ret = insert_extent_backref(trans, root->fs_info->extent_root,
1872 				    path, bytenr, parent, root_objectid,
1873 				    owner, offset, refs_to_add);
1874 	BUG_ON(ret);
1875 out:
1876 	btrfs_free_path(path);
1877 	return err;
1878 }
1879 
run_delayed_data_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_delayed_ref_node * node,struct btrfs_delayed_extent_op * extent_op,int insert_reserved)1880 static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
1881 				struct btrfs_root *root,
1882 				struct btrfs_delayed_ref_node *node,
1883 				struct btrfs_delayed_extent_op *extent_op,
1884 				int insert_reserved)
1885 {
1886 	int ret = 0;
1887 	struct btrfs_delayed_data_ref *ref;
1888 	struct btrfs_key ins;
1889 	u64 parent = 0;
1890 	u64 ref_root = 0;
1891 	u64 flags = 0;
1892 
1893 	ins.objectid = node->bytenr;
1894 	ins.offset = node->num_bytes;
1895 	ins.type = BTRFS_EXTENT_ITEM_KEY;
1896 
1897 	ref = btrfs_delayed_node_to_data_ref(node);
1898 	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
1899 		parent = ref->parent;
1900 	else
1901 		ref_root = ref->root;
1902 
1903 	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
1904 		if (extent_op) {
1905 			BUG_ON(extent_op->update_key);
1906 			flags |= extent_op->flags_to_set;
1907 		}
1908 		ret = alloc_reserved_file_extent(trans, root,
1909 						 parent, ref_root, flags,
1910 						 ref->objectid, ref->offset,
1911 						 &ins, node->ref_mod);
1912 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
1913 		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
1914 					     node->num_bytes, parent,
1915 					     ref_root, ref->objectid,
1916 					     ref->offset, node->ref_mod,
1917 					     extent_op);
1918 	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
1919 		ret = __btrfs_free_extent(trans, root, node->bytenr,
1920 					  node->num_bytes, parent,
1921 					  ref_root, ref->objectid,
1922 					  ref->offset, node->ref_mod,
1923 					  extent_op);
1924 	} else {
1925 		BUG();
1926 	}
1927 	return ret;
1928 }
1929 
__run_delayed_extent_op(struct btrfs_delayed_extent_op * extent_op,struct extent_buffer * leaf,struct btrfs_extent_item * ei)1930 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
1931 				    struct extent_buffer *leaf,
1932 				    struct btrfs_extent_item *ei)
1933 {
1934 	u64 flags = btrfs_extent_flags(leaf, ei);
1935 	if (extent_op->update_flags) {
1936 		flags |= extent_op->flags_to_set;
1937 		btrfs_set_extent_flags(leaf, ei, flags);
1938 	}
1939 
1940 	if (extent_op->update_key) {
1941 		struct btrfs_tree_block_info *bi;
1942 		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
1943 		bi = (struct btrfs_tree_block_info *)(ei + 1);
1944 		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
1945 	}
1946 }
1947 
run_delayed_extent_op(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_delayed_ref_node * node,struct btrfs_delayed_extent_op * extent_op)1948 static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
1949 				 struct btrfs_root *root,
1950 				 struct btrfs_delayed_ref_node *node,
1951 				 struct btrfs_delayed_extent_op *extent_op)
1952 {
1953 	struct btrfs_key key;
1954 	struct btrfs_path *path;
1955 	struct btrfs_extent_item *ei;
1956 	struct extent_buffer *leaf;
1957 	u32 item_size;
1958 	int ret;
1959 	int err = 0;
1960 
1961 	path = btrfs_alloc_path();
1962 	if (!path)
1963 		return -ENOMEM;
1964 
1965 	key.objectid = node->bytenr;
1966 	key.type = BTRFS_EXTENT_ITEM_KEY;
1967 	key.offset = node->num_bytes;
1968 
1969 	path->reada = 1;
1970 	path->leave_spinning = 1;
1971 	ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
1972 				path, 0, 1);
1973 	if (ret < 0) {
1974 		err = ret;
1975 		goto out;
1976 	}
1977 	if (ret > 0) {
1978 		err = -EIO;
1979 		goto out;
1980 	}
1981 
1982 	leaf = path->nodes[0];
1983 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1984 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
1985 	if (item_size < sizeof(*ei)) {
1986 		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
1987 					     path, (u64)-1, 0);
1988 		if (ret < 0) {
1989 			err = ret;
1990 			goto out;
1991 		}
1992 		leaf = path->nodes[0];
1993 		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1994 	}
1995 #endif
1996 	BUG_ON(item_size < sizeof(*ei));
1997 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
1998 	__run_delayed_extent_op(extent_op, leaf, ei);
1999 
2000 	btrfs_mark_buffer_dirty(leaf);
2001 out:
2002 	btrfs_free_path(path);
2003 	return err;
2004 }
2005 
run_delayed_tree_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_delayed_ref_node * node,struct btrfs_delayed_extent_op * extent_op,int insert_reserved)2006 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
2007 				struct btrfs_root *root,
2008 				struct btrfs_delayed_ref_node *node,
2009 				struct btrfs_delayed_extent_op *extent_op,
2010 				int insert_reserved)
2011 {
2012 	int ret = 0;
2013 	struct btrfs_delayed_tree_ref *ref;
2014 	struct btrfs_key ins;
2015 	u64 parent = 0;
2016 	u64 ref_root = 0;
2017 
2018 	ins.objectid = node->bytenr;
2019 	ins.offset = node->num_bytes;
2020 	ins.type = BTRFS_EXTENT_ITEM_KEY;
2021 
2022 	ref = btrfs_delayed_node_to_tree_ref(node);
2023 	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2024 		parent = ref->parent;
2025 	else
2026 		ref_root = ref->root;
2027 
2028 	BUG_ON(node->ref_mod != 1);
2029 	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
2030 		BUG_ON(!extent_op || !extent_op->update_flags ||
2031 		       !extent_op->update_key);
2032 		ret = alloc_reserved_tree_block(trans, root,
2033 						parent, ref_root,
2034 						extent_op->flags_to_set,
2035 						&extent_op->key,
2036 						ref->level, &ins);
2037 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
2038 		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
2039 					     node->num_bytes, parent, ref_root,
2040 					     ref->level, 0, 1, extent_op);
2041 	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
2042 		ret = __btrfs_free_extent(trans, root, node->bytenr,
2043 					  node->num_bytes, parent, ref_root,
2044 					  ref->level, 0, 1, extent_op);
2045 	} else {
2046 		BUG();
2047 	}
2048 	return ret;
2049 }
2050 
2051 /* helper function to actually process a single delayed ref entry */
run_one_delayed_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_delayed_ref_node * node,struct btrfs_delayed_extent_op * extent_op,int insert_reserved)2052 static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2053 			       struct btrfs_root *root,
2054 			       struct btrfs_delayed_ref_node *node,
2055 			       struct btrfs_delayed_extent_op *extent_op,
2056 			       int insert_reserved)
2057 {
2058 	int ret;
2059 	if (btrfs_delayed_ref_is_head(node)) {
2060 		struct btrfs_delayed_ref_head *head;
2061 		/*
2062 		 * we've hit the end of the chain and we were supposed
2063 		 * to insert this extent into the tree.  But, it got
2064 		 * deleted before we ever needed to insert it, so all
2065 		 * we have to do is clean up the accounting
2066 		 */
2067 		BUG_ON(extent_op);
2068 		head = btrfs_delayed_node_to_head(node);
2069 		if (insert_reserved) {
2070 			btrfs_pin_extent(root, node->bytenr,
2071 					 node->num_bytes, 1);
2072 			if (head->is_data) {
2073 				ret = btrfs_del_csums(trans, root,
2074 						      node->bytenr,
2075 						      node->num_bytes);
2076 				BUG_ON(ret);
2077 			}
2078 		}
2079 		mutex_unlock(&head->mutex);
2080 		return 0;
2081 	}
2082 
2083 	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
2084 	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
2085 		ret = run_delayed_tree_ref(trans, root, node, extent_op,
2086 					   insert_reserved);
2087 	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
2088 		 node->type == BTRFS_SHARED_DATA_REF_KEY)
2089 		ret = run_delayed_data_ref(trans, root, node, extent_op,
2090 					   insert_reserved);
2091 	else
2092 		BUG();
2093 	return ret;
2094 }
2095 
2096 static noinline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head * head)2097 select_delayed_ref(struct btrfs_delayed_ref_head *head)
2098 {
2099 	struct rb_node *node;
2100 	struct btrfs_delayed_ref_node *ref;
2101 	int action = BTRFS_ADD_DELAYED_REF;
2102 again:
2103 	/*
2104 	 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
2105 	 * this prevents ref count from going down to zero when
2106 	 * there still are pending delayed ref.
2107 	 */
2108 	node = rb_prev(&head->node.rb_node);
2109 	while (1) {
2110 		if (!node)
2111 			break;
2112 		ref = rb_entry(node, struct btrfs_delayed_ref_node,
2113 				rb_node);
2114 		if (ref->bytenr != head->node.bytenr)
2115 			break;
2116 		if (ref->action == action)
2117 			return ref;
2118 		node = rb_prev(node);
2119 	}
2120 	if (action == BTRFS_ADD_DELAYED_REF) {
2121 		action = BTRFS_DROP_DELAYED_REF;
2122 		goto again;
2123 	}
2124 	return NULL;
2125 }
2126 
run_clustered_refs(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct list_head * cluster)2127 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2128 				       struct btrfs_root *root,
2129 				       struct list_head *cluster)
2130 {
2131 	struct btrfs_delayed_ref_root *delayed_refs;
2132 	struct btrfs_delayed_ref_node *ref;
2133 	struct btrfs_delayed_ref_head *locked_ref = NULL;
2134 	struct btrfs_delayed_extent_op *extent_op;
2135 	int ret;
2136 	int count = 0;
2137 	int must_insert_reserved = 0;
2138 
2139 	delayed_refs = &trans->transaction->delayed_refs;
2140 	while (1) {
2141 		if (!locked_ref) {
2142 			/* pick a new head ref from the cluster list */
2143 			if (list_empty(cluster))
2144 				break;
2145 
2146 			locked_ref = list_entry(cluster->next,
2147 				     struct btrfs_delayed_ref_head, cluster);
2148 
2149 			/* grab the lock that says we are going to process
2150 			 * all the refs for this head */
2151 			ret = btrfs_delayed_ref_lock(trans, locked_ref);
2152 
2153 			/*
2154 			 * we may have dropped the spin lock to get the head
2155 			 * mutex lock, and that might have given someone else
2156 			 * time to free the head.  If that's true, it has been
2157 			 * removed from our list and we can move on.
2158 			 */
2159 			if (ret == -EAGAIN) {
2160 				locked_ref = NULL;
2161 				count++;
2162 				continue;
2163 			}
2164 		}
2165 
2166 		/*
2167 		 * record the must insert reserved flag before we
2168 		 * drop the spin lock.
2169 		 */
2170 		must_insert_reserved = locked_ref->must_insert_reserved;
2171 		locked_ref->must_insert_reserved = 0;
2172 
2173 		extent_op = locked_ref->extent_op;
2174 		locked_ref->extent_op = NULL;
2175 
2176 		/*
2177 		 * locked_ref is the head node, so we have to go one
2178 		 * node back for any delayed ref updates
2179 		 */
2180 		ref = select_delayed_ref(locked_ref);
2181 		if (!ref) {
2182 			/* All delayed refs have been processed, Go ahead
2183 			 * and send the head node to run_one_delayed_ref,
2184 			 * so that any accounting fixes can happen
2185 			 */
2186 			ref = &locked_ref->node;
2187 
2188 			if (extent_op && must_insert_reserved) {
2189 				kfree(extent_op);
2190 				extent_op = NULL;
2191 			}
2192 
2193 			if (extent_op) {
2194 				spin_unlock(&delayed_refs->lock);
2195 
2196 				ret = run_delayed_extent_op(trans, root,
2197 							    ref, extent_op);
2198 				BUG_ON(ret);
2199 				kfree(extent_op);
2200 
2201 				cond_resched();
2202 				spin_lock(&delayed_refs->lock);
2203 				continue;
2204 			}
2205 
2206 			list_del_init(&locked_ref->cluster);
2207 			locked_ref = NULL;
2208 		}
2209 
2210 		ref->in_tree = 0;
2211 		rb_erase(&ref->rb_node, &delayed_refs->root);
2212 		delayed_refs->num_entries--;
2213 
2214 		spin_unlock(&delayed_refs->lock);
2215 
2216 		ret = run_one_delayed_ref(trans, root, ref, extent_op,
2217 					  must_insert_reserved);
2218 		BUG_ON(ret);
2219 
2220 		btrfs_put_delayed_ref(ref);
2221 		kfree(extent_op);
2222 		count++;
2223 
2224 		cond_resched();
2225 		spin_lock(&delayed_refs->lock);
2226 	}
2227 	return count;
2228 }
2229 
2230 /*
2231  * this starts processing the delayed reference count updates and
2232  * extent insertions we have queued up so far.  count can be
2233  * 0, which means to process everything in the tree at the start
2234  * of the run (but not newly added entries), or it can be some target
2235  * number you'd like to process.
2236  */
btrfs_run_delayed_refs(struct btrfs_trans_handle * trans,struct btrfs_root * root,unsigned long count)2237 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
2238 			   struct btrfs_root *root, unsigned long count)
2239 {
2240 	struct rb_node *node;
2241 	struct btrfs_delayed_ref_root *delayed_refs;
2242 	struct btrfs_delayed_ref_node *ref;
2243 	struct list_head cluster;
2244 	int ret;
2245 	int run_all = count == (unsigned long)-1;
2246 	int run_most = 0;
2247 
2248 	if (root == root->fs_info->extent_root)
2249 		root = root->fs_info->tree_root;
2250 
2251 	delayed_refs = &trans->transaction->delayed_refs;
2252 	INIT_LIST_HEAD(&cluster);
2253 again:
2254 	spin_lock(&delayed_refs->lock);
2255 	if (count == 0) {
2256 		count = delayed_refs->num_entries * 2;
2257 		run_most = 1;
2258 	}
2259 	while (1) {
2260 		if (!(run_all || run_most) &&
2261 		    delayed_refs->num_heads_ready < 64)
2262 			break;
2263 
2264 		/*
2265 		 * go find something we can process in the rbtree.  We start at
2266 		 * the beginning of the tree, and then build a cluster
2267 		 * of refs to process starting at the first one we are able to
2268 		 * lock
2269 		 */
2270 		ret = btrfs_find_ref_cluster(trans, &cluster,
2271 					     delayed_refs->run_delayed_start);
2272 		if (ret)
2273 			break;
2274 
2275 		ret = run_clustered_refs(trans, root, &cluster);
2276 		BUG_ON(ret < 0);
2277 
2278 		count -= min_t(unsigned long, ret, count);
2279 
2280 		if (count == 0)
2281 			break;
2282 	}
2283 
2284 	if (run_all) {
2285 		node = rb_first(&delayed_refs->root);
2286 		if (!node)
2287 			goto out;
2288 		count = (unsigned long)-1;
2289 
2290 		while (node) {
2291 			ref = rb_entry(node, struct btrfs_delayed_ref_node,
2292 				       rb_node);
2293 			if (btrfs_delayed_ref_is_head(ref)) {
2294 				struct btrfs_delayed_ref_head *head;
2295 
2296 				head = btrfs_delayed_node_to_head(ref);
2297 				atomic_inc(&ref->refs);
2298 
2299 				spin_unlock(&delayed_refs->lock);
2300 				mutex_lock(&head->mutex);
2301 				mutex_unlock(&head->mutex);
2302 
2303 				btrfs_put_delayed_ref(ref);
2304 				cond_resched();
2305 				goto again;
2306 			}
2307 			node = rb_next(node);
2308 		}
2309 		spin_unlock(&delayed_refs->lock);
2310 		schedule_timeout(1);
2311 		goto again;
2312 	}
2313 out:
2314 	spin_unlock(&delayed_refs->lock);
2315 	return 0;
2316 }
2317 
btrfs_set_disk_extent_flags(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u64 num_bytes,u64 flags,int is_data)2318 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2319 				struct btrfs_root *root,
2320 				u64 bytenr, u64 num_bytes, u64 flags,
2321 				int is_data)
2322 {
2323 	struct btrfs_delayed_extent_op *extent_op;
2324 	int ret;
2325 
2326 	extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2327 	if (!extent_op)
2328 		return -ENOMEM;
2329 
2330 	extent_op->flags_to_set = flags;
2331 	extent_op->update_flags = 1;
2332 	extent_op->update_key = 0;
2333 	extent_op->is_data = is_data ? 1 : 0;
2334 
2335 	ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
2336 	if (ret)
2337 		kfree(extent_op);
2338 	return ret;
2339 }
2340 
check_delayed_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 objectid,u64 offset,u64 bytenr)2341 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
2342 				      struct btrfs_root *root,
2343 				      struct btrfs_path *path,
2344 				      u64 objectid, u64 offset, u64 bytenr)
2345 {
2346 	struct btrfs_delayed_ref_head *head;
2347 	struct btrfs_delayed_ref_node *ref;
2348 	struct btrfs_delayed_data_ref *data_ref;
2349 	struct btrfs_delayed_ref_root *delayed_refs;
2350 	struct rb_node *node;
2351 	int ret = 0;
2352 
2353 	ret = -ENOENT;
2354 	delayed_refs = &trans->transaction->delayed_refs;
2355 	spin_lock(&delayed_refs->lock);
2356 	head = btrfs_find_delayed_ref_head(trans, bytenr);
2357 	if (!head)
2358 		goto out;
2359 
2360 	if (!mutex_trylock(&head->mutex)) {
2361 		atomic_inc(&head->node.refs);
2362 		spin_unlock(&delayed_refs->lock);
2363 
2364 		btrfs_release_path(root->fs_info->extent_root, path);
2365 
2366 		mutex_lock(&head->mutex);
2367 		mutex_unlock(&head->mutex);
2368 		btrfs_put_delayed_ref(&head->node);
2369 		return -EAGAIN;
2370 	}
2371 
2372 	node = rb_prev(&head->node.rb_node);
2373 	if (!node)
2374 		goto out_unlock;
2375 
2376 	ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2377 
2378 	if (ref->bytenr != bytenr)
2379 		goto out_unlock;
2380 
2381 	ret = 1;
2382 	if (ref->type != BTRFS_EXTENT_DATA_REF_KEY)
2383 		goto out_unlock;
2384 
2385 	data_ref = btrfs_delayed_node_to_data_ref(ref);
2386 
2387 	node = rb_prev(node);
2388 	if (node) {
2389 		ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2390 		if (ref->bytenr == bytenr)
2391 			goto out_unlock;
2392 	}
2393 
2394 	if (data_ref->root != root->root_key.objectid ||
2395 	    data_ref->objectid != objectid || data_ref->offset != offset)
2396 		goto out_unlock;
2397 
2398 	ret = 0;
2399 out_unlock:
2400 	mutex_unlock(&head->mutex);
2401 out:
2402 	spin_unlock(&delayed_refs->lock);
2403 	return ret;
2404 }
2405 
check_committed_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,u64 objectid,u64 offset,u64 bytenr)2406 static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
2407 					struct btrfs_root *root,
2408 					struct btrfs_path *path,
2409 					u64 objectid, u64 offset, u64 bytenr)
2410 {
2411 	struct btrfs_root *extent_root = root->fs_info->extent_root;
2412 	struct extent_buffer *leaf;
2413 	struct btrfs_extent_data_ref *ref;
2414 	struct btrfs_extent_inline_ref *iref;
2415 	struct btrfs_extent_item *ei;
2416 	struct btrfs_key key;
2417 	u32 item_size;
2418 	int ret;
2419 
2420 	key.objectid = bytenr;
2421 	key.offset = (u64)-1;
2422 	key.type = BTRFS_EXTENT_ITEM_KEY;
2423 
2424 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2425 	if (ret < 0)
2426 		goto out;
2427 	BUG_ON(ret == 0);
2428 
2429 	ret = -ENOENT;
2430 	if (path->slots[0] == 0)
2431 		goto out;
2432 
2433 	path->slots[0]--;
2434 	leaf = path->nodes[0];
2435 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2436 
2437 	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
2438 		goto out;
2439 
2440 	ret = 1;
2441 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2442 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
2443 	if (item_size < sizeof(*ei)) {
2444 		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
2445 		goto out;
2446 	}
2447 #endif
2448 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
2449 
2450 	if (item_size != sizeof(*ei) +
2451 	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
2452 		goto out;
2453 
2454 	if (btrfs_extent_generation(leaf, ei) <=
2455 	    btrfs_root_last_snapshot(&root->root_item))
2456 		goto out;
2457 
2458 	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
2459 	if (btrfs_extent_inline_ref_type(leaf, iref) !=
2460 	    BTRFS_EXTENT_DATA_REF_KEY)
2461 		goto out;
2462 
2463 	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
2464 	if (btrfs_extent_refs(leaf, ei) !=
2465 	    btrfs_extent_data_ref_count(leaf, ref) ||
2466 	    btrfs_extent_data_ref_root(leaf, ref) !=
2467 	    root->root_key.objectid ||
2468 	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
2469 	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
2470 		goto out;
2471 
2472 	ret = 0;
2473 out:
2474 	return ret;
2475 }
2476 
btrfs_cross_ref_exist(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 objectid,u64 offset,u64 bytenr)2477 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2478 			  struct btrfs_root *root,
2479 			  u64 objectid, u64 offset, u64 bytenr)
2480 {
2481 	struct btrfs_path *path;
2482 	int ret;
2483 	int ret2;
2484 
2485 	path = btrfs_alloc_path();
2486 	if (!path)
2487 		return -ENOENT;
2488 
2489 	do {
2490 		ret = check_committed_ref(trans, root, path, objectid,
2491 					  offset, bytenr);
2492 		if (ret && ret != -ENOENT)
2493 			goto out;
2494 
2495 		ret2 = check_delayed_ref(trans, root, path, objectid,
2496 					 offset, bytenr);
2497 	} while (ret2 == -EAGAIN);
2498 
2499 	if (ret2 && ret2 != -ENOENT) {
2500 		ret = ret2;
2501 		goto out;
2502 	}
2503 
2504 	if (ret != -ENOENT || ret2 != -ENOENT)
2505 		ret = 0;
2506 out:
2507 	btrfs_free_path(path);
2508 	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
2509 		WARN_ON(ret > 0);
2510 	return ret;
2511 }
2512 
2513 #if 0
2514 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2515 		    struct extent_buffer *buf, u32 nr_extents)
2516 {
2517 	struct btrfs_key key;
2518 	struct btrfs_file_extent_item *fi;
2519 	u64 root_gen;
2520 	u32 nritems;
2521 	int i;
2522 	int level;
2523 	int ret = 0;
2524 	int shared = 0;
2525 
2526 	if (!root->ref_cows)
2527 		return 0;
2528 
2529 	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
2530 		shared = 0;
2531 		root_gen = root->root_key.offset;
2532 	} else {
2533 		shared = 1;
2534 		root_gen = trans->transid - 1;
2535 	}
2536 
2537 	level = btrfs_header_level(buf);
2538 	nritems = btrfs_header_nritems(buf);
2539 
2540 	if (level == 0) {
2541 		struct btrfs_leaf_ref *ref;
2542 		struct btrfs_extent_info *info;
2543 
2544 		ref = btrfs_alloc_leaf_ref(root, nr_extents);
2545 		if (!ref) {
2546 			ret = -ENOMEM;
2547 			goto out;
2548 		}
2549 
2550 		ref->root_gen = root_gen;
2551 		ref->bytenr = buf->start;
2552 		ref->owner = btrfs_header_owner(buf);
2553 		ref->generation = btrfs_header_generation(buf);
2554 		ref->nritems = nr_extents;
2555 		info = ref->extents;
2556 
2557 		for (i = 0; nr_extents > 0 && i < nritems; i++) {
2558 			u64 disk_bytenr;
2559 			btrfs_item_key_to_cpu(buf, &key, i);
2560 			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2561 				continue;
2562 			fi = btrfs_item_ptr(buf, i,
2563 					    struct btrfs_file_extent_item);
2564 			if (btrfs_file_extent_type(buf, fi) ==
2565 			    BTRFS_FILE_EXTENT_INLINE)
2566 				continue;
2567 			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2568 			if (disk_bytenr == 0)
2569 				continue;
2570 
2571 			info->bytenr = disk_bytenr;
2572 			info->num_bytes =
2573 				btrfs_file_extent_disk_num_bytes(buf, fi);
2574 			info->objectid = key.objectid;
2575 			info->offset = key.offset;
2576 			info++;
2577 		}
2578 
2579 		ret = btrfs_add_leaf_ref(root, ref, shared);
2580 		if (ret == -EEXIST && shared) {
2581 			struct btrfs_leaf_ref *old;
2582 			old = btrfs_lookup_leaf_ref(root, ref->bytenr);
2583 			BUG_ON(!old);
2584 			btrfs_remove_leaf_ref(root, old);
2585 			btrfs_free_leaf_ref(root, old);
2586 			ret = btrfs_add_leaf_ref(root, ref, shared);
2587 		}
2588 		WARN_ON(ret);
2589 		btrfs_free_leaf_ref(root, ref);
2590 	}
2591 out:
2592 	return ret;
2593 }
2594 
2595 /* when a block goes through cow, we update the reference counts of
2596  * everything that block points to.  The internal pointers of the block
2597  * can be in just about any order, and it is likely to have clusters of
2598  * things that are close together and clusters of things that are not.
2599  *
2600  * To help reduce the seeks that come with updating all of these reference
2601  * counts, sort them by byte number before actual updates are done.
2602  *
2603  * struct refsort is used to match byte number to slot in the btree block.
2604  * we sort based on the byte number and then use the slot to actually
2605  * find the item.
2606  *
2607  * struct refsort is smaller than strcut btrfs_item and smaller than
2608  * struct btrfs_key_ptr.  Since we're currently limited to the page size
2609  * for a btree block, there's no way for a kmalloc of refsorts for a
2610  * single node to be bigger than a page.
2611  */
2612 struct refsort {
2613 	u64 bytenr;
2614 	u32 slot;
2615 };
2616 
2617 /*
2618  * for passing into sort()
2619  */
2620 static int refsort_cmp(const void *a_void, const void *b_void)
2621 {
2622 	const struct refsort *a = a_void;
2623 	const struct refsort *b = b_void;
2624 
2625 	if (a->bytenr < b->bytenr)
2626 		return -1;
2627 	if (a->bytenr > b->bytenr)
2628 		return 1;
2629 	return 0;
2630 }
2631 #endif
2632 
__btrfs_mod_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * buf,int full_backref,int inc)2633 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
2634 			   struct btrfs_root *root,
2635 			   struct extent_buffer *buf,
2636 			   int full_backref, int inc)
2637 {
2638 	u64 bytenr;
2639 	u64 num_bytes;
2640 	u64 parent;
2641 	u64 ref_root;
2642 	u32 nritems;
2643 	struct btrfs_key key;
2644 	struct btrfs_file_extent_item *fi;
2645 	int i;
2646 	int level;
2647 	int ret = 0;
2648 	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
2649 			    u64, u64, u64, u64, u64, u64);
2650 
2651 	ref_root = btrfs_header_owner(buf);
2652 	nritems = btrfs_header_nritems(buf);
2653 	level = btrfs_header_level(buf);
2654 
2655 	if (!root->ref_cows && level == 0)
2656 		return 0;
2657 
2658 	if (inc)
2659 		process_func = btrfs_inc_extent_ref;
2660 	else
2661 		process_func = btrfs_free_extent;
2662 
2663 	if (full_backref)
2664 		parent = buf->start;
2665 	else
2666 		parent = 0;
2667 
2668 	for (i = 0; i < nritems; i++) {
2669 		if (level == 0) {
2670 			btrfs_item_key_to_cpu(buf, &key, i);
2671 			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2672 				continue;
2673 			fi = btrfs_item_ptr(buf, i,
2674 					    struct btrfs_file_extent_item);
2675 			if (btrfs_file_extent_type(buf, fi) ==
2676 			    BTRFS_FILE_EXTENT_INLINE)
2677 				continue;
2678 			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
2679 			if (bytenr == 0)
2680 				continue;
2681 
2682 			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
2683 			key.offset -= btrfs_file_extent_offset(buf, fi);
2684 			ret = process_func(trans, root, bytenr, num_bytes,
2685 					   parent, ref_root, key.objectid,
2686 					   key.offset);
2687 			if (ret)
2688 				goto fail;
2689 		} else {
2690 			bytenr = btrfs_node_blockptr(buf, i);
2691 			num_bytes = btrfs_level_size(root, level - 1);
2692 			ret = process_func(trans, root, bytenr, num_bytes,
2693 					   parent, ref_root, level - 1, 0);
2694 			if (ret)
2695 				goto fail;
2696 		}
2697 	}
2698 	return 0;
2699 fail:
2700 	BUG();
2701 	return ret;
2702 }
2703 
btrfs_inc_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * buf,int full_backref)2704 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2705 		  struct extent_buffer *buf, int full_backref)
2706 {
2707 	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
2708 }
2709 
btrfs_dec_ref(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * buf,int full_backref)2710 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2711 		  struct extent_buffer *buf, int full_backref)
2712 {
2713 	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
2714 }
2715 
write_one_cache_group(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct btrfs_block_group_cache * cache)2716 static int write_one_cache_group(struct btrfs_trans_handle *trans,
2717 				 struct btrfs_root *root,
2718 				 struct btrfs_path *path,
2719 				 struct btrfs_block_group_cache *cache)
2720 {
2721 	int ret;
2722 	struct btrfs_root *extent_root = root->fs_info->extent_root;
2723 	unsigned long bi;
2724 	struct extent_buffer *leaf;
2725 
2726 	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
2727 	if (ret < 0)
2728 		goto fail;
2729 	BUG_ON(ret);
2730 
2731 	leaf = path->nodes[0];
2732 	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
2733 	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
2734 	btrfs_mark_buffer_dirty(leaf);
2735 	btrfs_release_path(extent_root, path);
2736 fail:
2737 	if (ret)
2738 		return ret;
2739 	return 0;
2740 
2741 }
2742 
2743 static struct btrfs_block_group_cache *
next_block_group(struct btrfs_root * root,struct btrfs_block_group_cache * cache)2744 next_block_group(struct btrfs_root *root,
2745 		 struct btrfs_block_group_cache *cache)
2746 {
2747 	struct rb_node *node;
2748 	spin_lock(&root->fs_info->block_group_cache_lock);
2749 	node = rb_next(&cache->cache_node);
2750 	btrfs_put_block_group(cache);
2751 	if (node) {
2752 		cache = rb_entry(node, struct btrfs_block_group_cache,
2753 				 cache_node);
2754 		btrfs_get_block_group(cache);
2755 	} else
2756 		cache = NULL;
2757 	spin_unlock(&root->fs_info->block_group_cache_lock);
2758 	return cache;
2759 }
2760 
cache_save_setup(struct btrfs_block_group_cache * block_group,struct btrfs_trans_handle * trans,struct btrfs_path * path)2761 static int cache_save_setup(struct btrfs_block_group_cache *block_group,
2762 			    struct btrfs_trans_handle *trans,
2763 			    struct btrfs_path *path)
2764 {
2765 	struct btrfs_root *root = block_group->fs_info->tree_root;
2766 	struct inode *inode = NULL;
2767 	u64 alloc_hint = 0;
2768 	int dcs = BTRFS_DC_ERROR;
2769 	int num_pages = 0;
2770 	int retries = 0;
2771 	int ret = 0;
2772 
2773 	/*
2774 	 * If this block group is smaller than 100 megs don't bother caching the
2775 	 * block group.
2776 	 */
2777 	if (block_group->key.offset < (100 * 1024 * 1024)) {
2778 		spin_lock(&block_group->lock);
2779 		block_group->disk_cache_state = BTRFS_DC_WRITTEN;
2780 		spin_unlock(&block_group->lock);
2781 		return 0;
2782 	}
2783 
2784 again:
2785 	inode = lookup_free_space_inode(root, block_group, path);
2786 	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2787 		ret = PTR_ERR(inode);
2788 		btrfs_release_path(root, path);
2789 		goto out;
2790 	}
2791 
2792 	if (IS_ERR(inode)) {
2793 		BUG_ON(retries);
2794 		retries++;
2795 
2796 		if (block_group->ro)
2797 			goto out_free;
2798 
2799 		ret = create_free_space_inode(root, trans, block_group, path);
2800 		if (ret)
2801 			goto out_free;
2802 		goto again;
2803 	}
2804 
2805 	/*
2806 	 * We want to set the generation to 0, that way if anything goes wrong
2807 	 * from here on out we know not to trust this cache when we load up next
2808 	 * time.
2809 	 */
2810 	BTRFS_I(inode)->generation = 0;
2811 	ret = btrfs_update_inode(trans, root, inode);
2812 	WARN_ON(ret);
2813 
2814 	if (i_size_read(inode) > 0) {
2815 		ret = btrfs_truncate_free_space_cache(root, trans, path,
2816 						      inode);
2817 		if (ret)
2818 			goto out_put;
2819 	}
2820 
2821 	spin_lock(&block_group->lock);
2822 	if (block_group->cached != BTRFS_CACHE_FINISHED) {
2823 		/* We're not cached, don't bother trying to write stuff out */
2824 		dcs = BTRFS_DC_WRITTEN;
2825 		spin_unlock(&block_group->lock);
2826 		goto out_put;
2827 	}
2828 	spin_unlock(&block_group->lock);
2829 
2830 	num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
2831 	if (!num_pages)
2832 		num_pages = 1;
2833 
2834 	/*
2835 	 * Just to make absolutely sure we have enough space, we're going to
2836 	 * preallocate 12 pages worth of space for each block group.  In
2837 	 * practice we ought to use at most 8, but we need extra space so we can
2838 	 * add our header and have a terminator between the extents and the
2839 	 * bitmaps.
2840 	 */
2841 	num_pages *= 16;
2842 	num_pages *= PAGE_CACHE_SIZE;
2843 
2844 	ret = btrfs_check_data_free_space(inode, num_pages);
2845 	if (ret)
2846 		goto out_put;
2847 
2848 	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
2849 					      num_pages, num_pages,
2850 					      &alloc_hint);
2851 	if (!ret)
2852 		dcs = BTRFS_DC_SETUP;
2853 	btrfs_free_reserved_data_space(inode, num_pages);
2854 out_put:
2855 	iput(inode);
2856 out_free:
2857 	btrfs_release_path(root, path);
2858 out:
2859 	spin_lock(&block_group->lock);
2860 	block_group->disk_cache_state = dcs;
2861 	spin_unlock(&block_group->lock);
2862 
2863 	return ret;
2864 }
2865 
btrfs_write_dirty_block_groups(struct btrfs_trans_handle * trans,struct btrfs_root * root)2866 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2867 				   struct btrfs_root *root)
2868 {
2869 	struct btrfs_block_group_cache *cache;
2870 	int err = 0;
2871 	struct btrfs_path *path;
2872 	u64 last = 0;
2873 
2874 	path = btrfs_alloc_path();
2875 	if (!path)
2876 		return -ENOMEM;
2877 
2878 again:
2879 	while (1) {
2880 		cache = btrfs_lookup_first_block_group(root->fs_info, last);
2881 		while (cache) {
2882 			if (cache->disk_cache_state == BTRFS_DC_CLEAR)
2883 				break;
2884 			cache = next_block_group(root, cache);
2885 		}
2886 		if (!cache) {
2887 			if (last == 0)
2888 				break;
2889 			last = 0;
2890 			continue;
2891 		}
2892 		err = cache_save_setup(cache, trans, path);
2893 		last = cache->key.objectid + cache->key.offset;
2894 		btrfs_put_block_group(cache);
2895 	}
2896 
2897 	while (1) {
2898 		if (last == 0) {
2899 			err = btrfs_run_delayed_refs(trans, root,
2900 						     (unsigned long)-1);
2901 			BUG_ON(err);
2902 		}
2903 
2904 		cache = btrfs_lookup_first_block_group(root->fs_info, last);
2905 		while (cache) {
2906 			if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
2907 				btrfs_put_block_group(cache);
2908 				goto again;
2909 			}
2910 
2911 			if (cache->dirty)
2912 				break;
2913 			cache = next_block_group(root, cache);
2914 		}
2915 		if (!cache) {
2916 			if (last == 0)
2917 				break;
2918 			last = 0;
2919 			continue;
2920 		}
2921 
2922 		if (cache->disk_cache_state == BTRFS_DC_SETUP)
2923 			cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
2924 		cache->dirty = 0;
2925 		last = cache->key.objectid + cache->key.offset;
2926 
2927 		err = write_one_cache_group(trans, root, path, cache);
2928 		BUG_ON(err);
2929 		btrfs_put_block_group(cache);
2930 	}
2931 
2932 	while (1) {
2933 		/*
2934 		 * I don't think this is needed since we're just marking our
2935 		 * preallocated extent as written, but just in case it can't
2936 		 * hurt.
2937 		 */
2938 		if (last == 0) {
2939 			err = btrfs_run_delayed_refs(trans, root,
2940 						     (unsigned long)-1);
2941 			BUG_ON(err);
2942 		}
2943 
2944 		cache = btrfs_lookup_first_block_group(root->fs_info, last);
2945 		while (cache) {
2946 			/*
2947 			 * Really this shouldn't happen, but it could if we
2948 			 * couldn't write the entire preallocated extent and
2949 			 * splitting the extent resulted in a new block.
2950 			 */
2951 			if (cache->dirty) {
2952 				btrfs_put_block_group(cache);
2953 				goto again;
2954 			}
2955 			if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2956 				break;
2957 			cache = next_block_group(root, cache);
2958 		}
2959 		if (!cache) {
2960 			if (last == 0)
2961 				break;
2962 			last = 0;
2963 			continue;
2964 		}
2965 
2966 		btrfs_write_out_cache(root, trans, cache, path);
2967 
2968 		/*
2969 		 * If we didn't have an error then the cache state is still
2970 		 * NEED_WRITE, so we can set it to WRITTEN.
2971 		 */
2972 		if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2973 			cache->disk_cache_state = BTRFS_DC_WRITTEN;
2974 		last = cache->key.objectid + cache->key.offset;
2975 		btrfs_put_block_group(cache);
2976 	}
2977 
2978 	btrfs_free_path(path);
2979 	return 0;
2980 }
2981 
btrfs_extent_readonly(struct btrfs_root * root,u64 bytenr)2982 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
2983 {
2984 	struct btrfs_block_group_cache *block_group;
2985 	int readonly = 0;
2986 
2987 	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
2988 	if (!block_group || block_group->ro)
2989 		readonly = 1;
2990 	if (block_group)
2991 		btrfs_put_block_group(block_group);
2992 	return readonly;
2993 }
2994 
update_space_info(struct btrfs_fs_info * info,u64 flags,u64 total_bytes,u64 bytes_used,struct btrfs_space_info ** space_info)2995 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2996 			     u64 total_bytes, u64 bytes_used,
2997 			     struct btrfs_space_info **space_info)
2998 {
2999 	struct btrfs_space_info *found;
3000 	int i;
3001 	int factor;
3002 
3003 	if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3004 		     BTRFS_BLOCK_GROUP_RAID10))
3005 		factor = 2;
3006 	else
3007 		factor = 1;
3008 
3009 	found = __find_space_info(info, flags);
3010 	if (found) {
3011 		spin_lock(&found->lock);
3012 		found->total_bytes += total_bytes;
3013 		found->disk_total += total_bytes * factor;
3014 		found->bytes_used += bytes_used;
3015 		found->disk_used += bytes_used * factor;
3016 		found->full = 0;
3017 		spin_unlock(&found->lock);
3018 		*space_info = found;
3019 		return 0;
3020 	}
3021 	found = kzalloc(sizeof(*found), GFP_NOFS);
3022 	if (!found)
3023 		return -ENOMEM;
3024 
3025 	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
3026 		INIT_LIST_HEAD(&found->block_groups[i]);
3027 	init_rwsem(&found->groups_sem);
3028 	spin_lock_init(&found->lock);
3029 	found->flags = flags & (BTRFS_BLOCK_GROUP_DATA |
3030 				BTRFS_BLOCK_GROUP_SYSTEM |
3031 				BTRFS_BLOCK_GROUP_METADATA);
3032 	found->total_bytes = total_bytes;
3033 	found->disk_total = total_bytes * factor;
3034 	found->bytes_used = bytes_used;
3035 	found->disk_used = bytes_used * factor;
3036 	found->bytes_pinned = 0;
3037 	found->bytes_reserved = 0;
3038 	found->bytes_readonly = 0;
3039 	found->bytes_may_use = 0;
3040 	found->full = 0;
3041 	found->force_alloc = CHUNK_ALLOC_NO_FORCE;
3042 	found->chunk_alloc = 0;
3043 	*space_info = found;
3044 	list_add_rcu(&found->list, &info->space_info);
3045 	atomic_set(&found->caching_threads, 0);
3046 	return 0;
3047 }
3048 
set_avail_alloc_bits(struct btrfs_fs_info * fs_info,u64 flags)3049 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3050 {
3051 	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
3052 				   BTRFS_BLOCK_GROUP_RAID1 |
3053 				   BTRFS_BLOCK_GROUP_RAID10 |
3054 				   BTRFS_BLOCK_GROUP_DUP);
3055 	if (extra_flags) {
3056 		if (flags & BTRFS_BLOCK_GROUP_DATA)
3057 			fs_info->avail_data_alloc_bits |= extra_flags;
3058 		if (flags & BTRFS_BLOCK_GROUP_METADATA)
3059 			fs_info->avail_metadata_alloc_bits |= extra_flags;
3060 		if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3061 			fs_info->avail_system_alloc_bits |= extra_flags;
3062 	}
3063 }
3064 
btrfs_reduce_alloc_profile(struct btrfs_root * root,u64 flags)3065 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3066 {
3067 	/*
3068 	 * we add in the count of missing devices because we want
3069 	 * to make sure that any RAID levels on a degraded FS
3070 	 * continue to be honored.
3071 	 */
3072 	u64 num_devices = root->fs_info->fs_devices->rw_devices +
3073 		root->fs_info->fs_devices->missing_devices;
3074 
3075 	if (num_devices == 1)
3076 		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
3077 	if (num_devices < 4)
3078 		flags &= ~BTRFS_BLOCK_GROUP_RAID10;
3079 
3080 	if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
3081 	    (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3082 		      BTRFS_BLOCK_GROUP_RAID10))) {
3083 		flags &= ~BTRFS_BLOCK_GROUP_DUP;
3084 	}
3085 
3086 	if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
3087 	    (flags & BTRFS_BLOCK_GROUP_RAID10)) {
3088 		flags &= ~BTRFS_BLOCK_GROUP_RAID1;
3089 	}
3090 
3091 	if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
3092 	    ((flags & BTRFS_BLOCK_GROUP_RAID1) |
3093 	     (flags & BTRFS_BLOCK_GROUP_RAID10) |
3094 	     (flags & BTRFS_BLOCK_GROUP_DUP)))
3095 		flags &= ~BTRFS_BLOCK_GROUP_RAID0;
3096 	return flags;
3097 }
3098 
get_alloc_profile(struct btrfs_root * root,u64 flags)3099 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
3100 {
3101 	if (flags & BTRFS_BLOCK_GROUP_DATA)
3102 		flags |= root->fs_info->avail_data_alloc_bits &
3103 			 root->fs_info->data_alloc_profile;
3104 	else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3105 		flags |= root->fs_info->avail_system_alloc_bits &
3106 			 root->fs_info->system_alloc_profile;
3107 	else if (flags & BTRFS_BLOCK_GROUP_METADATA)
3108 		flags |= root->fs_info->avail_metadata_alloc_bits &
3109 			 root->fs_info->metadata_alloc_profile;
3110 	return btrfs_reduce_alloc_profile(root, flags);
3111 }
3112 
btrfs_get_alloc_profile(struct btrfs_root * root,int data)3113 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
3114 {
3115 	u64 flags;
3116 
3117 	if (data)
3118 		flags = BTRFS_BLOCK_GROUP_DATA;
3119 	else if (root == root->fs_info->chunk_root)
3120 		flags = BTRFS_BLOCK_GROUP_SYSTEM;
3121 	else
3122 		flags = BTRFS_BLOCK_GROUP_METADATA;
3123 
3124 	return get_alloc_profile(root, flags);
3125 }
3126 
btrfs_set_inode_space_info(struct btrfs_root * root,struct inode * inode)3127 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
3128 {
3129 	BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
3130 						       BTRFS_BLOCK_GROUP_DATA);
3131 }
3132 
3133 /*
3134  * This will check the space that the inode allocates from to make sure we have
3135  * enough space for bytes.
3136  */
btrfs_check_data_free_space(struct inode * inode,u64 bytes)3137 int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
3138 {
3139 	struct btrfs_space_info *data_sinfo;
3140 	struct btrfs_root *root = BTRFS_I(inode)->root;
3141 	u64 used;
3142 	int ret = 0, committed = 0, alloc_chunk = 1;
3143 
3144 	/* make sure bytes are sectorsize aligned */
3145 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3146 
3147 	if (root == root->fs_info->tree_root) {
3148 		alloc_chunk = 0;
3149 		committed = 1;
3150 	}
3151 
3152 	data_sinfo = BTRFS_I(inode)->space_info;
3153 	if (!data_sinfo)
3154 		goto alloc;
3155 
3156 again:
3157 	/* make sure we have enough space to handle the data first */
3158 	spin_lock(&data_sinfo->lock);
3159 	used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
3160 		data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
3161 		data_sinfo->bytes_may_use;
3162 
3163 	if (used + bytes > data_sinfo->total_bytes) {
3164 		struct btrfs_trans_handle *trans;
3165 
3166 		/*
3167 		 * if we don't have enough free bytes in this space then we need
3168 		 * to alloc a new chunk.
3169 		 */
3170 		if (!data_sinfo->full && alloc_chunk) {
3171 			u64 alloc_target;
3172 
3173 			data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
3174 			spin_unlock(&data_sinfo->lock);
3175 alloc:
3176 			alloc_target = btrfs_get_alloc_profile(root, 1);
3177 			trans = btrfs_join_transaction(root, 1);
3178 			if (IS_ERR(trans))
3179 				return PTR_ERR(trans);
3180 
3181 			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3182 					     bytes + 2 * 1024 * 1024,
3183 					     alloc_target,
3184 					     CHUNK_ALLOC_NO_FORCE);
3185 			btrfs_end_transaction(trans, root);
3186 			if (ret < 0) {
3187 				if (ret != -ENOSPC)
3188 					return ret;
3189 				else
3190 					goto commit_trans;
3191 			}
3192 
3193 			if (!data_sinfo) {
3194 				btrfs_set_inode_space_info(root, inode);
3195 				data_sinfo = BTRFS_I(inode)->space_info;
3196 			}
3197 			goto again;
3198 		}
3199 		spin_unlock(&data_sinfo->lock);
3200 
3201 		/* commit the current transaction and try again */
3202 commit_trans:
3203 		if (!committed && !root->fs_info->open_ioctl_trans) {
3204 			committed = 1;
3205 			trans = btrfs_join_transaction(root, 1);
3206 			if (IS_ERR(trans))
3207 				return PTR_ERR(trans);
3208 			ret = btrfs_commit_transaction(trans, root);
3209 			if (ret)
3210 				return ret;
3211 			goto again;
3212 		}
3213 
3214 #if 0 /* I hope we never need this code again, just in case */
3215 		printk(KERN_ERR "no space left, need %llu, %llu bytes_used, "
3216 		       "%llu bytes_reserved, " "%llu bytes_pinned, "
3217 		       "%llu bytes_readonly, %llu may use %llu total\n",
3218 		       (unsigned long long)bytes,
3219 		       (unsigned long long)data_sinfo->bytes_used,
3220 		       (unsigned long long)data_sinfo->bytes_reserved,
3221 		       (unsigned long long)data_sinfo->bytes_pinned,
3222 		       (unsigned long long)data_sinfo->bytes_readonly,
3223 		       (unsigned long long)data_sinfo->bytes_may_use,
3224 		       (unsigned long long)data_sinfo->total_bytes);
3225 #endif
3226 		return -ENOSPC;
3227 	}
3228 	data_sinfo->bytes_may_use += bytes;
3229 	BTRFS_I(inode)->reserved_bytes += bytes;
3230 	spin_unlock(&data_sinfo->lock);
3231 
3232 	return 0;
3233 }
3234 
3235 /*
3236  * called when we are clearing an delalloc extent from the
3237  * inode's io_tree or there was an error for whatever reason
3238  * after calling btrfs_check_data_free_space
3239  */
btrfs_free_reserved_data_space(struct inode * inode,u64 bytes)3240 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
3241 {
3242 	struct btrfs_root *root = BTRFS_I(inode)->root;
3243 	struct btrfs_space_info *data_sinfo;
3244 
3245 	/* make sure bytes are sectorsize aligned */
3246 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
3247 
3248 	data_sinfo = BTRFS_I(inode)->space_info;
3249 	spin_lock(&data_sinfo->lock);
3250 	data_sinfo->bytes_may_use -= bytes;
3251 	BTRFS_I(inode)->reserved_bytes -= bytes;
3252 	spin_unlock(&data_sinfo->lock);
3253 }
3254 
force_metadata_allocation(struct btrfs_fs_info * info)3255 static void force_metadata_allocation(struct btrfs_fs_info *info)
3256 {
3257 	struct list_head *head = &info->space_info;
3258 	struct btrfs_space_info *found;
3259 
3260 	rcu_read_lock();
3261 	list_for_each_entry_rcu(found, head, list) {
3262 		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
3263 			found->force_alloc = CHUNK_ALLOC_FORCE;
3264 	}
3265 	rcu_read_unlock();
3266 }
3267 
should_alloc_chunk(struct btrfs_root * root,struct btrfs_space_info * sinfo,u64 alloc_bytes,int force)3268 static int should_alloc_chunk(struct btrfs_root *root,
3269 			      struct btrfs_space_info *sinfo, u64 alloc_bytes,
3270 			      int force)
3271 {
3272 	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3273 	u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
3274 	u64 thresh;
3275 
3276 	if (force == CHUNK_ALLOC_FORCE)
3277 		return 1;
3278 
3279 	/*
3280 	 * in limited mode, we want to have some free space up to
3281 	 * about 1% of the FS size.
3282 	 */
3283 	if (force == CHUNK_ALLOC_LIMITED) {
3284 		thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3285 		thresh = max_t(u64, 64 * 1024 * 1024,
3286 			       div_factor_fine(thresh, 1));
3287 
3288 		if (num_bytes - num_allocated < thresh)
3289 			return 1;
3290 	}
3291 
3292 	/*
3293 	 * we have two similar checks here, one based on percentage
3294 	 * and once based on a hard number of 256MB.  The idea
3295 	 * is that if we have a good amount of free
3296 	 * room, don't allocate a chunk.  A good mount is
3297 	 * less than 80% utilized of the chunks we have allocated,
3298 	 * or more than 256MB free
3299 	 */
3300 	if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
3301 		return 0;
3302 
3303 	if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
3304 		return 0;
3305 
3306 	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3307 
3308 	/* 256MB or 5% of the FS */
3309 	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
3310 
3311 	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
3312 		return 0;
3313 	return 1;
3314 }
3315 
do_chunk_alloc(struct btrfs_trans_handle * trans,struct btrfs_root * extent_root,u64 alloc_bytes,u64 flags,int force)3316 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3317 			  struct btrfs_root *extent_root, u64 alloc_bytes,
3318 			  u64 flags, int force)
3319 {
3320 	struct btrfs_space_info *space_info;
3321 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
3322 	int wait_for_alloc = 0;
3323 	int ret = 0;
3324 
3325 	flags = btrfs_reduce_alloc_profile(extent_root, flags);
3326 
3327 	space_info = __find_space_info(extent_root->fs_info, flags);
3328 	if (!space_info) {
3329 		ret = update_space_info(extent_root->fs_info, flags,
3330 					0, 0, &space_info);
3331 		BUG_ON(ret);
3332 	}
3333 	BUG_ON(!space_info);
3334 
3335 again:
3336 	spin_lock(&space_info->lock);
3337 	if (space_info->force_alloc)
3338 		force = space_info->force_alloc;
3339 	if (space_info->full) {
3340 		spin_unlock(&space_info->lock);
3341 		return 0;
3342 	}
3343 
3344 	if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
3345 		spin_unlock(&space_info->lock);
3346 		return 0;
3347 	} else if (space_info->chunk_alloc) {
3348 		wait_for_alloc = 1;
3349 	} else {
3350 		space_info->chunk_alloc = 1;
3351 	}
3352 
3353 	spin_unlock(&space_info->lock);
3354 
3355 	mutex_lock(&fs_info->chunk_mutex);
3356 
3357 	/*
3358 	 * The chunk_mutex is held throughout the entirety of a chunk
3359 	 * allocation, so once we've acquired the chunk_mutex we know that the
3360 	 * other guy is done and we need to recheck and see if we should
3361 	 * allocate.
3362 	 */
3363 	if (wait_for_alloc) {
3364 		mutex_unlock(&fs_info->chunk_mutex);
3365 		wait_for_alloc = 0;
3366 		goto again;
3367 	}
3368 
3369 	/*
3370 	 * If we have mixed data/metadata chunks we want to make sure we keep
3371 	 * allocating mixed chunks instead of individual chunks.
3372 	 */
3373 	if (btrfs_mixed_space_info(space_info))
3374 		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
3375 
3376 	/*
3377 	 * if we're doing a data chunk, go ahead and make sure that
3378 	 * we keep a reasonable number of metadata chunks allocated in the
3379 	 * FS as well.
3380 	 */
3381 	if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
3382 		fs_info->data_chunk_allocations++;
3383 		if (!(fs_info->data_chunk_allocations %
3384 		      fs_info->metadata_ratio))
3385 			force_metadata_allocation(fs_info);
3386 	}
3387 
3388 	ret = btrfs_alloc_chunk(trans, extent_root, flags);
3389 	spin_lock(&space_info->lock);
3390 	if (ret)
3391 		space_info->full = 1;
3392 	else
3393 		ret = 1;
3394 
3395 	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
3396 	space_info->chunk_alloc = 0;
3397 	spin_unlock(&space_info->lock);
3398 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
3399 	return ret;
3400 }
3401 
3402 /*
3403  * shrink metadata reservation for delalloc
3404  */
shrink_delalloc(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 to_reclaim,int sync)3405 static int shrink_delalloc(struct btrfs_trans_handle *trans,
3406 			   struct btrfs_root *root, u64 to_reclaim, int sync)
3407 {
3408 	struct btrfs_block_rsv *block_rsv;
3409 	struct btrfs_space_info *space_info;
3410 	u64 reserved;
3411 	u64 max_reclaim;
3412 	u64 reclaimed = 0;
3413 	long time_left;
3414 	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3415 	int loops = 0;
3416 	unsigned long progress;
3417 
3418 	block_rsv = &root->fs_info->delalloc_block_rsv;
3419 	space_info = block_rsv->space_info;
3420 
3421 	smp_mb();
3422 	reserved = space_info->bytes_reserved;
3423 	progress = space_info->reservation_progress;
3424 
3425 	if (reserved == 0)
3426 		return 0;
3427 
3428 	max_reclaim = min(reserved, to_reclaim);
3429 
3430 	while (loops < 1024) {
3431 		/* have the flusher threads jump in and do some IO */
3432 		smp_mb();
3433 		nr_pages = min_t(unsigned long, nr_pages,
3434 		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
3435 		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3436 
3437 		spin_lock(&space_info->lock);
3438 		if (reserved > space_info->bytes_reserved)
3439 			reclaimed += reserved - space_info->bytes_reserved;
3440 		reserved = space_info->bytes_reserved;
3441 		spin_unlock(&space_info->lock);
3442 
3443 		loops++;
3444 
3445 		if (reserved == 0 || reclaimed >= max_reclaim)
3446 			break;
3447 
3448 		if (trans && trans->transaction->blocked)
3449 			return -EAGAIN;
3450 
3451 		time_left = schedule_timeout_interruptible(1);
3452 
3453 		/* We were interrupted, exit */
3454 		if (time_left)
3455 			break;
3456 
3457 		/* we've kicked the IO a few times, if anything has been freed,
3458 		 * exit.  There is no sense in looping here for a long time
3459 		 * when we really need to commit the transaction, or there are
3460 		 * just too many writers without enough free space
3461 		 */
3462 
3463 		if (loops > 3) {
3464 			smp_mb();
3465 			if (progress != space_info->reservation_progress)
3466 				break;
3467 		}
3468 
3469 	}
3470 	return reclaimed >= to_reclaim;
3471 }
3472 
3473 /*
3474  * Retries tells us how many times we've called reserve_metadata_bytes.  The
3475  * idea is if this is the first call (retries == 0) then we will add to our
3476  * reserved count if we can't make the allocation in order to hold our place
3477  * while we go and try and free up space.  That way for retries > 1 we don't try
3478  * and add space, we just check to see if the amount of unused space is >= the
3479  * total space, meaning that our reservation is valid.
3480  *
3481  * However if we don't intend to retry this reservation, pass -1 as retries so
3482  * that it short circuits this logic.
3483  */
reserve_metadata_bytes(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_block_rsv * block_rsv,u64 orig_bytes,int flush)3484 static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
3485 				  struct btrfs_root *root,
3486 				  struct btrfs_block_rsv *block_rsv,
3487 				  u64 orig_bytes, int flush)
3488 {
3489 	struct btrfs_space_info *space_info = block_rsv->space_info;
3490 	u64 unused;
3491 	u64 num_bytes = orig_bytes;
3492 	int retries = 0;
3493 	int ret = 0;
3494 	bool reserved = false;
3495 	bool committed = false;
3496 
3497 again:
3498 	ret = -ENOSPC;
3499 	if (reserved)
3500 		num_bytes = 0;
3501 
3502 	spin_lock(&space_info->lock);
3503 	unused = space_info->bytes_used + space_info->bytes_reserved +
3504 		 space_info->bytes_pinned + space_info->bytes_readonly +
3505 		 space_info->bytes_may_use;
3506 
3507 	/*
3508 	 * The idea here is that we've not already over-reserved the block group
3509 	 * then we can go ahead and save our reservation first and then start
3510 	 * flushing if we need to.  Otherwise if we've already overcommitted
3511 	 * lets start flushing stuff first and then come back and try to make
3512 	 * our reservation.
3513 	 */
3514 	if (unused <= space_info->total_bytes) {
3515 		unused = space_info->total_bytes - unused;
3516 		if (unused >= num_bytes) {
3517 			if (!reserved)
3518 				space_info->bytes_reserved += orig_bytes;
3519 			ret = 0;
3520 		} else {
3521 			/*
3522 			 * Ok set num_bytes to orig_bytes since we aren't
3523 			 * overocmmitted, this way we only try and reclaim what
3524 			 * we need.
3525 			 */
3526 			num_bytes = orig_bytes;
3527 		}
3528 	} else {
3529 		/*
3530 		 * Ok we're over committed, set num_bytes to the overcommitted
3531 		 * amount plus the amount of bytes that we need for this
3532 		 * reservation.
3533 		 */
3534 		num_bytes = unused - space_info->total_bytes +
3535 			(orig_bytes * (retries + 1));
3536 	}
3537 
3538 	/*
3539 	 * Couldn't make our reservation, save our place so while we're trying
3540 	 * to reclaim space we can actually use it instead of somebody else
3541 	 * stealing it from us.
3542 	 */
3543 	if (ret && !reserved) {
3544 		space_info->bytes_reserved += orig_bytes;
3545 		reserved = true;
3546 	}
3547 
3548 	spin_unlock(&space_info->lock);
3549 
3550 	if (!ret)
3551 		return 0;
3552 
3553 	if (!flush)
3554 		goto out;
3555 
3556 	/*
3557 	 * We do synchronous shrinking since we don't actually unreserve
3558 	 * metadata until after the IO is completed.
3559 	 */
3560 	ret = shrink_delalloc(trans, root, num_bytes, 1);
3561 	if (ret > 0)
3562 		return 0;
3563 	else if (ret < 0)
3564 		goto out;
3565 
3566 	/*
3567 	 * So if we were overcommitted it's possible that somebody else flushed
3568 	 * out enough space and we simply didn't have enough space to reclaim,
3569 	 * so go back around and try again.
3570 	 */
3571 	if (retries < 2) {
3572 		retries++;
3573 		goto again;
3574 	}
3575 
3576 	spin_lock(&space_info->lock);
3577 	/*
3578 	 * Not enough space to be reclaimed, don't bother committing the
3579 	 * transaction.
3580 	 */
3581 	if (space_info->bytes_pinned < orig_bytes)
3582 		ret = -ENOSPC;
3583 	spin_unlock(&space_info->lock);
3584 	if (ret)
3585 		goto out;
3586 
3587 	ret = -EAGAIN;
3588 	if (trans || committed)
3589 		goto out;
3590 
3591 	ret = -ENOSPC;
3592 	trans = btrfs_join_transaction(root, 1);
3593 	if (IS_ERR(trans))
3594 		goto out;
3595 	ret = btrfs_commit_transaction(trans, root);
3596 	if (!ret) {
3597 		trans = NULL;
3598 		committed = true;
3599 		goto again;
3600 	}
3601 
3602 out:
3603 	if (reserved) {
3604 		spin_lock(&space_info->lock);
3605 		space_info->bytes_reserved -= orig_bytes;
3606 		spin_unlock(&space_info->lock);
3607 	}
3608 
3609 	return ret;
3610 }
3611 
get_block_rsv(struct btrfs_trans_handle * trans,struct btrfs_root * root)3612 static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
3613 					     struct btrfs_root *root)
3614 {
3615 	struct btrfs_block_rsv *block_rsv;
3616 	if (root->ref_cows)
3617 		block_rsv = trans->block_rsv;
3618 	else
3619 		block_rsv = root->block_rsv;
3620 
3621 	if (!block_rsv)
3622 		block_rsv = &root->fs_info->empty_block_rsv;
3623 
3624 	return block_rsv;
3625 }
3626 
block_rsv_use_bytes(struct btrfs_block_rsv * block_rsv,u64 num_bytes)3627 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
3628 			       u64 num_bytes)
3629 {
3630 	int ret = -ENOSPC;
3631 	spin_lock(&block_rsv->lock);
3632 	if (block_rsv->reserved >= num_bytes) {
3633 		block_rsv->reserved -= num_bytes;
3634 		if (block_rsv->reserved < block_rsv->size)
3635 			block_rsv->full = 0;
3636 		ret = 0;
3637 	}
3638 	spin_unlock(&block_rsv->lock);
3639 	return ret;
3640 }
3641 
block_rsv_add_bytes(struct btrfs_block_rsv * block_rsv,u64 num_bytes,int update_size)3642 static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
3643 				u64 num_bytes, int update_size)
3644 {
3645 	spin_lock(&block_rsv->lock);
3646 	block_rsv->reserved += num_bytes;
3647 	if (update_size)
3648 		block_rsv->size += num_bytes;
3649 	else if (block_rsv->reserved >= block_rsv->size)
3650 		block_rsv->full = 1;
3651 	spin_unlock(&block_rsv->lock);
3652 }
3653 
block_rsv_release_bytes(struct btrfs_block_rsv * block_rsv,struct btrfs_block_rsv * dest,u64 num_bytes)3654 void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3655 			     struct btrfs_block_rsv *dest, u64 num_bytes)
3656 {
3657 	struct btrfs_space_info *space_info = block_rsv->space_info;
3658 
3659 	spin_lock(&block_rsv->lock);
3660 	if (num_bytes == (u64)-1)
3661 		num_bytes = block_rsv->size;
3662 	block_rsv->size -= num_bytes;
3663 	if (block_rsv->reserved >= block_rsv->size) {
3664 		num_bytes = block_rsv->reserved - block_rsv->size;
3665 		block_rsv->reserved = block_rsv->size;
3666 		block_rsv->full = 1;
3667 	} else {
3668 		num_bytes = 0;
3669 	}
3670 	spin_unlock(&block_rsv->lock);
3671 
3672 	if (num_bytes > 0) {
3673 		if (dest) {
3674 			spin_lock(&dest->lock);
3675 			if (!dest->full) {
3676 				u64 bytes_to_add;
3677 
3678 				bytes_to_add = dest->size - dest->reserved;
3679 				bytes_to_add = min(num_bytes, bytes_to_add);
3680 				dest->reserved += bytes_to_add;
3681 				if (dest->reserved >= dest->size)
3682 					dest->full = 1;
3683 				num_bytes -= bytes_to_add;
3684 			}
3685 			spin_unlock(&dest->lock);
3686 		}
3687 		if (num_bytes) {
3688 			spin_lock(&space_info->lock);
3689 			space_info->bytes_reserved -= num_bytes;
3690 			space_info->reservation_progress++;
3691 			spin_unlock(&space_info->lock);
3692 		}
3693 	}
3694 }
3695 
block_rsv_migrate_bytes(struct btrfs_block_rsv * src,struct btrfs_block_rsv * dst,u64 num_bytes)3696 static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
3697 				   struct btrfs_block_rsv *dst, u64 num_bytes)
3698 {
3699 	int ret;
3700 
3701 	ret = block_rsv_use_bytes(src, num_bytes);
3702 	if (ret)
3703 		return ret;
3704 
3705 	block_rsv_add_bytes(dst, num_bytes, 1);
3706 	return 0;
3707 }
3708 
btrfs_init_block_rsv(struct btrfs_block_rsv * rsv)3709 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
3710 {
3711 	memset(rsv, 0, sizeof(*rsv));
3712 	spin_lock_init(&rsv->lock);
3713 	atomic_set(&rsv->usage, 1);
3714 	rsv->priority = 6;
3715 	INIT_LIST_HEAD(&rsv->list);
3716 }
3717 
btrfs_alloc_block_rsv(struct btrfs_root * root)3718 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
3719 {
3720 	struct btrfs_block_rsv *block_rsv;
3721 	struct btrfs_fs_info *fs_info = root->fs_info;
3722 
3723 	block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
3724 	if (!block_rsv)
3725 		return NULL;
3726 
3727 	btrfs_init_block_rsv(block_rsv);
3728 	block_rsv->space_info = __find_space_info(fs_info,
3729 						  BTRFS_BLOCK_GROUP_METADATA);
3730 	return block_rsv;
3731 }
3732 
btrfs_free_block_rsv(struct btrfs_root * root,struct btrfs_block_rsv * rsv)3733 void btrfs_free_block_rsv(struct btrfs_root *root,
3734 			  struct btrfs_block_rsv *rsv)
3735 {
3736 	if (rsv && atomic_dec_and_test(&rsv->usage)) {
3737 		btrfs_block_rsv_release(root, rsv, (u64)-1);
3738 		if (!rsv->durable)
3739 			kfree(rsv);
3740 	}
3741 }
3742 
3743 /*
3744  * make the block_rsv struct be able to capture freed space.
3745  * the captured space will re-add to the the block_rsv struct
3746  * after transaction commit
3747  */
btrfs_add_durable_block_rsv(struct btrfs_fs_info * fs_info,struct btrfs_block_rsv * block_rsv)3748 void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
3749 				 struct btrfs_block_rsv *block_rsv)
3750 {
3751 	block_rsv->durable = 1;
3752 	mutex_lock(&fs_info->durable_block_rsv_mutex);
3753 	list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
3754 	mutex_unlock(&fs_info->durable_block_rsv_mutex);
3755 }
3756 
btrfs_block_rsv_add(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_block_rsv * block_rsv,u64 num_bytes)3757 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
3758 			struct btrfs_root *root,
3759 			struct btrfs_block_rsv *block_rsv,
3760 			u64 num_bytes)
3761 {
3762 	int ret;
3763 
3764 	if (num_bytes == 0)
3765 		return 0;
3766 
3767 	ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
3768 	if (!ret) {
3769 		block_rsv_add_bytes(block_rsv, num_bytes, 1);
3770 		return 0;
3771 	}
3772 
3773 	return ret;
3774 }
3775 
btrfs_block_rsv_check(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_block_rsv * block_rsv,u64 min_reserved,int min_factor)3776 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3777 			  struct btrfs_root *root,
3778 			  struct btrfs_block_rsv *block_rsv,
3779 			  u64 min_reserved, int min_factor)
3780 {
3781 	u64 num_bytes = 0;
3782 	int commit_trans = 0;
3783 	int ret = -ENOSPC;
3784 
3785 	if (!block_rsv)
3786 		return 0;
3787 
3788 	spin_lock(&block_rsv->lock);
3789 	if (min_factor > 0)
3790 		num_bytes = div_factor(block_rsv->size, min_factor);
3791 	if (min_reserved > num_bytes)
3792 		num_bytes = min_reserved;
3793 
3794 	if (block_rsv->reserved >= num_bytes) {
3795 		ret = 0;
3796 	} else {
3797 		num_bytes -= block_rsv->reserved;
3798 		if (block_rsv->durable &&
3799 		    block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
3800 			commit_trans = 1;
3801 	}
3802 	spin_unlock(&block_rsv->lock);
3803 	if (!ret)
3804 		return 0;
3805 
3806 	if (block_rsv->refill_used) {
3807 		ret = reserve_metadata_bytes(trans, root, block_rsv,
3808 					     num_bytes, 0);
3809 		if (!ret) {
3810 			block_rsv_add_bytes(block_rsv, num_bytes, 0);
3811 			return 0;
3812 		}
3813 	}
3814 
3815 	if (commit_trans) {
3816 		if (trans)
3817 			return -EAGAIN;
3818 
3819 		trans = btrfs_join_transaction(root, 1);
3820 		BUG_ON(IS_ERR(trans));
3821 		ret = btrfs_commit_transaction(trans, root);
3822 		return 0;
3823 	}
3824 
3825 	return -ENOSPC;
3826 }
3827 
btrfs_block_rsv_migrate(struct btrfs_block_rsv * src_rsv,struct btrfs_block_rsv * dst_rsv,u64 num_bytes)3828 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
3829 			    struct btrfs_block_rsv *dst_rsv,
3830 			    u64 num_bytes)
3831 {
3832 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
3833 }
3834 
btrfs_block_rsv_release(struct btrfs_root * root,struct btrfs_block_rsv * block_rsv,u64 num_bytes)3835 void btrfs_block_rsv_release(struct btrfs_root *root,
3836 			     struct btrfs_block_rsv *block_rsv,
3837 			     u64 num_bytes)
3838 {
3839 	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3840 	if (global_rsv->full || global_rsv == block_rsv ||
3841 	    block_rsv->space_info != global_rsv->space_info)
3842 		global_rsv = NULL;
3843 	block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
3844 }
3845 
3846 /*
3847  * helper to calculate size of global block reservation.
3848  * the desired value is sum of space used by extent tree,
3849  * checksum tree and root tree
3850  */
calc_global_metadata_size(struct btrfs_fs_info * fs_info)3851 static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3852 {
3853 	struct btrfs_space_info *sinfo;
3854 	u64 num_bytes;
3855 	u64 meta_used;
3856 	u64 data_used;
3857 	int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
3858 #if 0
3859 	/*
3860 	 * per tree used space accounting can be inaccuracy, so we
3861 	 * can't rely on it.
3862 	 */
3863 	spin_lock(&fs_info->extent_root->accounting_lock);
3864 	num_bytes = btrfs_root_used(&fs_info->extent_root->root_item);
3865 	spin_unlock(&fs_info->extent_root->accounting_lock);
3866 
3867 	spin_lock(&fs_info->csum_root->accounting_lock);
3868 	num_bytes += btrfs_root_used(&fs_info->csum_root->root_item);
3869 	spin_unlock(&fs_info->csum_root->accounting_lock);
3870 
3871 	spin_lock(&fs_info->tree_root->accounting_lock);
3872 	num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
3873 	spin_unlock(&fs_info->tree_root->accounting_lock);
3874 #endif
3875 	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
3876 	spin_lock(&sinfo->lock);
3877 	data_used = sinfo->bytes_used;
3878 	spin_unlock(&sinfo->lock);
3879 
3880 	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3881 	spin_lock(&sinfo->lock);
3882 	if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
3883 		data_used = 0;
3884 	meta_used = sinfo->bytes_used;
3885 	spin_unlock(&sinfo->lock);
3886 
3887 	num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
3888 		    csum_size * 2;
3889 	num_bytes += div64_u64(data_used + meta_used, 50);
3890 
3891 	if (num_bytes * 3 > meta_used)
3892 		num_bytes = div64_u64(meta_used, 3);
3893 
3894 	return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
3895 }
3896 
update_global_block_rsv(struct btrfs_fs_info * fs_info)3897 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3898 {
3899 	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
3900 	struct btrfs_space_info *sinfo = block_rsv->space_info;
3901 	u64 num_bytes;
3902 
3903 	num_bytes = calc_global_metadata_size(fs_info);
3904 
3905 	spin_lock(&block_rsv->lock);
3906 	spin_lock(&sinfo->lock);
3907 
3908 	block_rsv->size = num_bytes;
3909 
3910 	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
3911 		    sinfo->bytes_reserved + sinfo->bytes_readonly +
3912 		    sinfo->bytes_may_use;
3913 
3914 	if (sinfo->total_bytes > num_bytes) {
3915 		num_bytes = sinfo->total_bytes - num_bytes;
3916 		block_rsv->reserved += num_bytes;
3917 		sinfo->bytes_reserved += num_bytes;
3918 	}
3919 
3920 	if (block_rsv->reserved >= block_rsv->size) {
3921 		num_bytes = block_rsv->reserved - block_rsv->size;
3922 		sinfo->bytes_reserved -= num_bytes;
3923 		sinfo->reservation_progress++;
3924 		block_rsv->reserved = block_rsv->size;
3925 		block_rsv->full = 1;
3926 	}
3927 #if 0
3928 	printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
3929 		block_rsv->size, block_rsv->reserved);
3930 #endif
3931 	spin_unlock(&sinfo->lock);
3932 	spin_unlock(&block_rsv->lock);
3933 }
3934 
init_global_block_rsv(struct btrfs_fs_info * fs_info)3935 static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
3936 {
3937 	struct btrfs_space_info *space_info;
3938 
3939 	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
3940 	fs_info->chunk_block_rsv.space_info = space_info;
3941 	fs_info->chunk_block_rsv.priority = 10;
3942 
3943 	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3944 	fs_info->global_block_rsv.space_info = space_info;
3945 	fs_info->global_block_rsv.priority = 10;
3946 	fs_info->global_block_rsv.refill_used = 1;
3947 	fs_info->delalloc_block_rsv.space_info = space_info;
3948 	fs_info->trans_block_rsv.space_info = space_info;
3949 	fs_info->empty_block_rsv.space_info = space_info;
3950 	fs_info->empty_block_rsv.priority = 10;
3951 
3952 	fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
3953 	fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
3954 	fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
3955 	fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
3956 	fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
3957 
3958 	btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);
3959 
3960 	btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
3961 
3962 	update_global_block_rsv(fs_info);
3963 }
3964 
release_global_block_rsv(struct btrfs_fs_info * fs_info)3965 static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
3966 {
3967 	block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
3968 	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
3969 	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
3970 	WARN_ON(fs_info->trans_block_rsv.size > 0);
3971 	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
3972 	WARN_ON(fs_info->chunk_block_rsv.size > 0);
3973 	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
3974 }
3975 
calc_trans_metadata_size(struct btrfs_root * root,int num_items)3976 static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3977 {
3978 	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3979 		3 * num_items;
3980 }
3981 
btrfs_trans_reserve_metadata(struct btrfs_trans_handle * trans,struct btrfs_root * root,int num_items)3982 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3983 				 struct btrfs_root *root,
3984 				 int num_items)
3985 {
3986 	u64 num_bytes;
3987 	int ret;
3988 
3989 	if (num_items == 0 || root->fs_info->chunk_root == root)
3990 		return 0;
3991 
3992 	num_bytes = calc_trans_metadata_size(root, num_items);
3993 	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3994 				  num_bytes);
3995 	if (!ret) {
3996 		trans->bytes_reserved += num_bytes;
3997 		trans->block_rsv = &root->fs_info->trans_block_rsv;
3998 	}
3999 	return ret;
4000 }
4001 
btrfs_trans_release_metadata(struct btrfs_trans_handle * trans,struct btrfs_root * root)4002 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
4003 				  struct btrfs_root *root)
4004 {
4005 	if (!trans->bytes_reserved)
4006 		return;
4007 
4008 	BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
4009 	btrfs_block_rsv_release(root, trans->block_rsv,
4010 				trans->bytes_reserved);
4011 	trans->bytes_reserved = 0;
4012 }
4013 
btrfs_orphan_reserve_metadata(struct btrfs_trans_handle * trans,struct inode * inode)4014 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
4015 				  struct inode *inode)
4016 {
4017 	struct btrfs_root *root = BTRFS_I(inode)->root;
4018 	struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
4019 	struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
4020 
4021 	/*
4022 	 * one for deleting orphan item, one for updating inode and
4023 	 * two for calling btrfs_truncate_inode_items.
4024 	 *
4025 	 * btrfs_truncate_inode_items is a delete operation, it frees
4026 	 * more space than it uses in most cases. So two units of
4027 	 * metadata space should be enough for calling it many times.
4028 	 * If all of the metadata space is used, we can commit
4029 	 * transaction and use space it freed.
4030 	 */
4031 	u64 num_bytes = calc_trans_metadata_size(root, 4);
4032 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4033 }
4034 
btrfs_orphan_release_metadata(struct inode * inode)4035 void btrfs_orphan_release_metadata(struct inode *inode)
4036 {
4037 	struct btrfs_root *root = BTRFS_I(inode)->root;
4038 	u64 num_bytes = calc_trans_metadata_size(root, 4);
4039 	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
4040 }
4041 
btrfs_snap_reserve_metadata(struct btrfs_trans_handle * trans,struct btrfs_pending_snapshot * pending)4042 int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
4043 				struct btrfs_pending_snapshot *pending)
4044 {
4045 	struct btrfs_root *root = pending->root;
4046 	struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
4047 	struct btrfs_block_rsv *dst_rsv = &pending->block_rsv;
4048 	/*
4049 	 * two for root back/forward refs, two for directory entries
4050 	 * and one for root of the snapshot.
4051 	 */
4052 	u64 num_bytes = calc_trans_metadata_size(root, 5);
4053 	dst_rsv->space_info = src_rsv->space_info;
4054 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
4055 }
4056 
calc_csum_metadata_size(struct inode * inode,u64 num_bytes)4057 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
4058 {
4059 	return num_bytes >>= 3;
4060 }
4061 
btrfs_delalloc_reserve_metadata(struct inode * inode,u64 num_bytes)4062 int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4063 {
4064 	struct btrfs_root *root = BTRFS_I(inode)->root;
4065 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
4066 	u64 to_reserve;
4067 	int nr_extents;
4068 	int reserved_extents;
4069 	int ret;
4070 
4071 	if (btrfs_transaction_in_commit(root->fs_info))
4072 		schedule_timeout(1);
4073 
4074 	num_bytes = ALIGN(num_bytes, root->sectorsize);
4075 
4076 	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
4077 	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
4078 
4079 	if (nr_extents > reserved_extents) {
4080 		nr_extents -= reserved_extents;
4081 		to_reserve = calc_trans_metadata_size(root, nr_extents);
4082 	} else {
4083 		nr_extents = 0;
4084 		to_reserve = 0;
4085 	}
4086 
4087 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
4088 	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4089 	if (ret)
4090 		return ret;
4091 
4092 	atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
4093 	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
4094 
4095 	block_rsv_add_bytes(block_rsv, to_reserve, 1);
4096 
4097 	if (block_rsv->size > 512 * 1024 * 1024)
4098 		shrink_delalloc(NULL, root, to_reserve, 0);
4099 
4100 	return 0;
4101 }
4102 
btrfs_delalloc_release_metadata(struct inode * inode,u64 num_bytes)4103 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4104 {
4105 	struct btrfs_root *root = BTRFS_I(inode)->root;
4106 	u64 to_free;
4107 	int nr_extents;
4108 	int reserved_extents;
4109 
4110 	num_bytes = ALIGN(num_bytes, root->sectorsize);
4111 	atomic_dec(&BTRFS_I(inode)->outstanding_extents);
4112 	WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
4113 
4114 	reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
4115 	do {
4116 		int old, new;
4117 
4118 		nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
4119 		if (nr_extents >= reserved_extents) {
4120 			nr_extents = 0;
4121 			break;
4122 		}
4123 		old = reserved_extents;
4124 		nr_extents = reserved_extents - nr_extents;
4125 		new = reserved_extents - nr_extents;
4126 		old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
4127 				     reserved_extents, new);
4128 		if (likely(old == reserved_extents))
4129 			break;
4130 		reserved_extents = old;
4131 	} while (1);
4132 
4133 	to_free = calc_csum_metadata_size(inode, num_bytes);
4134 	if (nr_extents > 0)
4135 		to_free += calc_trans_metadata_size(root, nr_extents);
4136 
4137 	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
4138 				to_free);
4139 }
4140 
btrfs_delalloc_reserve_space(struct inode * inode,u64 num_bytes)4141 int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
4142 {
4143 	int ret;
4144 
4145 	ret = btrfs_check_data_free_space(inode, num_bytes);
4146 	if (ret)
4147 		return ret;
4148 
4149 	ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
4150 	if (ret) {
4151 		btrfs_free_reserved_data_space(inode, num_bytes);
4152 		return ret;
4153 	}
4154 
4155 	return 0;
4156 }
4157 
btrfs_delalloc_release_space(struct inode * inode,u64 num_bytes)4158 void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
4159 {
4160 	btrfs_delalloc_release_metadata(inode, num_bytes);
4161 	btrfs_free_reserved_data_space(inode, num_bytes);
4162 }
4163 
update_block_group(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u64 num_bytes,int alloc)4164 static int update_block_group(struct btrfs_trans_handle *trans,
4165 			      struct btrfs_root *root,
4166 			      u64 bytenr, u64 num_bytes, int alloc)
4167 {
4168 	struct btrfs_block_group_cache *cache = NULL;
4169 	struct btrfs_fs_info *info = root->fs_info;
4170 	u64 total = num_bytes;
4171 	u64 old_val;
4172 	u64 byte_in_group;
4173 	int factor;
4174 
4175 	/* block accounting for super block */
4176 	spin_lock(&info->delalloc_lock);
4177 	old_val = btrfs_super_bytes_used(&info->super_copy);
4178 	if (alloc)
4179 		old_val += num_bytes;
4180 	else
4181 		old_val -= num_bytes;
4182 	btrfs_set_super_bytes_used(&info->super_copy, old_val);
4183 	spin_unlock(&info->delalloc_lock);
4184 
4185 	while (total) {
4186 		cache = btrfs_lookup_block_group(info, bytenr);
4187 		if (!cache)
4188 			return -1;
4189 		if (cache->flags & (BTRFS_BLOCK_GROUP_DUP |
4190 				    BTRFS_BLOCK_GROUP_RAID1 |
4191 				    BTRFS_BLOCK_GROUP_RAID10))
4192 			factor = 2;
4193 		else
4194 			factor = 1;
4195 		/*
4196 		 * If this block group has free space cache written out, we
4197 		 * need to make sure to load it if we are removing space.  This
4198 		 * is because we need the unpinning stage to actually add the
4199 		 * space back to the block group, otherwise we will leak space.
4200 		 */
4201 		if (!alloc && cache->cached == BTRFS_CACHE_NO)
4202 			cache_block_group(cache, trans, NULL, 1);
4203 
4204 		byte_in_group = bytenr - cache->key.objectid;
4205 		WARN_ON(byte_in_group > cache->key.offset);
4206 
4207 		spin_lock(&cache->space_info->lock);
4208 		spin_lock(&cache->lock);
4209 
4210 		if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
4211 		    cache->disk_cache_state < BTRFS_DC_CLEAR)
4212 			cache->disk_cache_state = BTRFS_DC_CLEAR;
4213 
4214 		cache->dirty = 1;
4215 		old_val = btrfs_block_group_used(&cache->item);
4216 		num_bytes = min(total, cache->key.offset - byte_in_group);
4217 		if (alloc) {
4218 			old_val += num_bytes;
4219 			btrfs_set_block_group_used(&cache->item, old_val);
4220 			cache->reserved -= num_bytes;
4221 			cache->space_info->bytes_reserved -= num_bytes;
4222 			cache->space_info->reservation_progress++;
4223 			cache->space_info->bytes_used += num_bytes;
4224 			cache->space_info->disk_used += num_bytes * factor;
4225 			spin_unlock(&cache->lock);
4226 			spin_unlock(&cache->space_info->lock);
4227 		} else {
4228 			old_val -= num_bytes;
4229 			btrfs_set_block_group_used(&cache->item, old_val);
4230 			cache->pinned += num_bytes;
4231 			cache->space_info->bytes_pinned += num_bytes;
4232 			cache->space_info->bytes_used -= num_bytes;
4233 			cache->space_info->disk_used -= num_bytes * factor;
4234 			spin_unlock(&cache->lock);
4235 			spin_unlock(&cache->space_info->lock);
4236 
4237 			set_extent_dirty(info->pinned_extents,
4238 					 bytenr, bytenr + num_bytes - 1,
4239 					 GFP_NOFS | __GFP_NOFAIL);
4240 		}
4241 		btrfs_put_block_group(cache);
4242 		total -= num_bytes;
4243 		bytenr += num_bytes;
4244 	}
4245 	return 0;
4246 }
4247 
first_logical_byte(struct btrfs_root * root,u64 search_start)4248 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
4249 {
4250 	struct btrfs_block_group_cache *cache;
4251 	u64 bytenr;
4252 
4253 	cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
4254 	if (!cache)
4255 		return 0;
4256 
4257 	bytenr = cache->key.objectid;
4258 	btrfs_put_block_group(cache);
4259 
4260 	return bytenr;
4261 }
4262 
pin_down_extent(struct btrfs_root * root,struct btrfs_block_group_cache * cache,u64 bytenr,u64 num_bytes,int reserved)4263 static int pin_down_extent(struct btrfs_root *root,
4264 			   struct btrfs_block_group_cache *cache,
4265 			   u64 bytenr, u64 num_bytes, int reserved)
4266 {
4267 	spin_lock(&cache->space_info->lock);
4268 	spin_lock(&cache->lock);
4269 	cache->pinned += num_bytes;
4270 	cache->space_info->bytes_pinned += num_bytes;
4271 	if (reserved) {
4272 		cache->reserved -= num_bytes;
4273 		cache->space_info->bytes_reserved -= num_bytes;
4274 		cache->space_info->reservation_progress++;
4275 	}
4276 	spin_unlock(&cache->lock);
4277 	spin_unlock(&cache->space_info->lock);
4278 
4279 	set_extent_dirty(root->fs_info->pinned_extents, bytenr,
4280 			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
4281 	return 0;
4282 }
4283 
4284 /*
4285  * this function must be called within transaction
4286  */
btrfs_pin_extent(struct btrfs_root * root,u64 bytenr,u64 num_bytes,int reserved)4287 int btrfs_pin_extent(struct btrfs_root *root,
4288 		     u64 bytenr, u64 num_bytes, int reserved)
4289 {
4290 	struct btrfs_block_group_cache *cache;
4291 
4292 	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
4293 	BUG_ON(!cache);
4294 
4295 	pin_down_extent(root, cache, bytenr, num_bytes, reserved);
4296 
4297 	btrfs_put_block_group(cache);
4298 	return 0;
4299 }
4300 
4301 /*
4302  * update size of reserved extents. this function may return -EAGAIN
4303  * if 'reserve' is true or 'sinfo' is false.
4304  */
btrfs_update_reserved_bytes(struct btrfs_block_group_cache * cache,u64 num_bytes,int reserve,int sinfo)4305 int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
4306 				u64 num_bytes, int reserve, int sinfo)
4307 {
4308 	int ret = 0;
4309 	if (sinfo) {
4310 		struct btrfs_space_info *space_info = cache->space_info;
4311 		spin_lock(&space_info->lock);
4312 		spin_lock(&cache->lock);
4313 		if (reserve) {
4314 			if (cache->ro) {
4315 				ret = -EAGAIN;
4316 			} else {
4317 				cache->reserved += num_bytes;
4318 				space_info->bytes_reserved += num_bytes;
4319 			}
4320 		} else {
4321 			if (cache->ro)
4322 				space_info->bytes_readonly += num_bytes;
4323 			cache->reserved -= num_bytes;
4324 			space_info->bytes_reserved -= num_bytes;
4325 			space_info->reservation_progress++;
4326 		}
4327 		spin_unlock(&cache->lock);
4328 		spin_unlock(&space_info->lock);
4329 	} else {
4330 		spin_lock(&cache->lock);
4331 		if (cache->ro) {
4332 			ret = -EAGAIN;
4333 		} else {
4334 			if (reserve)
4335 				cache->reserved += num_bytes;
4336 			else
4337 				cache->reserved -= num_bytes;
4338 		}
4339 		spin_unlock(&cache->lock);
4340 	}
4341 	return ret;
4342 }
4343 
btrfs_prepare_extent_commit(struct btrfs_trans_handle * trans,struct btrfs_root * root)4344 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
4345 				struct btrfs_root *root)
4346 {
4347 	struct btrfs_fs_info *fs_info = root->fs_info;
4348 	struct btrfs_caching_control *next;
4349 	struct btrfs_caching_control *caching_ctl;
4350 	struct btrfs_block_group_cache *cache;
4351 
4352 	down_write(&fs_info->extent_commit_sem);
4353 
4354 	list_for_each_entry_safe(caching_ctl, next,
4355 				 &fs_info->caching_block_groups, list) {
4356 		cache = caching_ctl->block_group;
4357 		if (block_group_cache_done(cache)) {
4358 			cache->last_byte_to_unpin = (u64)-1;
4359 			list_del_init(&caching_ctl->list);
4360 			put_caching_control(caching_ctl);
4361 		} else {
4362 			cache->last_byte_to_unpin = caching_ctl->progress;
4363 		}
4364 	}
4365 
4366 	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
4367 		fs_info->pinned_extents = &fs_info->freed_extents[1];
4368 	else
4369 		fs_info->pinned_extents = &fs_info->freed_extents[0];
4370 
4371 	up_write(&fs_info->extent_commit_sem);
4372 
4373 	update_global_block_rsv(fs_info);
4374 	return 0;
4375 }
4376 
unpin_extent_range(struct btrfs_root * root,u64 start,u64 end)4377 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
4378 {
4379 	struct btrfs_fs_info *fs_info = root->fs_info;
4380 	struct btrfs_block_group_cache *cache = NULL;
4381 	u64 len;
4382 
4383 	while (start <= end) {
4384 		if (!cache ||
4385 		    start >= cache->key.objectid + cache->key.offset) {
4386 			if (cache)
4387 				btrfs_put_block_group(cache);
4388 			cache = btrfs_lookup_block_group(fs_info, start);
4389 			BUG_ON(!cache);
4390 		}
4391 
4392 		len = cache->key.objectid + cache->key.offset - start;
4393 		len = min(len, end + 1 - start);
4394 
4395 		if (start < cache->last_byte_to_unpin) {
4396 			len = min(len, cache->last_byte_to_unpin - start);
4397 			btrfs_add_free_space(cache, start, len);
4398 		}
4399 
4400 		start += len;
4401 
4402 		spin_lock(&cache->space_info->lock);
4403 		spin_lock(&cache->lock);
4404 		cache->pinned -= len;
4405 		cache->space_info->bytes_pinned -= len;
4406 		if (cache->ro) {
4407 			cache->space_info->bytes_readonly += len;
4408 		} else if (cache->reserved_pinned > 0) {
4409 			len = min(len, cache->reserved_pinned);
4410 			cache->reserved_pinned -= len;
4411 			cache->space_info->bytes_reserved += len;
4412 		}
4413 		spin_unlock(&cache->lock);
4414 		spin_unlock(&cache->space_info->lock);
4415 	}
4416 
4417 	if (cache)
4418 		btrfs_put_block_group(cache);
4419 	return 0;
4420 }
4421 
btrfs_finish_extent_commit(struct btrfs_trans_handle * trans,struct btrfs_root * root)4422 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
4423 			       struct btrfs_root *root)
4424 {
4425 	struct btrfs_fs_info *fs_info = root->fs_info;
4426 	struct extent_io_tree *unpin;
4427 	struct btrfs_block_rsv *block_rsv;
4428 	struct btrfs_block_rsv *next_rsv;
4429 	u64 start;
4430 	u64 end;
4431 	int idx;
4432 	int ret;
4433 
4434 	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
4435 		unpin = &fs_info->freed_extents[1];
4436 	else
4437 		unpin = &fs_info->freed_extents[0];
4438 
4439 	while (1) {
4440 		ret = find_first_extent_bit(unpin, 0, &start, &end,
4441 					    EXTENT_DIRTY);
4442 		if (ret)
4443 			break;
4444 
4445 		if (btrfs_test_opt(root, DISCARD))
4446 			ret = btrfs_discard_extent(root, start,
4447 						   end + 1 - start, NULL);
4448 
4449 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
4450 		unpin_extent_range(root, start, end);
4451 		cond_resched();
4452 	}
4453 
4454 	mutex_lock(&fs_info->durable_block_rsv_mutex);
4455 	list_for_each_entry_safe(block_rsv, next_rsv,
4456 				 &fs_info->durable_block_rsv_list, list) {
4457 
4458 		idx = trans->transid & 0x1;
4459 		if (block_rsv->freed[idx] > 0) {
4460 			block_rsv_add_bytes(block_rsv,
4461 					    block_rsv->freed[idx], 0);
4462 			block_rsv->freed[idx] = 0;
4463 		}
4464 		if (atomic_read(&block_rsv->usage) == 0) {
4465 			btrfs_block_rsv_release(root, block_rsv, (u64)-1);
4466 
4467 			if (block_rsv->freed[0] == 0 &&
4468 			    block_rsv->freed[1] == 0) {
4469 				list_del_init(&block_rsv->list);
4470 				kfree(block_rsv);
4471 			}
4472 		} else {
4473 			btrfs_block_rsv_release(root, block_rsv, 0);
4474 		}
4475 	}
4476 	mutex_unlock(&fs_info->durable_block_rsv_mutex);
4477 
4478 	return 0;
4479 }
4480 
__btrfs_free_extent(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner_objectid,u64 owner_offset,int refs_to_drop,struct btrfs_delayed_extent_op * extent_op)4481 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
4482 				struct btrfs_root *root,
4483 				u64 bytenr, u64 num_bytes, u64 parent,
4484 				u64 root_objectid, u64 owner_objectid,
4485 				u64 owner_offset, int refs_to_drop,
4486 				struct btrfs_delayed_extent_op *extent_op)
4487 {
4488 	struct btrfs_key key;
4489 	struct btrfs_path *path;
4490 	struct btrfs_fs_info *info = root->fs_info;
4491 	struct btrfs_root *extent_root = info->extent_root;
4492 	struct extent_buffer *leaf;
4493 	struct btrfs_extent_item *ei;
4494 	struct btrfs_extent_inline_ref *iref;
4495 	int ret;
4496 	int is_data;
4497 	int extent_slot = 0;
4498 	int found_extent = 0;
4499 	int num_to_del = 1;
4500 	u32 item_size;
4501 	u64 refs;
4502 
4503 	path = btrfs_alloc_path();
4504 	if (!path)
4505 		return -ENOMEM;
4506 
4507 	path->reada = 1;
4508 	path->leave_spinning = 1;
4509 
4510 	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
4511 	BUG_ON(!is_data && refs_to_drop != 1);
4512 
4513 	ret = lookup_extent_backref(trans, extent_root, path, &iref,
4514 				    bytenr, num_bytes, parent,
4515 				    root_objectid, owner_objectid,
4516 				    owner_offset);
4517 	if (ret == 0) {
4518 		extent_slot = path->slots[0];
4519 		while (extent_slot >= 0) {
4520 			btrfs_item_key_to_cpu(path->nodes[0], &key,
4521 					      extent_slot);
4522 			if (key.objectid != bytenr)
4523 				break;
4524 			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
4525 			    key.offset == num_bytes) {
4526 				found_extent = 1;
4527 				break;
4528 			}
4529 			if (path->slots[0] - extent_slot > 5)
4530 				break;
4531 			extent_slot--;
4532 		}
4533 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4534 		item_size = btrfs_item_size_nr(path->nodes[0], extent_slot);
4535 		if (found_extent && item_size < sizeof(*ei))
4536 			found_extent = 0;
4537 #endif
4538 		if (!found_extent) {
4539 			BUG_ON(iref);
4540 			ret = remove_extent_backref(trans, extent_root, path,
4541 						    NULL, refs_to_drop,
4542 						    is_data);
4543 			BUG_ON(ret);
4544 			btrfs_release_path(extent_root, path);
4545 			path->leave_spinning = 1;
4546 
4547 			key.objectid = bytenr;
4548 			key.type = BTRFS_EXTENT_ITEM_KEY;
4549 			key.offset = num_bytes;
4550 
4551 			ret = btrfs_search_slot(trans, extent_root,
4552 						&key, path, -1, 1);
4553 			if (ret) {
4554 				printk(KERN_ERR "umm, got %d back from search"
4555 				       ", was looking for %llu\n", ret,
4556 				       (unsigned long long)bytenr);
4557 				btrfs_print_leaf(extent_root, path->nodes[0]);
4558 			}
4559 			BUG_ON(ret);
4560 			extent_slot = path->slots[0];
4561 		}
4562 	} else {
4563 		btrfs_print_leaf(extent_root, path->nodes[0]);
4564 		WARN_ON(1);
4565 		printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
4566 		       "parent %llu root %llu  owner %llu offset %llu\n",
4567 		       (unsigned long long)bytenr,
4568 		       (unsigned long long)parent,
4569 		       (unsigned long long)root_objectid,
4570 		       (unsigned long long)owner_objectid,
4571 		       (unsigned long long)owner_offset);
4572 	}
4573 
4574 	leaf = path->nodes[0];
4575 	item_size = btrfs_item_size_nr(leaf, extent_slot);
4576 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
4577 	if (item_size < sizeof(*ei)) {
4578 		BUG_ON(found_extent || extent_slot != path->slots[0]);
4579 		ret = convert_extent_item_v0(trans, extent_root, path,
4580 					     owner_objectid, 0);
4581 		BUG_ON(ret < 0);
4582 
4583 		btrfs_release_path(extent_root, path);
4584 		path->leave_spinning = 1;
4585 
4586 		key.objectid = bytenr;
4587 		key.type = BTRFS_EXTENT_ITEM_KEY;
4588 		key.offset = num_bytes;
4589 
4590 		ret = btrfs_search_slot(trans, extent_root, &key, path,
4591 					-1, 1);
4592 		if (ret) {
4593 			printk(KERN_ERR "umm, got %d back from search"
4594 			       ", was looking for %llu\n", ret,
4595 			       (unsigned long long)bytenr);
4596 			btrfs_print_leaf(extent_root, path->nodes[0]);
4597 		}
4598 		BUG_ON(ret);
4599 		extent_slot = path->slots[0];
4600 		leaf = path->nodes[0];
4601 		item_size = btrfs_item_size_nr(leaf, extent_slot);
4602 	}
4603 #endif
4604 	BUG_ON(item_size < sizeof(*ei));
4605 	ei = btrfs_item_ptr(leaf, extent_slot,
4606 			    struct btrfs_extent_item);
4607 	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
4608 		struct btrfs_tree_block_info *bi;
4609 		BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
4610 		bi = (struct btrfs_tree_block_info *)(ei + 1);
4611 		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
4612 	}
4613 
4614 	refs = btrfs_extent_refs(leaf, ei);
4615 	BUG_ON(refs < refs_to_drop);
4616 	refs -= refs_to_drop;
4617 
4618 	if (refs > 0) {
4619 		if (extent_op)
4620 			__run_delayed_extent_op(extent_op, leaf, ei);
4621 		/*
4622 		 * In the case of inline back ref, reference count will
4623 		 * be updated by remove_extent_backref
4624 		 */
4625 		if (iref) {
4626 			BUG_ON(!found_extent);
4627 		} else {
4628 			btrfs_set_extent_refs(leaf, ei, refs);
4629 			btrfs_mark_buffer_dirty(leaf);
4630 		}
4631 		if (found_extent) {
4632 			ret = remove_extent_backref(trans, extent_root, path,
4633 						    iref, refs_to_drop,
4634 						    is_data);
4635 			BUG_ON(ret);
4636 		}
4637 	} else {
4638 		if (found_extent) {
4639 			BUG_ON(is_data && refs_to_drop !=
4640 			       extent_data_ref_count(root, path, iref));
4641 			if (iref) {
4642 				BUG_ON(path->slots[0] != extent_slot);
4643 			} else {
4644 				BUG_ON(path->slots[0] != extent_slot + 1);
4645 				path->slots[0] = extent_slot;
4646 				num_to_del = 2;
4647 			}
4648 		}
4649 
4650 		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
4651 				      num_to_del);
4652 		BUG_ON(ret);
4653 		btrfs_release_path(extent_root, path);
4654 
4655 		if (is_data) {
4656 			ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
4657 			BUG_ON(ret);
4658 		} else {
4659 			invalidate_mapping_pages(info->btree_inode->i_mapping,
4660 			     bytenr >> PAGE_CACHE_SHIFT,
4661 			     (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
4662 		}
4663 
4664 		ret = update_block_group(trans, root, bytenr, num_bytes, 0);
4665 		BUG_ON(ret);
4666 	}
4667 	btrfs_free_path(path);
4668 	return ret;
4669 }
4670 
4671 /*
4672  * when we free an block, it is possible (and likely) that we free the last
4673  * delayed ref for that extent as well.  This searches the delayed ref tree for
4674  * a given extent, and if there are no other delayed refs to be processed, it
4675  * removes it from the tree.
4676  */
check_ref_cleanup(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr)4677 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
4678 				      struct btrfs_root *root, u64 bytenr)
4679 {
4680 	struct btrfs_delayed_ref_head *head;
4681 	struct btrfs_delayed_ref_root *delayed_refs;
4682 	struct btrfs_delayed_ref_node *ref;
4683 	struct rb_node *node;
4684 	int ret = 0;
4685 
4686 	delayed_refs = &trans->transaction->delayed_refs;
4687 	spin_lock(&delayed_refs->lock);
4688 	head = btrfs_find_delayed_ref_head(trans, bytenr);
4689 	if (!head)
4690 		goto out;
4691 
4692 	node = rb_prev(&head->node.rb_node);
4693 	if (!node)
4694 		goto out;
4695 
4696 	ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
4697 
4698 	/* there are still entries for this ref, we can't drop it */
4699 	if (ref->bytenr == bytenr)
4700 		goto out;
4701 
4702 	if (head->extent_op) {
4703 		if (!head->must_insert_reserved)
4704 			goto out;
4705 		kfree(head->extent_op);
4706 		head->extent_op = NULL;
4707 	}
4708 
4709 	/*
4710 	 * waiting for the lock here would deadlock.  If someone else has it
4711 	 * locked they are already in the process of dropping it anyway
4712 	 */
4713 	if (!mutex_trylock(&head->mutex))
4714 		goto out;
4715 
4716 	/*
4717 	 * at this point we have a head with no other entries.  Go
4718 	 * ahead and process it.
4719 	 */
4720 	head->node.in_tree = 0;
4721 	rb_erase(&head->node.rb_node, &delayed_refs->root);
4722 
4723 	delayed_refs->num_entries--;
4724 
4725 	/*
4726 	 * we don't take a ref on the node because we're removing it from the
4727 	 * tree, so we just steal the ref the tree was holding.
4728 	 */
4729 	delayed_refs->num_heads--;
4730 	if (list_empty(&head->cluster))
4731 		delayed_refs->num_heads_ready--;
4732 
4733 	list_del_init(&head->cluster);
4734 	spin_unlock(&delayed_refs->lock);
4735 
4736 	BUG_ON(head->extent_op);
4737 	if (head->must_insert_reserved)
4738 		ret = 1;
4739 
4740 	mutex_unlock(&head->mutex);
4741 	btrfs_put_delayed_ref(&head->node);
4742 	return ret;
4743 out:
4744 	spin_unlock(&delayed_refs->lock);
4745 	return 0;
4746 }
4747 
btrfs_free_tree_block(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * buf,u64 parent,int last_ref)4748 void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4749 			   struct btrfs_root *root,
4750 			   struct extent_buffer *buf,
4751 			   u64 parent, int last_ref)
4752 {
4753 	struct btrfs_block_rsv *block_rsv;
4754 	struct btrfs_block_group_cache *cache = NULL;
4755 	int ret;
4756 
4757 	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4758 		ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
4759 						parent, root->root_key.objectid,
4760 						btrfs_header_level(buf),
4761 						BTRFS_DROP_DELAYED_REF, NULL);
4762 		BUG_ON(ret);
4763 	}
4764 
4765 	if (!last_ref)
4766 		return;
4767 
4768 	block_rsv = get_block_rsv(trans, root);
4769 	cache = btrfs_lookup_block_group(root->fs_info, buf->start);
4770 	if (block_rsv->space_info != cache->space_info)
4771 		goto out;
4772 
4773 	if (btrfs_header_generation(buf) == trans->transid) {
4774 		if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
4775 			ret = check_ref_cleanup(trans, root, buf->start);
4776 			if (!ret)
4777 				goto pin;
4778 		}
4779 
4780 		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
4781 			pin_down_extent(root, cache, buf->start, buf->len, 1);
4782 			goto pin;
4783 		}
4784 
4785 		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
4786 
4787 		btrfs_add_free_space(cache, buf->start, buf->len);
4788 		ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0);
4789 		if (ret == -EAGAIN) {
4790 			/* block group became read-only */
4791 			btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
4792 			goto out;
4793 		}
4794 
4795 		ret = 1;
4796 		spin_lock(&block_rsv->lock);
4797 		if (block_rsv->reserved < block_rsv->size) {
4798 			block_rsv->reserved += buf->len;
4799 			ret = 0;
4800 		}
4801 		spin_unlock(&block_rsv->lock);
4802 
4803 		if (ret) {
4804 			spin_lock(&cache->space_info->lock);
4805 			cache->space_info->bytes_reserved -= buf->len;
4806 			cache->space_info->reservation_progress++;
4807 			spin_unlock(&cache->space_info->lock);
4808 		}
4809 		goto out;
4810 	}
4811 pin:
4812 	if (block_rsv->durable && !cache->ro) {
4813 		ret = 0;
4814 		spin_lock(&cache->lock);
4815 		if (!cache->ro) {
4816 			cache->reserved_pinned += buf->len;
4817 			ret = 1;
4818 		}
4819 		spin_unlock(&cache->lock);
4820 
4821 		if (ret) {
4822 			spin_lock(&block_rsv->lock);
4823 			block_rsv->freed[trans->transid & 0x1] += buf->len;
4824 			spin_unlock(&block_rsv->lock);
4825 		}
4826 	}
4827 out:
4828 	/*
4829 	 * Deleting the buffer, clear the corrupt flag since it doesn't matter
4830 	 * anymore.
4831 	 */
4832 	clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
4833 	btrfs_put_block_group(cache);
4834 }
4835 
btrfs_free_extent(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u64 num_bytes,u64 parent,u64 root_objectid,u64 owner,u64 offset)4836 int btrfs_free_extent(struct btrfs_trans_handle *trans,
4837 		      struct btrfs_root *root,
4838 		      u64 bytenr, u64 num_bytes, u64 parent,
4839 		      u64 root_objectid, u64 owner, u64 offset)
4840 {
4841 	int ret;
4842 
4843 	/*
4844 	 * tree log blocks never actually go into the extent allocation
4845 	 * tree, just update pinning info and exit early.
4846 	 */
4847 	if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
4848 		WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
4849 		/* unlocks the pinned mutex */
4850 		btrfs_pin_extent(root, bytenr, num_bytes, 1);
4851 		ret = 0;
4852 	} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
4853 		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
4854 					parent, root_objectid, (int)owner,
4855 					BTRFS_DROP_DELAYED_REF, NULL);
4856 		BUG_ON(ret);
4857 	} else {
4858 		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
4859 					parent, root_objectid, owner,
4860 					offset, BTRFS_DROP_DELAYED_REF, NULL);
4861 		BUG_ON(ret);
4862 	}
4863 	return ret;
4864 }
4865 
stripe_align(struct btrfs_root * root,u64 val)4866 static u64 stripe_align(struct btrfs_root *root, u64 val)
4867 {
4868 	u64 mask = ((u64)root->stripesize - 1);
4869 	u64 ret = (val + mask) & ~mask;
4870 	return ret;
4871 }
4872 
4873 /*
4874  * when we wait for progress in the block group caching, its because
4875  * our allocation attempt failed at least once.  So, we must sleep
4876  * and let some progress happen before we try again.
4877  *
4878  * This function will sleep at least once waiting for new free space to
4879  * show up, and then it will check the block group free space numbers
4880  * for our min num_bytes.  Another option is to have it go ahead
4881  * and look in the rbtree for a free extent of a given size, but this
4882  * is a good start.
4883  */
4884 static noinline int
wait_block_group_cache_progress(struct btrfs_block_group_cache * cache,u64 num_bytes)4885 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
4886 				u64 num_bytes)
4887 {
4888 	struct btrfs_caching_control *caching_ctl;
4889 	DEFINE_WAIT(wait);
4890 
4891 	caching_ctl = get_caching_control(cache);
4892 	if (!caching_ctl)
4893 		return 0;
4894 
4895 	wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
4896 		   (cache->free_space >= num_bytes));
4897 
4898 	put_caching_control(caching_ctl);
4899 	return 0;
4900 }
4901 
4902 static noinline int
wait_block_group_cache_done(struct btrfs_block_group_cache * cache)4903 wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
4904 {
4905 	struct btrfs_caching_control *caching_ctl;
4906 	DEFINE_WAIT(wait);
4907 
4908 	caching_ctl = get_caching_control(cache);
4909 	if (!caching_ctl)
4910 		return 0;
4911 
4912 	wait_event(caching_ctl->wait, block_group_cache_done(cache));
4913 
4914 	put_caching_control(caching_ctl);
4915 	return 0;
4916 }
4917 
get_block_group_index(struct btrfs_block_group_cache * cache)4918 static int get_block_group_index(struct btrfs_block_group_cache *cache)
4919 {
4920 	int index;
4921 	if (cache->flags & BTRFS_BLOCK_GROUP_RAID10)
4922 		index = 0;
4923 	else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1)
4924 		index = 1;
4925 	else if (cache->flags & BTRFS_BLOCK_GROUP_DUP)
4926 		index = 2;
4927 	else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0)
4928 		index = 3;
4929 	else
4930 		index = 4;
4931 	return index;
4932 }
4933 
4934 enum btrfs_loop_type {
4935 	LOOP_FIND_IDEAL = 0,
4936 	LOOP_CACHING_NOWAIT = 1,
4937 	LOOP_CACHING_WAIT = 2,
4938 	LOOP_ALLOC_CHUNK = 3,
4939 	LOOP_NO_EMPTY_SIZE = 4,
4940 };
4941 
4942 /*
4943  * walks the btree of allocated extents and find a hole of a given size.
4944  * The key ins is changed to record the hole:
4945  * ins->objectid == block start
4946  * ins->flags = BTRFS_EXTENT_ITEM_KEY
4947  * ins->offset == number of blocks
4948  * Any available blocks before search_start are skipped.
4949  */
find_free_extent(struct btrfs_trans_handle * trans,struct btrfs_root * orig_root,u64 num_bytes,u64 empty_size,u64 search_start,u64 search_end,u64 hint_byte,struct btrfs_key * ins,int data)4950 static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4951 				     struct btrfs_root *orig_root,
4952 				     u64 num_bytes, u64 empty_size,
4953 				     u64 search_start, u64 search_end,
4954 				     u64 hint_byte, struct btrfs_key *ins,
4955 				     int data)
4956 {
4957 	int ret = 0;
4958 	struct btrfs_root *root = orig_root->fs_info->extent_root;
4959 	struct btrfs_free_cluster *last_ptr = NULL;
4960 	struct btrfs_block_group_cache *block_group = NULL;
4961 	int empty_cluster = 2 * 1024 * 1024;
4962 	int allowed_chunk_alloc = 0;
4963 	int done_chunk_alloc = 0;
4964 	struct btrfs_space_info *space_info;
4965 	int last_ptr_loop = 0;
4966 	int loop = 0;
4967 	int index = 0;
4968 	bool found_uncached_bg = false;
4969 	bool failed_cluster_refill = false;
4970 	bool failed_alloc = false;
4971 	bool use_cluster = true;
4972 	u64 ideal_cache_percent = 0;
4973 	u64 ideal_cache_offset = 0;
4974 
4975 	WARN_ON(num_bytes < root->sectorsize);
4976 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
4977 	ins->objectid = 0;
4978 	ins->offset = 0;
4979 
4980 	space_info = __find_space_info(root->fs_info, data);
4981 	if (!space_info) {
4982 		printk(KERN_ERR "No space info for %d\n", data);
4983 		return -ENOSPC;
4984 	}
4985 
4986 	/*
4987 	 * If the space info is for both data and metadata it means we have a
4988 	 * small filesystem and we can't use the clustering stuff.
4989 	 */
4990 	if (btrfs_mixed_space_info(space_info))
4991 		use_cluster = false;
4992 
4993 	if (orig_root->ref_cows || empty_size)
4994 		allowed_chunk_alloc = 1;
4995 
4996 	if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
4997 		last_ptr = &root->fs_info->meta_alloc_cluster;
4998 		if (!btrfs_test_opt(root, SSD))
4999 			empty_cluster = 64 * 1024;
5000 	}
5001 
5002 	if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
5003 	    btrfs_test_opt(root, SSD)) {
5004 		last_ptr = &root->fs_info->data_alloc_cluster;
5005 	}
5006 
5007 	if (last_ptr) {
5008 		spin_lock(&last_ptr->lock);
5009 		if (last_ptr->block_group)
5010 			hint_byte = last_ptr->window_start;
5011 		spin_unlock(&last_ptr->lock);
5012 	}
5013 
5014 	search_start = max(search_start, first_logical_byte(root, 0));
5015 	search_start = max(search_start, hint_byte);
5016 
5017 	if (!last_ptr)
5018 		empty_cluster = 0;
5019 
5020 	if (search_start == hint_byte) {
5021 ideal_cache:
5022 		block_group = btrfs_lookup_block_group(root->fs_info,
5023 						       search_start);
5024 		/*
5025 		 * we don't want to use the block group if it doesn't match our
5026 		 * allocation bits, or if its not cached.
5027 		 *
5028 		 * However if we are re-searching with an ideal block group
5029 		 * picked out then we don't care that the block group is cached.
5030 		 */
5031 		if (block_group && block_group_bits(block_group, data) &&
5032 		    (block_group->cached != BTRFS_CACHE_NO ||
5033 		     search_start == ideal_cache_offset)) {
5034 			down_read(&space_info->groups_sem);
5035 			if (list_empty(&block_group->list) ||
5036 			    block_group->ro) {
5037 				/*
5038 				 * someone is removing this block group,
5039 				 * we can't jump into the have_block_group
5040 				 * target because our list pointers are not
5041 				 * valid
5042 				 */
5043 				btrfs_put_block_group(block_group);
5044 				up_read(&space_info->groups_sem);
5045 			} else {
5046 				index = get_block_group_index(block_group);
5047 				goto have_block_group;
5048 			}
5049 		} else if (block_group) {
5050 			btrfs_put_block_group(block_group);
5051 		}
5052 	}
5053 search:
5054 	down_read(&space_info->groups_sem);
5055 	list_for_each_entry(block_group, &space_info->block_groups[index],
5056 			    list) {
5057 		u64 offset;
5058 		int cached;
5059 
5060 		btrfs_get_block_group(block_group);
5061 		search_start = block_group->key.objectid;
5062 
5063 		/*
5064 		 * this can happen if we end up cycling through all the
5065 		 * raid types, but we want to make sure we only allocate
5066 		 * for the proper type.
5067 		 */
5068 		if (!block_group_bits(block_group, data)) {
5069 		    u64 extra = BTRFS_BLOCK_GROUP_DUP |
5070 				BTRFS_BLOCK_GROUP_RAID1 |
5071 				BTRFS_BLOCK_GROUP_RAID10;
5072 
5073 			/*
5074 			 * if they asked for extra copies and this block group
5075 			 * doesn't provide them, bail.  This does allow us to
5076 			 * fill raid0 from raid1.
5077 			 */
5078 			if ((data & extra) && !(block_group->flags & extra))
5079 				goto loop;
5080 		}
5081 
5082 have_block_group:
5083 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
5084 			u64 free_percent;
5085 
5086 			ret = cache_block_group(block_group, trans,
5087 						orig_root, 1);
5088 			if (block_group->cached == BTRFS_CACHE_FINISHED)
5089 				goto have_block_group;
5090 
5091 			free_percent = btrfs_block_group_used(&block_group->item);
5092 			free_percent *= 100;
5093 			free_percent = div64_u64(free_percent,
5094 						 block_group->key.offset);
5095 			free_percent = 100 - free_percent;
5096 			if (free_percent > ideal_cache_percent &&
5097 			    likely(!block_group->ro)) {
5098 				ideal_cache_offset = block_group->key.objectid;
5099 				ideal_cache_percent = free_percent;
5100 			}
5101 
5102 			/*
5103 			 * We only want to start kthread caching if we are at
5104 			 * the point where we will wait for caching to make
5105 			 * progress, or if our ideal search is over and we've
5106 			 * found somebody to start caching.
5107 			 */
5108 			if (loop > LOOP_CACHING_NOWAIT ||
5109 			    (loop > LOOP_FIND_IDEAL &&
5110 			     atomic_read(&space_info->caching_threads) < 2)) {
5111 				ret = cache_block_group(block_group, trans,
5112 							orig_root, 0);
5113 				BUG_ON(ret);
5114 			}
5115 			found_uncached_bg = true;
5116 
5117 			/*
5118 			 * If loop is set for cached only, try the next block
5119 			 * group.
5120 			 */
5121 			if (loop == LOOP_FIND_IDEAL)
5122 				goto loop;
5123 		}
5124 
5125 		cached = block_group_cache_done(block_group);
5126 		if (unlikely(!cached))
5127 			found_uncached_bg = true;
5128 
5129 		if (unlikely(block_group->ro))
5130 			goto loop;
5131 
5132 		/*
5133 		 * Ok we want to try and use the cluster allocator, so lets look
5134 		 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
5135 		 * have tried the cluster allocator plenty of times at this
5136 		 * point and not have found anything, so we are likely way too
5137 		 * fragmented for the clustering stuff to find anything, so lets
5138 		 * just skip it and let the allocator find whatever block it can
5139 		 * find
5140 		 */
5141 		if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
5142 			/*
5143 			 * the refill lock keeps out other
5144 			 * people trying to start a new cluster
5145 			 */
5146 			spin_lock(&last_ptr->refill_lock);
5147 			if (last_ptr->block_group &&
5148 			    (last_ptr->block_group->ro ||
5149 			    !block_group_bits(last_ptr->block_group, data))) {
5150 				offset = 0;
5151 				goto refill_cluster;
5152 			}
5153 
5154 			offset = btrfs_alloc_from_cluster(block_group, last_ptr,
5155 						 num_bytes, search_start);
5156 			if (offset) {
5157 				/* we have a block, we're done */
5158 				spin_unlock(&last_ptr->refill_lock);
5159 				goto checks;
5160 			}
5161 
5162 			spin_lock(&last_ptr->lock);
5163 			/*
5164 			 * whoops, this cluster doesn't actually point to
5165 			 * this block group.  Get a ref on the block
5166 			 * group is does point to and try again
5167 			 */
5168 			if (!last_ptr_loop && last_ptr->block_group &&
5169 			    last_ptr->block_group != block_group) {
5170 
5171 				btrfs_put_block_group(block_group);
5172 				block_group = last_ptr->block_group;
5173 				btrfs_get_block_group(block_group);
5174 				spin_unlock(&last_ptr->lock);
5175 				spin_unlock(&last_ptr->refill_lock);
5176 
5177 				last_ptr_loop = 1;
5178 				search_start = block_group->key.objectid;
5179 				/*
5180 				 * we know this block group is properly
5181 				 * in the list because
5182 				 * btrfs_remove_block_group, drops the
5183 				 * cluster before it removes the block
5184 				 * group from the list
5185 				 */
5186 				goto have_block_group;
5187 			}
5188 			spin_unlock(&last_ptr->lock);
5189 refill_cluster:
5190 			/*
5191 			 * this cluster didn't work out, free it and
5192 			 * start over
5193 			 */
5194 			btrfs_return_cluster_to_free_space(NULL, last_ptr);
5195 
5196 			last_ptr_loop = 0;
5197 
5198 			/* allocate a cluster in this block group */
5199 			ret = btrfs_find_space_cluster(trans, root,
5200 					       block_group, last_ptr,
5201 					       offset, num_bytes,
5202 					       empty_cluster + empty_size);
5203 			if (ret == 0) {
5204 				/*
5205 				 * now pull our allocation out of this
5206 				 * cluster
5207 				 */
5208 				offset = btrfs_alloc_from_cluster(block_group,
5209 						  last_ptr, num_bytes,
5210 						  search_start);
5211 				if (offset) {
5212 					/* we found one, proceed */
5213 					spin_unlock(&last_ptr->refill_lock);
5214 					goto checks;
5215 				}
5216 			} else if (!cached && loop > LOOP_CACHING_NOWAIT
5217 				   && !failed_cluster_refill) {
5218 				spin_unlock(&last_ptr->refill_lock);
5219 
5220 				failed_cluster_refill = true;
5221 				wait_block_group_cache_progress(block_group,
5222 				       num_bytes + empty_cluster + empty_size);
5223 				goto have_block_group;
5224 			}
5225 
5226 			/*
5227 			 * at this point we either didn't find a cluster
5228 			 * or we weren't able to allocate a block from our
5229 			 * cluster.  Free the cluster we've been trying
5230 			 * to use, and go to the next block group
5231 			 */
5232 			btrfs_return_cluster_to_free_space(NULL, last_ptr);
5233 			spin_unlock(&last_ptr->refill_lock);
5234 			goto loop;
5235 		}
5236 
5237 		offset = btrfs_find_space_for_alloc(block_group, search_start,
5238 						    num_bytes, empty_size);
5239 		/*
5240 		 * If we didn't find a chunk, and we haven't failed on this
5241 		 * block group before, and this block group is in the middle of
5242 		 * caching and we are ok with waiting, then go ahead and wait
5243 		 * for progress to be made, and set failed_alloc to true.
5244 		 *
5245 		 * If failed_alloc is true then we've already waited on this
5246 		 * block group once and should move on to the next block group.
5247 		 */
5248 		if (!offset && !failed_alloc && !cached &&
5249 		    loop > LOOP_CACHING_NOWAIT) {
5250 			wait_block_group_cache_progress(block_group,
5251 						num_bytes + empty_size);
5252 			failed_alloc = true;
5253 			goto have_block_group;
5254 		} else if (!offset) {
5255 			goto loop;
5256 		}
5257 checks:
5258 		search_start = stripe_align(root, offset);
5259 		/* move on to the next group */
5260 		if (search_start + num_bytes >= search_end) {
5261 			btrfs_add_free_space(block_group, offset, num_bytes);
5262 			goto loop;
5263 		}
5264 
5265 		/* move on to the next group */
5266 		if (search_start + num_bytes >
5267 		    block_group->key.objectid + block_group->key.offset) {
5268 			btrfs_add_free_space(block_group, offset, num_bytes);
5269 			goto loop;
5270 		}
5271 
5272 		ins->objectid = search_start;
5273 		ins->offset = num_bytes;
5274 
5275 		if (offset < search_start)
5276 			btrfs_add_free_space(block_group, offset,
5277 					     search_start - offset);
5278 		BUG_ON(offset > search_start);
5279 
5280 		ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
5281 					    (data & BTRFS_BLOCK_GROUP_DATA));
5282 		if (ret == -EAGAIN) {
5283 			btrfs_add_free_space(block_group, offset, num_bytes);
5284 			goto loop;
5285 		}
5286 
5287 		/* we are all good, lets return */
5288 		ins->objectid = search_start;
5289 		ins->offset = num_bytes;
5290 
5291 		if (offset < search_start)
5292 			btrfs_add_free_space(block_group, offset,
5293 					     search_start - offset);
5294 		BUG_ON(offset > search_start);
5295 		break;
5296 loop:
5297 		failed_cluster_refill = false;
5298 		failed_alloc = false;
5299 		BUG_ON(index != get_block_group_index(block_group));
5300 		btrfs_put_block_group(block_group);
5301 	}
5302 	up_read(&space_info->groups_sem);
5303 
5304 	if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
5305 		goto search;
5306 
5307 	/* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for
5308 	 *			for them to make caching progress.  Also
5309 	 *			determine the best possible bg to cache
5310 	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
5311 	 *			caching kthreads as we move along
5312 	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
5313 	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
5314 	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
5315 	 *			again
5316 	 */
5317 	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
5318 	    (found_uncached_bg || empty_size || empty_cluster ||
5319 	     allowed_chunk_alloc)) {
5320 		index = 0;
5321 		if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
5322 			found_uncached_bg = false;
5323 			loop++;
5324 			if (!ideal_cache_percent &&
5325 			    atomic_read(&space_info->caching_threads))
5326 				goto search;
5327 
5328 			/*
5329 			 * 1 of the following 2 things have happened so far
5330 			 *
5331 			 * 1) We found an ideal block group for caching that
5332 			 * is mostly full and will cache quickly, so we might
5333 			 * as well wait for it.
5334 			 *
5335 			 * 2) We searched for cached only and we didn't find
5336 			 * anything, and we didn't start any caching kthreads
5337 			 * either, so chances are we will loop through and
5338 			 * start a couple caching kthreads, and then come back
5339 			 * around and just wait for them.  This will be slower
5340 			 * because we will have 2 caching kthreads reading at
5341 			 * the same time when we could have just started one
5342 			 * and waited for it to get far enough to give us an
5343 			 * allocation, so go ahead and go to the wait caching
5344 			 * loop.
5345 			 */
5346 			loop = LOOP_CACHING_WAIT;
5347 			search_start = ideal_cache_offset;
5348 			ideal_cache_percent = 0;
5349 			goto ideal_cache;
5350 		} else if (loop == LOOP_FIND_IDEAL) {
5351 			/*
5352 			 * Didn't find a uncached bg, wait on anything we find
5353 			 * next.
5354 			 */
5355 			loop = LOOP_CACHING_WAIT;
5356 			goto search;
5357 		}
5358 
5359 		if (loop < LOOP_CACHING_WAIT) {
5360 			loop++;
5361 			goto search;
5362 		}
5363 
5364 		if (loop == LOOP_ALLOC_CHUNK) {
5365 			empty_size = 0;
5366 			empty_cluster = 0;
5367 		}
5368 
5369 		if (allowed_chunk_alloc) {
5370 			ret = do_chunk_alloc(trans, root, num_bytes +
5371 					     2 * 1024 * 1024, data,
5372 					     CHUNK_ALLOC_LIMITED);
5373 			allowed_chunk_alloc = 0;
5374 			done_chunk_alloc = 1;
5375 		} else if (!done_chunk_alloc &&
5376 			   space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
5377 			space_info->force_alloc = CHUNK_ALLOC_LIMITED;
5378 		}
5379 
5380 		if (loop < LOOP_NO_EMPTY_SIZE) {
5381 			loop++;
5382 			goto search;
5383 		}
5384 		ret = -ENOSPC;
5385 	} else if (!ins->objectid) {
5386 		ret = -ENOSPC;
5387 	}
5388 
5389 	/* we found what we needed */
5390 	if (ins->objectid) {
5391 		if (!(data & BTRFS_BLOCK_GROUP_DATA))
5392 			trans->block_group = block_group->key.objectid;
5393 
5394 		btrfs_put_block_group(block_group);
5395 		ret = 0;
5396 	}
5397 
5398 	return ret;
5399 }
5400 
dump_space_info(struct btrfs_space_info * info,u64 bytes,int dump_block_groups)5401 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
5402 			    int dump_block_groups)
5403 {
5404 	struct btrfs_block_group_cache *cache;
5405 	int index = 0;
5406 
5407 	spin_lock(&info->lock);
5408 	printk(KERN_INFO "space_info has %llu free, is %sfull\n",
5409 	       (unsigned long long)(info->total_bytes - info->bytes_used -
5410 				    info->bytes_pinned - info->bytes_reserved -
5411 				    info->bytes_readonly),
5412 	       (info->full) ? "" : "not ");
5413 	printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
5414 	       "reserved=%llu, may_use=%llu, readonly=%llu\n",
5415 	       (unsigned long long)info->total_bytes,
5416 	       (unsigned long long)info->bytes_used,
5417 	       (unsigned long long)info->bytes_pinned,
5418 	       (unsigned long long)info->bytes_reserved,
5419 	       (unsigned long long)info->bytes_may_use,
5420 	       (unsigned long long)info->bytes_readonly);
5421 	spin_unlock(&info->lock);
5422 
5423 	if (!dump_block_groups)
5424 		return;
5425 
5426 	down_read(&info->groups_sem);
5427 again:
5428 	list_for_each_entry(cache, &info->block_groups[index], list) {
5429 		spin_lock(&cache->lock);
5430 		printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
5431 		       "%llu pinned %llu reserved\n",
5432 		       (unsigned long long)cache->key.objectid,
5433 		       (unsigned long long)cache->key.offset,
5434 		       (unsigned long long)btrfs_block_group_used(&cache->item),
5435 		       (unsigned long long)cache->pinned,
5436 		       (unsigned long long)cache->reserved);
5437 		btrfs_dump_free_space(cache, bytes);
5438 		spin_unlock(&cache->lock);
5439 	}
5440 	if (++index < BTRFS_NR_RAID_TYPES)
5441 		goto again;
5442 	up_read(&info->groups_sem);
5443 }
5444 
btrfs_reserve_extent(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 num_bytes,u64 min_alloc_size,u64 empty_size,u64 hint_byte,u64 search_end,struct btrfs_key * ins,u64 data)5445 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
5446 			 struct btrfs_root *root,
5447 			 u64 num_bytes, u64 min_alloc_size,
5448 			 u64 empty_size, u64 hint_byte,
5449 			 u64 search_end, struct btrfs_key *ins,
5450 			 u64 data)
5451 {
5452 	int ret;
5453 	u64 search_start = 0;
5454 
5455 	data = btrfs_get_alloc_profile(root, data);
5456 again:
5457 	/*
5458 	 * the only place that sets empty_size is btrfs_realloc_node, which
5459 	 * is not called recursively on allocations
5460 	 */
5461 	if (empty_size || root->ref_cows)
5462 		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
5463 				     num_bytes + 2 * 1024 * 1024, data,
5464 				     CHUNK_ALLOC_NO_FORCE);
5465 
5466 	WARN_ON(num_bytes < root->sectorsize);
5467 	ret = find_free_extent(trans, root, num_bytes, empty_size,
5468 			       search_start, search_end, hint_byte,
5469 			       ins, data);
5470 
5471 	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
5472 		num_bytes = num_bytes >> 1;
5473 		num_bytes = num_bytes & ~(root->sectorsize - 1);
5474 		num_bytes = max(num_bytes, min_alloc_size);
5475 		do_chunk_alloc(trans, root->fs_info->extent_root,
5476 			       num_bytes, data, CHUNK_ALLOC_FORCE);
5477 		goto again;
5478 	}
5479 	if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
5480 		struct btrfs_space_info *sinfo;
5481 
5482 		sinfo = __find_space_info(root->fs_info, data);
5483 		printk(KERN_ERR "btrfs allocation failed flags %llu, "
5484 		       "wanted %llu\n", (unsigned long long)data,
5485 		       (unsigned long long)num_bytes);
5486 		dump_space_info(sinfo, num_bytes, 1);
5487 	}
5488 
5489 	trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
5490 
5491 	return ret;
5492 }
5493 
btrfs_free_reserved_extent(struct btrfs_root * root,u64 start,u64 len)5494 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5495 {
5496 	struct btrfs_block_group_cache *cache;
5497 	int ret = 0;
5498 
5499 	cache = btrfs_lookup_block_group(root->fs_info, start);
5500 	if (!cache) {
5501 		printk(KERN_ERR "Unable to find block group for %llu\n",
5502 		       (unsigned long long)start);
5503 		return -ENOSPC;
5504 	}
5505 
5506 	if (btrfs_test_opt(root, DISCARD))
5507 		ret = btrfs_discard_extent(root, start, len, NULL);
5508 
5509 	btrfs_add_free_space(cache, start, len);
5510 	btrfs_update_reserved_bytes(cache, len, 0, 1);
5511 	btrfs_put_block_group(cache);
5512 
5513 	trace_btrfs_reserved_extent_free(root, start, len);
5514 
5515 	return ret;
5516 }
5517 
alloc_reserved_file_extent(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 parent,u64 root_objectid,u64 flags,u64 owner,u64 offset,struct btrfs_key * ins,int ref_mod)5518 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5519 				      struct btrfs_root *root,
5520 				      u64 parent, u64 root_objectid,
5521 				      u64 flags, u64 owner, u64 offset,
5522 				      struct btrfs_key *ins, int ref_mod)
5523 {
5524 	int ret;
5525 	struct btrfs_fs_info *fs_info = root->fs_info;
5526 	struct btrfs_extent_item *extent_item;
5527 	struct btrfs_extent_inline_ref *iref;
5528 	struct btrfs_path *path;
5529 	struct extent_buffer *leaf;
5530 	int type;
5531 	u32 size;
5532 
5533 	if (parent > 0)
5534 		type = BTRFS_SHARED_DATA_REF_KEY;
5535 	else
5536 		type = BTRFS_EXTENT_DATA_REF_KEY;
5537 
5538 	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
5539 
5540 	path = btrfs_alloc_path();
5541 	if (!path)
5542 		return -ENOMEM;
5543 
5544 	path->leave_spinning = 1;
5545 	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
5546 				      ins, size);
5547 	BUG_ON(ret);
5548 
5549 	leaf = path->nodes[0];
5550 	extent_item = btrfs_item_ptr(leaf, path->slots[0],
5551 				     struct btrfs_extent_item);
5552 	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
5553 	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
5554 	btrfs_set_extent_flags(leaf, extent_item,
5555 			       flags | BTRFS_EXTENT_FLAG_DATA);
5556 
5557 	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
5558 	btrfs_set_extent_inline_ref_type(leaf, iref, type);
5559 	if (parent > 0) {
5560 		struct btrfs_shared_data_ref *ref;
5561 		ref = (struct btrfs_shared_data_ref *)(iref + 1);
5562 		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
5563 		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
5564 	} else {
5565 		struct btrfs_extent_data_ref *ref;
5566 		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
5567 		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
5568 		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
5569 		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
5570 		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
5571 	}
5572 
5573 	btrfs_mark_buffer_dirty(path->nodes[0]);
5574 	btrfs_free_path(path);
5575 
5576 	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
5577 	if (ret) {
5578 		printk(KERN_ERR "btrfs update block group failed for %llu "
5579 		       "%llu\n", (unsigned long long)ins->objectid,
5580 		       (unsigned long long)ins->offset);
5581 		BUG();
5582 	}
5583 	return ret;
5584 }
5585 
alloc_reserved_tree_block(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 parent,u64 root_objectid,u64 flags,struct btrfs_disk_key * key,int level,struct btrfs_key * ins)5586 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
5587 				     struct btrfs_root *root,
5588 				     u64 parent, u64 root_objectid,
5589 				     u64 flags, struct btrfs_disk_key *key,
5590 				     int level, struct btrfs_key *ins)
5591 {
5592 	int ret;
5593 	struct btrfs_fs_info *fs_info = root->fs_info;
5594 	struct btrfs_extent_item *extent_item;
5595 	struct btrfs_tree_block_info *block_info;
5596 	struct btrfs_extent_inline_ref *iref;
5597 	struct btrfs_path *path;
5598 	struct extent_buffer *leaf;
5599 	u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
5600 
5601 	path = btrfs_alloc_path();
5602 	BUG_ON(!path);
5603 
5604 	path->leave_spinning = 1;
5605 	ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
5606 				      ins, size);
5607 	BUG_ON(ret);
5608 
5609 	leaf = path->nodes[0];
5610 	extent_item = btrfs_item_ptr(leaf, path->slots[0],
5611 				     struct btrfs_extent_item);
5612 	btrfs_set_extent_refs(leaf, extent_item, 1);
5613 	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
5614 	btrfs_set_extent_flags(leaf, extent_item,
5615 			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
5616 	block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
5617 
5618 	btrfs_set_tree_block_key(leaf, block_info, key);
5619 	btrfs_set_tree_block_level(leaf, block_info, level);
5620 
5621 	iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
5622 	if (parent > 0) {
5623 		BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
5624 		btrfs_set_extent_inline_ref_type(leaf, iref,
5625 						 BTRFS_SHARED_BLOCK_REF_KEY);
5626 		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
5627 	} else {
5628 		btrfs_set_extent_inline_ref_type(leaf, iref,
5629 						 BTRFS_TREE_BLOCK_REF_KEY);
5630 		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
5631 	}
5632 
5633 	btrfs_mark_buffer_dirty(leaf);
5634 	btrfs_free_path(path);
5635 
5636 	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
5637 	if (ret) {
5638 		printk(KERN_ERR "btrfs update block group failed for %llu "
5639 		       "%llu\n", (unsigned long long)ins->objectid,
5640 		       (unsigned long long)ins->offset);
5641 		BUG();
5642 	}
5643 	return ret;
5644 }
5645 
btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 root_objectid,u64 owner,u64 offset,struct btrfs_key * ins)5646 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5647 				     struct btrfs_root *root,
5648 				     u64 root_objectid, u64 owner,
5649 				     u64 offset, struct btrfs_key *ins)
5650 {
5651 	int ret;
5652 
5653 	BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID);
5654 
5655 	ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset,
5656 					 0, root_objectid, owner, offset,
5657 					 BTRFS_ADD_DELAYED_EXTENT, NULL);
5658 	return ret;
5659 }
5660 
5661 /*
5662  * this is used by the tree logging recovery code.  It records that
5663  * an extent has been allocated and makes sure to clear the free
5664  * space cache bits as well
5665  */
btrfs_alloc_logged_file_extent(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 root_objectid,u64 owner,u64 offset,struct btrfs_key * ins)5666 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5667 				   struct btrfs_root *root,
5668 				   u64 root_objectid, u64 owner, u64 offset,
5669 				   struct btrfs_key *ins)
5670 {
5671 	int ret;
5672 	struct btrfs_block_group_cache *block_group;
5673 	struct btrfs_caching_control *caching_ctl;
5674 	u64 start = ins->objectid;
5675 	u64 num_bytes = ins->offset;
5676 
5677 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
5678 	cache_block_group(block_group, trans, NULL, 0);
5679 	caching_ctl = get_caching_control(block_group);
5680 
5681 	if (!caching_ctl) {
5682 		BUG_ON(!block_group_cache_done(block_group));
5683 		ret = btrfs_remove_free_space(block_group, start, num_bytes);
5684 		BUG_ON(ret);
5685 	} else {
5686 		mutex_lock(&caching_ctl->mutex);
5687 
5688 		if (start >= caching_ctl->progress) {
5689 			ret = add_excluded_extent(root, start, num_bytes);
5690 			BUG_ON(ret);
5691 		} else if (start + num_bytes <= caching_ctl->progress) {
5692 			ret = btrfs_remove_free_space(block_group,
5693 						      start, num_bytes);
5694 			BUG_ON(ret);
5695 		} else {
5696 			num_bytes = caching_ctl->progress - start;
5697 			ret = btrfs_remove_free_space(block_group,
5698 						      start, num_bytes);
5699 			BUG_ON(ret);
5700 
5701 			start = caching_ctl->progress;
5702 			num_bytes = ins->objectid + ins->offset -
5703 				    caching_ctl->progress;
5704 			ret = add_excluded_extent(root, start, num_bytes);
5705 			BUG_ON(ret);
5706 		}
5707 
5708 		mutex_unlock(&caching_ctl->mutex);
5709 		put_caching_control(caching_ctl);
5710 	}
5711 
5712 	ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1);
5713 	BUG_ON(ret);
5714 	btrfs_put_block_group(block_group);
5715 	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
5716 					 0, owner, offset, ins, 1);
5717 	return ret;
5718 }
5719 
btrfs_init_new_buffer(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytenr,u32 blocksize,int level)5720 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
5721 					    struct btrfs_root *root,
5722 					    u64 bytenr, u32 blocksize,
5723 					    int level)
5724 {
5725 	struct extent_buffer *buf;
5726 
5727 	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
5728 	if (!buf)
5729 		return ERR_PTR(-ENOMEM);
5730 	btrfs_set_header_generation(buf, trans->transid);
5731 	btrfs_set_buffer_lockdep_class(buf, level);
5732 	btrfs_tree_lock(buf);
5733 	clean_tree_block(trans, root, buf);
5734 
5735 	btrfs_set_lock_blocking(buf);
5736 	btrfs_set_buffer_uptodate(buf);
5737 
5738 	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
5739 		/*
5740 		 * we allow two log transactions at a time, use different
5741 		 * EXENT bit to differentiate dirty pages.
5742 		 */
5743 		if (root->log_transid % 2 == 0)
5744 			set_extent_dirty(&root->dirty_log_pages, buf->start,
5745 					buf->start + buf->len - 1, GFP_NOFS);
5746 		else
5747 			set_extent_new(&root->dirty_log_pages, buf->start,
5748 					buf->start + buf->len - 1, GFP_NOFS);
5749 	} else {
5750 		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
5751 			 buf->start + buf->len - 1, GFP_NOFS);
5752 	}
5753 	trans->blocks_used++;
5754 	/* this returns a buffer locked for blocking */
5755 	return buf;
5756 }
5757 
5758 static struct btrfs_block_rsv *
use_block_rsv(struct btrfs_trans_handle * trans,struct btrfs_root * root,u32 blocksize)5759 use_block_rsv(struct btrfs_trans_handle *trans,
5760 	      struct btrfs_root *root, u32 blocksize)
5761 {
5762 	struct btrfs_block_rsv *block_rsv;
5763 	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
5764 	int ret;
5765 
5766 	block_rsv = get_block_rsv(trans, root);
5767 
5768 	if (block_rsv->size == 0) {
5769 		ret = reserve_metadata_bytes(trans, root, block_rsv,
5770 					     blocksize, 0);
5771 		/*
5772 		 * If we couldn't reserve metadata bytes try and use some from
5773 		 * the global reserve.
5774 		 */
5775 		if (ret && block_rsv != global_rsv) {
5776 			ret = block_rsv_use_bytes(global_rsv, blocksize);
5777 			if (!ret)
5778 				return global_rsv;
5779 			return ERR_PTR(ret);
5780 		} else if (ret) {
5781 			return ERR_PTR(ret);
5782 		}
5783 		return block_rsv;
5784 	}
5785 
5786 	ret = block_rsv_use_bytes(block_rsv, blocksize);
5787 	if (!ret)
5788 		return block_rsv;
5789 	if (ret) {
5790 		WARN_ON(1);
5791 		ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
5792 					     0);
5793 		if (!ret) {
5794 			spin_lock(&block_rsv->lock);
5795 			block_rsv->size += blocksize;
5796 			spin_unlock(&block_rsv->lock);
5797 			return block_rsv;
5798 		} else if (ret && block_rsv != global_rsv) {
5799 			ret = block_rsv_use_bytes(global_rsv, blocksize);
5800 			if (!ret)
5801 				return global_rsv;
5802 		}
5803 	}
5804 
5805 	return ERR_PTR(-ENOSPC);
5806 }
5807 
unuse_block_rsv(struct btrfs_block_rsv * block_rsv,u32 blocksize)5808 static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
5809 {
5810 	block_rsv_add_bytes(block_rsv, blocksize, 0);
5811 	block_rsv_release_bytes(block_rsv, NULL, 0);
5812 }
5813 
5814 /*
5815  * finds a free extent and does all the dirty work required for allocation
5816  * returns the key for the extent through ins, and a tree buffer for
5817  * the first block of the extent through buf.
5818  *
5819  * returns the tree buffer or NULL.
5820  */
btrfs_alloc_free_block(struct btrfs_trans_handle * trans,struct btrfs_root * root,u32 blocksize,u64 parent,u64 root_objectid,struct btrfs_disk_key * key,int level,u64 hint,u64 empty_size)5821 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
5822 					struct btrfs_root *root, u32 blocksize,
5823 					u64 parent, u64 root_objectid,
5824 					struct btrfs_disk_key *key, int level,
5825 					u64 hint, u64 empty_size)
5826 {
5827 	struct btrfs_key ins;
5828 	struct btrfs_block_rsv *block_rsv;
5829 	struct extent_buffer *buf;
5830 	u64 flags = 0;
5831 	int ret;
5832 
5833 
5834 	block_rsv = use_block_rsv(trans, root, blocksize);
5835 	if (IS_ERR(block_rsv))
5836 		return ERR_CAST(block_rsv);
5837 
5838 	ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
5839 				   empty_size, hint, (u64)-1, &ins, 0);
5840 	if (ret) {
5841 		unuse_block_rsv(block_rsv, blocksize);
5842 		return ERR_PTR(ret);
5843 	}
5844 
5845 	buf = btrfs_init_new_buffer(trans, root, ins.objectid,
5846 				    blocksize, level);
5847 	BUG_ON(IS_ERR(buf));
5848 
5849 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
5850 		if (parent == 0)
5851 			parent = ins.objectid;
5852 		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
5853 	} else
5854 		BUG_ON(parent > 0);
5855 
5856 	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
5857 		struct btrfs_delayed_extent_op *extent_op;
5858 		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
5859 		BUG_ON(!extent_op);
5860 		if (key)
5861 			memcpy(&extent_op->key, key, sizeof(extent_op->key));
5862 		else
5863 			memset(&extent_op->key, 0, sizeof(extent_op->key));
5864 		extent_op->flags_to_set = flags;
5865 		extent_op->update_key = 1;
5866 		extent_op->update_flags = 1;
5867 		extent_op->is_data = 0;
5868 
5869 		ret = btrfs_add_delayed_tree_ref(trans, ins.objectid,
5870 					ins.offset, parent, root_objectid,
5871 					level, BTRFS_ADD_DELAYED_EXTENT,
5872 					extent_op);
5873 		BUG_ON(ret);
5874 	}
5875 	return buf;
5876 }
5877 
5878 struct walk_control {
5879 	u64 refs[BTRFS_MAX_LEVEL];
5880 	u64 flags[BTRFS_MAX_LEVEL];
5881 	struct btrfs_key update_progress;
5882 	int stage;
5883 	int level;
5884 	int shared_level;
5885 	int update_ref;
5886 	int keep_locks;
5887 	int reada_slot;
5888 	int reada_count;
5889 };
5890 
5891 #define DROP_REFERENCE	1
5892 #define UPDATE_BACKREF	2
5893 
reada_walk_down(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct walk_control * wc,struct btrfs_path * path)5894 static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
5895 				     struct btrfs_root *root,
5896 				     struct walk_control *wc,
5897 				     struct btrfs_path *path)
5898 {
5899 	u64 bytenr;
5900 	u64 generation;
5901 	u64 refs;
5902 	u64 flags;
5903 	u32 nritems;
5904 	u32 blocksize;
5905 	struct btrfs_key key;
5906 	struct extent_buffer *eb;
5907 	int ret;
5908 	int slot;
5909 	int nread = 0;
5910 
5911 	if (path->slots[wc->level] < wc->reada_slot) {
5912 		wc->reada_count = wc->reada_count * 2 / 3;
5913 		wc->reada_count = max(wc->reada_count, 2);
5914 	} else {
5915 		wc->reada_count = wc->reada_count * 3 / 2;
5916 		wc->reada_count = min_t(int, wc->reada_count,
5917 					BTRFS_NODEPTRS_PER_BLOCK(root));
5918 	}
5919 
5920 	eb = path->nodes[wc->level];
5921 	nritems = btrfs_header_nritems(eb);
5922 	blocksize = btrfs_level_size(root, wc->level - 1);
5923 
5924 	for (slot = path->slots[wc->level]; slot < nritems; slot++) {
5925 		if (nread >= wc->reada_count)
5926 			break;
5927 
5928 		cond_resched();
5929 		bytenr = btrfs_node_blockptr(eb, slot);
5930 		generation = btrfs_node_ptr_generation(eb, slot);
5931 
5932 		if (slot == path->slots[wc->level])
5933 			goto reada;
5934 
5935 		if (wc->stage == UPDATE_BACKREF &&
5936 		    generation <= root->root_key.offset)
5937 			continue;
5938 
5939 		/* We don't lock the tree block, it's OK to be racy here */
5940 		ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
5941 					       &refs, &flags);
5942 		BUG_ON(ret);
5943 		BUG_ON(refs == 0);
5944 
5945 		if (wc->stage == DROP_REFERENCE) {
5946 			if (refs == 1)
5947 				goto reada;
5948 
5949 			if (wc->level == 1 &&
5950 			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5951 				continue;
5952 			if (!wc->update_ref ||
5953 			    generation <= root->root_key.offset)
5954 				continue;
5955 			btrfs_node_key_to_cpu(eb, &key, slot);
5956 			ret = btrfs_comp_cpu_keys(&key,
5957 						  &wc->update_progress);
5958 			if (ret < 0)
5959 				continue;
5960 		} else {
5961 			if (wc->level == 1 &&
5962 			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
5963 				continue;
5964 		}
5965 reada:
5966 		ret = readahead_tree_block(root, bytenr, blocksize,
5967 					   generation);
5968 		if (ret)
5969 			break;
5970 		nread++;
5971 	}
5972 	wc->reada_slot = slot;
5973 }
5974 
5975 /*
5976  * hepler to process tree block while walking down the tree.
5977  *
5978  * when wc->stage == UPDATE_BACKREF, this function updates
5979  * back refs for pointers in the block.
5980  *
5981  * NOTE: return value 1 means we should stop walking down.
5982  */
walk_down_proc(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc,int lookup_info)5983 static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
5984 				   struct btrfs_root *root,
5985 				   struct btrfs_path *path,
5986 				   struct walk_control *wc, int lookup_info)
5987 {
5988 	int level = wc->level;
5989 	struct extent_buffer *eb = path->nodes[level];
5990 	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
5991 	int ret;
5992 
5993 	if (wc->stage == UPDATE_BACKREF &&
5994 	    btrfs_header_owner(eb) != root->root_key.objectid)
5995 		return 1;
5996 
5997 	/*
5998 	 * when reference count of tree block is 1, it won't increase
5999 	 * again. once full backref flag is set, we never clear it.
6000 	 */
6001 	if (lookup_info &&
6002 	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
6003 	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
6004 		BUG_ON(!path->locks[level]);
6005 		ret = btrfs_lookup_extent_info(trans, root,
6006 					       eb->start, eb->len,
6007 					       &wc->refs[level],
6008 					       &wc->flags[level]);
6009 		BUG_ON(ret);
6010 		BUG_ON(wc->refs[level] == 0);
6011 	}
6012 
6013 	if (wc->stage == DROP_REFERENCE) {
6014 		if (wc->refs[level] > 1)
6015 			return 1;
6016 
6017 		if (path->locks[level] && !wc->keep_locks) {
6018 			btrfs_tree_unlock(eb);
6019 			path->locks[level] = 0;
6020 		}
6021 		return 0;
6022 	}
6023 
6024 	/* wc->stage == UPDATE_BACKREF */
6025 	if (!(wc->flags[level] & flag)) {
6026 		BUG_ON(!path->locks[level]);
6027 		ret = btrfs_inc_ref(trans, root, eb, 1);
6028 		BUG_ON(ret);
6029 		ret = btrfs_dec_ref(trans, root, eb, 0);
6030 		BUG_ON(ret);
6031 		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
6032 						  eb->len, flag, 0);
6033 		BUG_ON(ret);
6034 		wc->flags[level] |= flag;
6035 	}
6036 
6037 	/*
6038 	 * the block is shared by multiple trees, so it's not good to
6039 	 * keep the tree lock
6040 	 */
6041 	if (path->locks[level] && level > 0) {
6042 		btrfs_tree_unlock(eb);
6043 		path->locks[level] = 0;
6044 	}
6045 	return 0;
6046 }
6047 
6048 /*
6049  * hepler to process tree block pointer.
6050  *
6051  * when wc->stage == DROP_REFERENCE, this function checks
6052  * reference count of the block pointed to. if the block
6053  * is shared and we need update back refs for the subtree
6054  * rooted at the block, this function changes wc->stage to
6055  * UPDATE_BACKREF. if the block is shared and there is no
6056  * need to update back, this function drops the reference
6057  * to the block.
6058  *
6059  * NOTE: return value 1 means we should stop walking down.
6060  */
do_walk_down(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc,int * lookup_info)6061 static noinline int do_walk_down(struct btrfs_trans_handle *trans,
6062 				 struct btrfs_root *root,
6063 				 struct btrfs_path *path,
6064 				 struct walk_control *wc, int *lookup_info)
6065 {
6066 	u64 bytenr;
6067 	u64 generation;
6068 	u64 parent;
6069 	u32 blocksize;
6070 	struct btrfs_key key;
6071 	struct extent_buffer *next;
6072 	int level = wc->level;
6073 	int reada = 0;
6074 	int ret = 0;
6075 
6076 	generation = btrfs_node_ptr_generation(path->nodes[level],
6077 					       path->slots[level]);
6078 	/*
6079 	 * if the lower level block was created before the snapshot
6080 	 * was created, we know there is no need to update back refs
6081 	 * for the subtree
6082 	 */
6083 	if (wc->stage == UPDATE_BACKREF &&
6084 	    generation <= root->root_key.offset) {
6085 		*lookup_info = 1;
6086 		return 1;
6087 	}
6088 
6089 	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
6090 	blocksize = btrfs_level_size(root, level - 1);
6091 
6092 	next = btrfs_find_tree_block(root, bytenr, blocksize);
6093 	if (!next) {
6094 		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
6095 		if (!next)
6096 			return -ENOMEM;
6097 		reada = 1;
6098 	}
6099 	btrfs_tree_lock(next);
6100 	btrfs_set_lock_blocking(next);
6101 
6102 	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
6103 				       &wc->refs[level - 1],
6104 				       &wc->flags[level - 1]);
6105 	BUG_ON(ret);
6106 	BUG_ON(wc->refs[level - 1] == 0);
6107 	*lookup_info = 0;
6108 
6109 	if (wc->stage == DROP_REFERENCE) {
6110 		if (wc->refs[level - 1] > 1) {
6111 			if (level == 1 &&
6112 			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
6113 				goto skip;
6114 
6115 			if (!wc->update_ref ||
6116 			    generation <= root->root_key.offset)
6117 				goto skip;
6118 
6119 			btrfs_node_key_to_cpu(path->nodes[level], &key,
6120 					      path->slots[level]);
6121 			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
6122 			if (ret < 0)
6123 				goto skip;
6124 
6125 			wc->stage = UPDATE_BACKREF;
6126 			wc->shared_level = level - 1;
6127 		}
6128 	} else {
6129 		if (level == 1 &&
6130 		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
6131 			goto skip;
6132 	}
6133 
6134 	if (!btrfs_buffer_uptodate(next, generation)) {
6135 		btrfs_tree_unlock(next);
6136 		free_extent_buffer(next);
6137 		next = NULL;
6138 		*lookup_info = 1;
6139 	}
6140 
6141 	if (!next) {
6142 		if (reada && level == 1)
6143 			reada_walk_down(trans, root, wc, path);
6144 		next = read_tree_block(root, bytenr, blocksize, generation);
6145 		if (!next)
6146 			return -EIO;
6147 		btrfs_tree_lock(next);
6148 		btrfs_set_lock_blocking(next);
6149 	}
6150 
6151 	level--;
6152 	BUG_ON(level != btrfs_header_level(next));
6153 	path->nodes[level] = next;
6154 	path->slots[level] = 0;
6155 	path->locks[level] = 1;
6156 	wc->level = level;
6157 	if (wc->level == 1)
6158 		wc->reada_slot = 0;
6159 	return 0;
6160 skip:
6161 	wc->refs[level - 1] = 0;
6162 	wc->flags[level - 1] = 0;
6163 	if (wc->stage == DROP_REFERENCE) {
6164 		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
6165 			parent = path->nodes[level]->start;
6166 		} else {
6167 			BUG_ON(root->root_key.objectid !=
6168 			       btrfs_header_owner(path->nodes[level]));
6169 			parent = 0;
6170 		}
6171 
6172 		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
6173 					root->root_key.objectid, level - 1, 0);
6174 		BUG_ON(ret);
6175 	}
6176 	btrfs_tree_unlock(next);
6177 	free_extent_buffer(next);
6178 	*lookup_info = 1;
6179 	return 1;
6180 }
6181 
6182 /*
6183  * hepler to process tree block while walking up the tree.
6184  *
6185  * when wc->stage == DROP_REFERENCE, this function drops
6186  * reference count on the block.
6187  *
6188  * when wc->stage == UPDATE_BACKREF, this function changes
6189  * wc->stage back to DROP_REFERENCE if we changed wc->stage
6190  * to UPDATE_BACKREF previously while processing the block.
6191  *
6192  * NOTE: return value 1 means we should stop walking up.
6193  */
walk_up_proc(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc)6194 static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6195 				 struct btrfs_root *root,
6196 				 struct btrfs_path *path,
6197 				 struct walk_control *wc)
6198 {
6199 	int ret;
6200 	int level = wc->level;
6201 	struct extent_buffer *eb = path->nodes[level];
6202 	u64 parent = 0;
6203 
6204 	if (wc->stage == UPDATE_BACKREF) {
6205 		BUG_ON(wc->shared_level < level);
6206 		if (level < wc->shared_level)
6207 			goto out;
6208 
6209 		ret = find_next_key(path, level + 1, &wc->update_progress);
6210 		if (ret > 0)
6211 			wc->update_ref = 0;
6212 
6213 		wc->stage = DROP_REFERENCE;
6214 		wc->shared_level = -1;
6215 		path->slots[level] = 0;
6216 
6217 		/*
6218 		 * check reference count again if the block isn't locked.
6219 		 * we should start walking down the tree again if reference
6220 		 * count is one.
6221 		 */
6222 		if (!path->locks[level]) {
6223 			BUG_ON(level == 0);
6224 			btrfs_tree_lock(eb);
6225 			btrfs_set_lock_blocking(eb);
6226 			path->locks[level] = 1;
6227 
6228 			ret = btrfs_lookup_extent_info(trans, root,
6229 						       eb->start, eb->len,
6230 						       &wc->refs[level],
6231 						       &wc->flags[level]);
6232 			BUG_ON(ret);
6233 			BUG_ON(wc->refs[level] == 0);
6234 			if (wc->refs[level] == 1) {
6235 				btrfs_tree_unlock(eb);
6236 				path->locks[level] = 0;
6237 				return 1;
6238 			}
6239 		}
6240 	}
6241 
6242 	/* wc->stage == DROP_REFERENCE */
6243 	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
6244 
6245 	if (wc->refs[level] == 1) {
6246 		if (level == 0) {
6247 			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6248 				ret = btrfs_dec_ref(trans, root, eb, 1);
6249 			else
6250 				ret = btrfs_dec_ref(trans, root, eb, 0);
6251 			BUG_ON(ret);
6252 		}
6253 		/* make block locked assertion in clean_tree_block happy */
6254 		if (!path->locks[level] &&
6255 		    btrfs_header_generation(eb) == trans->transid) {
6256 			btrfs_tree_lock(eb);
6257 			btrfs_set_lock_blocking(eb);
6258 			path->locks[level] = 1;
6259 		}
6260 		clean_tree_block(trans, root, eb);
6261 	}
6262 
6263 	if (eb == root->node) {
6264 		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6265 			parent = eb->start;
6266 		else
6267 			BUG_ON(root->root_key.objectid !=
6268 			       btrfs_header_owner(eb));
6269 	} else {
6270 		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
6271 			parent = path->nodes[level + 1]->start;
6272 		else
6273 			BUG_ON(root->root_key.objectid !=
6274 			       btrfs_header_owner(path->nodes[level + 1]));
6275 	}
6276 
6277 	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
6278 out:
6279 	wc->refs[level] = 0;
6280 	wc->flags[level] = 0;
6281 	return 0;
6282 }
6283 
walk_down_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc)6284 static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
6285 				   struct btrfs_root *root,
6286 				   struct btrfs_path *path,
6287 				   struct walk_control *wc)
6288 {
6289 	int level = wc->level;
6290 	int lookup_info = 1;
6291 	int ret;
6292 
6293 	while (level >= 0) {
6294 		ret = walk_down_proc(trans, root, path, wc, lookup_info);
6295 		if (ret > 0)
6296 			break;
6297 
6298 		if (level == 0)
6299 			break;
6300 
6301 		if (path->slots[level] >=
6302 		    btrfs_header_nritems(path->nodes[level]))
6303 			break;
6304 
6305 		ret = do_walk_down(trans, root, path, wc, &lookup_info);
6306 		if (ret > 0) {
6307 			path->slots[level]++;
6308 			continue;
6309 		} else if (ret < 0)
6310 			return ret;
6311 		level = wc->level;
6312 	}
6313 	return 0;
6314 }
6315 
walk_up_tree(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct btrfs_path * path,struct walk_control * wc,int max_level)6316 static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
6317 				 struct btrfs_root *root,
6318 				 struct btrfs_path *path,
6319 				 struct walk_control *wc, int max_level)
6320 {
6321 	int level = wc->level;
6322 	int ret;
6323 
6324 	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
6325 	while (level < max_level && path->nodes[level]) {
6326 		wc->level = level;
6327 		if (path->slots[level] + 1 <
6328 		    btrfs_header_nritems(path->nodes[level])) {
6329 			path->slots[level]++;
6330 			return 0;
6331 		} else {
6332 			ret = walk_up_proc(trans, root, path, wc);
6333 			if (ret > 0)
6334 				return 0;
6335 
6336 			if (path->locks[level]) {
6337 				btrfs_tree_unlock(path->nodes[level]);
6338 				path->locks[level] = 0;
6339 			}
6340 			free_extent_buffer(path->nodes[level]);
6341 			path->nodes[level] = NULL;
6342 			level++;
6343 		}
6344 	}
6345 	return 1;
6346 }
6347 
6348 /*
6349  * drop a subvolume tree.
6350  *
6351  * this function traverses the tree freeing any blocks that only
6352  * referenced by the tree.
6353  *
6354  * when a shared tree block is found. this function decreases its
6355  * reference count by one. if update_ref is true, this function
6356  * also make sure backrefs for the shared block and all lower level
6357  * blocks are properly updated.
6358  */
btrfs_drop_snapshot(struct btrfs_root * root,struct btrfs_block_rsv * block_rsv,int update_ref)6359 int btrfs_drop_snapshot(struct btrfs_root *root,
6360 			struct btrfs_block_rsv *block_rsv, int update_ref)
6361 {
6362 	struct btrfs_path *path;
6363 	struct btrfs_trans_handle *trans;
6364 	struct btrfs_root *tree_root = root->fs_info->tree_root;
6365 	struct btrfs_root_item *root_item = &root->root_item;
6366 	struct walk_control *wc;
6367 	struct btrfs_key key;
6368 	int err = 0;
6369 	int ret;
6370 	int level;
6371 
6372 	path = btrfs_alloc_path();
6373 	BUG_ON(!path);
6374 
6375 	wc = kzalloc(sizeof(*wc), GFP_NOFS);
6376 	BUG_ON(!wc);
6377 
6378 	trans = btrfs_start_transaction(tree_root, 0);
6379 	BUG_ON(IS_ERR(trans));
6380 
6381 	if (block_rsv)
6382 		trans->block_rsv = block_rsv;
6383 
6384 	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
6385 		level = btrfs_header_level(root->node);
6386 		path->nodes[level] = btrfs_lock_root_node(root);
6387 		btrfs_set_lock_blocking(path->nodes[level]);
6388 		path->slots[level] = 0;
6389 		path->locks[level] = 1;
6390 		memset(&wc->update_progress, 0,
6391 		       sizeof(wc->update_progress));
6392 	} else {
6393 		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
6394 		memcpy(&wc->update_progress, &key,
6395 		       sizeof(wc->update_progress));
6396 
6397 		level = root_item->drop_level;
6398 		BUG_ON(level == 0);
6399 		path->lowest_level = level;
6400 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6401 		path->lowest_level = 0;
6402 		if (ret < 0) {
6403 			err = ret;
6404 			goto out;
6405 		}
6406 		WARN_ON(ret > 0);
6407 
6408 		/*
6409 		 * unlock our path, this is safe because only this
6410 		 * function is allowed to delete this snapshot
6411 		 */
6412 		btrfs_unlock_up_safe(path, 0);
6413 
6414 		level = btrfs_header_level(root->node);
6415 		while (1) {
6416 			btrfs_tree_lock(path->nodes[level]);
6417 			btrfs_set_lock_blocking(path->nodes[level]);
6418 
6419 			ret = btrfs_lookup_extent_info(trans, root,
6420 						path->nodes[level]->start,
6421 						path->nodes[level]->len,
6422 						&wc->refs[level],
6423 						&wc->flags[level]);
6424 			BUG_ON(ret);
6425 			BUG_ON(wc->refs[level] == 0);
6426 
6427 			if (level == root_item->drop_level)
6428 				break;
6429 
6430 			btrfs_tree_unlock(path->nodes[level]);
6431 			WARN_ON(wc->refs[level] != 1);
6432 			level--;
6433 		}
6434 	}
6435 
6436 	wc->level = level;
6437 	wc->shared_level = -1;
6438 	wc->stage = DROP_REFERENCE;
6439 	wc->update_ref = update_ref;
6440 	wc->keep_locks = 0;
6441 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
6442 
6443 	while (1) {
6444 		ret = walk_down_tree(trans, root, path, wc);
6445 		if (ret < 0) {
6446 			err = ret;
6447 			break;
6448 		}
6449 
6450 		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
6451 		if (ret < 0) {
6452 			err = ret;
6453 			break;
6454 		}
6455 
6456 		if (ret > 0) {
6457 			BUG_ON(wc->stage != DROP_REFERENCE);
6458 			break;
6459 		}
6460 
6461 		if (wc->stage == DROP_REFERENCE) {
6462 			level = wc->level;
6463 			btrfs_node_key(path->nodes[level],
6464 				       &root_item->drop_progress,
6465 				       path->slots[level]);
6466 			root_item->drop_level = level;
6467 		}
6468 
6469 		BUG_ON(wc->level == 0);
6470 		if (btrfs_should_end_transaction(trans, tree_root)) {
6471 			ret = btrfs_update_root(trans, tree_root,
6472 						&root->root_key,
6473 						root_item);
6474 			BUG_ON(ret);
6475 
6476 			btrfs_end_transaction_throttle(trans, tree_root);
6477 			trans = btrfs_start_transaction(tree_root, 0);
6478 			BUG_ON(IS_ERR(trans));
6479 			if (block_rsv)
6480 				trans->block_rsv = block_rsv;
6481 		}
6482 	}
6483 	btrfs_release_path(root, path);
6484 	BUG_ON(err);
6485 
6486 	ret = btrfs_del_root(trans, tree_root, &root->root_key);
6487 	BUG_ON(ret);
6488 
6489 	if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
6490 		ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
6491 					   NULL, NULL);
6492 		BUG_ON(ret < 0);
6493 		if (ret > 0) {
6494 			/* if we fail to delete the orphan item this time
6495 			 * around, it'll get picked up the next time.
6496 			 *
6497 			 * The most common failure here is just -ENOENT.
6498 			 */
6499 			btrfs_del_orphan_item(trans, tree_root,
6500 					      root->root_key.objectid);
6501 		}
6502 	}
6503 
6504 	if (root->in_radix) {
6505 		btrfs_free_fs_root(tree_root->fs_info, root);
6506 	} else {
6507 		free_extent_buffer(root->node);
6508 		free_extent_buffer(root->commit_root);
6509 		kfree(root);
6510 	}
6511 out:
6512 	btrfs_end_transaction_throttle(trans, tree_root);
6513 	kfree(wc);
6514 	btrfs_free_path(path);
6515 	return err;
6516 }
6517 
6518 /*
6519  * drop subtree rooted at tree block 'node'.
6520  *
6521  * NOTE: this function will unlock and release tree block 'node'
6522  */
btrfs_drop_subtree(struct btrfs_trans_handle * trans,struct btrfs_root * root,struct extent_buffer * node,struct extent_buffer * parent)6523 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
6524 			struct btrfs_root *root,
6525 			struct extent_buffer *node,
6526 			struct extent_buffer *parent)
6527 {
6528 	struct btrfs_path *path;
6529 	struct walk_control *wc;
6530 	int level;
6531 	int parent_level;
6532 	int ret = 0;
6533 	int wret;
6534 
6535 	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
6536 
6537 	path = btrfs_alloc_path();
6538 	if (!path)
6539 		return -ENOMEM;
6540 
6541 	wc = kzalloc(sizeof(*wc), GFP_NOFS);
6542 	if (!wc) {
6543 		btrfs_free_path(path);
6544 		return -ENOMEM;
6545 	}
6546 
6547 	btrfs_assert_tree_locked(parent);
6548 	parent_level = btrfs_header_level(parent);
6549 	extent_buffer_get(parent);
6550 	path->nodes[parent_level] = parent;
6551 	path->slots[parent_level] = btrfs_header_nritems(parent);
6552 
6553 	btrfs_assert_tree_locked(node);
6554 	level = btrfs_header_level(node);
6555 	path->nodes[level] = node;
6556 	path->slots[level] = 0;
6557 	path->locks[level] = 1;
6558 
6559 	wc->refs[parent_level] = 1;
6560 	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
6561 	wc->level = level;
6562 	wc->shared_level = -1;
6563 	wc->stage = DROP_REFERENCE;
6564 	wc->update_ref = 0;
6565 	wc->keep_locks = 1;
6566 	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
6567 
6568 	while (1) {
6569 		wret = walk_down_tree(trans, root, path, wc);
6570 		if (wret < 0) {
6571 			ret = wret;
6572 			break;
6573 		}
6574 
6575 		wret = walk_up_tree(trans, root, path, wc, parent_level);
6576 		if (wret < 0)
6577 			ret = wret;
6578 		if (wret != 0)
6579 			break;
6580 	}
6581 
6582 	kfree(wc);
6583 	btrfs_free_path(path);
6584 	return ret;
6585 }
6586 
6587 #if 0
6588 static unsigned long calc_ra(unsigned long start, unsigned long last,
6589 			     unsigned long nr)
6590 {
6591 	return min(last, start + nr - 1);
6592 }
6593 
6594 static noinline int relocate_inode_pages(struct inode *inode, u64 start,
6595 					 u64 len)
6596 {
6597 	u64 page_start;
6598 	u64 page_end;
6599 	unsigned long first_index;
6600 	unsigned long last_index;
6601 	unsigned long i;
6602 	struct page *page;
6603 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
6604 	struct file_ra_state *ra;
6605 	struct btrfs_ordered_extent *ordered;
6606 	unsigned int total_read = 0;
6607 	unsigned int total_dirty = 0;
6608 	int ret = 0;
6609 
6610 	ra = kzalloc(sizeof(*ra), GFP_NOFS);
6611 	if (!ra)
6612 		return -ENOMEM;
6613 
6614 	mutex_lock(&inode->i_mutex);
6615 	first_index = start >> PAGE_CACHE_SHIFT;
6616 	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
6617 
6618 	/* make sure the dirty trick played by the caller work */
6619 	ret = invalidate_inode_pages2_range(inode->i_mapping,
6620 					    first_index, last_index);
6621 	if (ret)
6622 		goto out_unlock;
6623 
6624 	file_ra_state_init(ra, inode->i_mapping);
6625 
6626 	for (i = first_index ; i <= last_index; i++) {
6627 		if (total_read % ra->ra_pages == 0) {
6628 			btrfs_force_ra(inode->i_mapping, ra, NULL, i,
6629 				       calc_ra(i, last_index, ra->ra_pages));
6630 		}
6631 		total_read++;
6632 again:
6633 		if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
6634 			BUG_ON(1);
6635 		page = grab_cache_page(inode->i_mapping, i);
6636 		if (!page) {
6637 			ret = -ENOMEM;
6638 			goto out_unlock;
6639 		}
6640 		if (!PageUptodate(page)) {
6641 			btrfs_readpage(NULL, page);
6642 			lock_page(page);
6643 			if (!PageUptodate(page)) {
6644 				unlock_page(page);
6645 				page_cache_release(page);
6646 				ret = -EIO;
6647 				goto out_unlock;
6648 			}
6649 		}
6650 		wait_on_page_writeback(page);
6651 
6652 		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
6653 		page_end = page_start + PAGE_CACHE_SIZE - 1;
6654 		lock_extent(io_tree, page_start, page_end, GFP_NOFS);
6655 
6656 		ordered = btrfs_lookup_ordered_extent(inode, page_start);
6657 		if (ordered) {
6658 			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6659 			unlock_page(page);
6660 			page_cache_release(page);
6661 			btrfs_start_ordered_extent(inode, ordered, 1);
6662 			btrfs_put_ordered_extent(ordered);
6663 			goto again;
6664 		}
6665 		set_page_extent_mapped(page);
6666 
6667 		if (i == first_index)
6668 			set_extent_bits(io_tree, page_start, page_end,
6669 					EXTENT_BOUNDARY, GFP_NOFS);
6670 		btrfs_set_extent_delalloc(inode, page_start, page_end);
6671 
6672 		set_page_dirty(page);
6673 		total_dirty++;
6674 
6675 		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
6676 		unlock_page(page);
6677 		page_cache_release(page);
6678 	}
6679 
6680 out_unlock:
6681 	kfree(ra);
6682 	mutex_unlock(&inode->i_mutex);
6683 	balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
6684 	return ret;
6685 }
6686 
6687 static noinline int relocate_data_extent(struct inode *reloc_inode,
6688 					 struct btrfs_key *extent_key,
6689 					 u64 offset)
6690 {
6691 	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6692 	struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
6693 	struct extent_map *em;
6694 	u64 start = extent_key->objectid - offset;
6695 	u64 end = start + extent_key->offset - 1;
6696 
6697 	em = alloc_extent_map(GFP_NOFS);
6698 	BUG_ON(!em);
6699 
6700 	em->start = start;
6701 	em->len = extent_key->offset;
6702 	em->block_len = extent_key->offset;
6703 	em->block_start = extent_key->objectid;
6704 	em->bdev = root->fs_info->fs_devices->latest_bdev;
6705 	set_bit(EXTENT_FLAG_PINNED, &em->flags);
6706 
6707 	/* setup extent map to cheat btrfs_readpage */
6708 	lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6709 	while (1) {
6710 		int ret;
6711 		write_lock(&em_tree->lock);
6712 		ret = add_extent_mapping(em_tree, em);
6713 		write_unlock(&em_tree->lock);
6714 		if (ret != -EEXIST) {
6715 			free_extent_map(em);
6716 			break;
6717 		}
6718 		btrfs_drop_extent_cache(reloc_inode, start, end, 0);
6719 	}
6720 	unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
6721 
6722 	return relocate_inode_pages(reloc_inode, start, extent_key->offset);
6723 }
6724 
6725 struct btrfs_ref_path {
6726 	u64 extent_start;
6727 	u64 nodes[BTRFS_MAX_LEVEL];
6728 	u64 root_objectid;
6729 	u64 root_generation;
6730 	u64 owner_objectid;
6731 	u32 num_refs;
6732 	int lowest_level;
6733 	int current_level;
6734 	int shared_level;
6735 
6736 	struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
6737 	u64 new_nodes[BTRFS_MAX_LEVEL];
6738 };
6739 
6740 struct disk_extent {
6741 	u64 ram_bytes;
6742 	u64 disk_bytenr;
6743 	u64 disk_num_bytes;
6744 	u64 offset;
6745 	u64 num_bytes;
6746 	u8 compression;
6747 	u8 encryption;
6748 	u16 other_encoding;
6749 };
6750 
6751 static int is_cowonly_root(u64 root_objectid)
6752 {
6753 	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
6754 	    root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
6755 	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
6756 	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
6757 	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
6758 	    root_objectid == BTRFS_CSUM_TREE_OBJECTID)
6759 		return 1;
6760 	return 0;
6761 }
6762 
6763 static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
6764 				    struct btrfs_root *extent_root,
6765 				    struct btrfs_ref_path *ref_path,
6766 				    int first_time)
6767 {
6768 	struct extent_buffer *leaf;
6769 	struct btrfs_path *path;
6770 	struct btrfs_extent_ref *ref;
6771 	struct btrfs_key key;
6772 	struct btrfs_key found_key;
6773 	u64 bytenr;
6774 	u32 nritems;
6775 	int level;
6776 	int ret = 1;
6777 
6778 	path = btrfs_alloc_path();
6779 	if (!path)
6780 		return -ENOMEM;
6781 
6782 	if (first_time) {
6783 		ref_path->lowest_level = -1;
6784 		ref_path->current_level = -1;
6785 		ref_path->shared_level = -1;
6786 		goto walk_up;
6787 	}
6788 walk_down:
6789 	level = ref_path->current_level - 1;
6790 	while (level >= -1) {
6791 		u64 parent;
6792 		if (level < ref_path->lowest_level)
6793 			break;
6794 
6795 		if (level >= 0)
6796 			bytenr = ref_path->nodes[level];
6797 		else
6798 			bytenr = ref_path->extent_start;
6799 		BUG_ON(bytenr == 0);
6800 
6801 		parent = ref_path->nodes[level + 1];
6802 		ref_path->nodes[level + 1] = 0;
6803 		ref_path->current_level = level;
6804 		BUG_ON(parent == 0);
6805 
6806 		key.objectid = bytenr;
6807 		key.offset = parent + 1;
6808 		key.type = BTRFS_EXTENT_REF_KEY;
6809 
6810 		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6811 		if (ret < 0)
6812 			goto out;
6813 		BUG_ON(ret == 0);
6814 
6815 		leaf = path->nodes[0];
6816 		nritems = btrfs_header_nritems(leaf);
6817 		if (path->slots[0] >= nritems) {
6818 			ret = btrfs_next_leaf(extent_root, path);
6819 			if (ret < 0)
6820 				goto out;
6821 			if (ret > 0)
6822 				goto next;
6823 			leaf = path->nodes[0];
6824 		}
6825 
6826 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6827 		if (found_key.objectid == bytenr &&
6828 		    found_key.type == BTRFS_EXTENT_REF_KEY) {
6829 			if (level < ref_path->shared_level)
6830 				ref_path->shared_level = level;
6831 			goto found;
6832 		}
6833 next:
6834 		level--;
6835 		btrfs_release_path(extent_root, path);
6836 		cond_resched();
6837 	}
6838 	/* reached lowest level */
6839 	ret = 1;
6840 	goto out;
6841 walk_up:
6842 	level = ref_path->current_level;
6843 	while (level < BTRFS_MAX_LEVEL - 1) {
6844 		u64 ref_objectid;
6845 
6846 		if (level >= 0)
6847 			bytenr = ref_path->nodes[level];
6848 		else
6849 			bytenr = ref_path->extent_start;
6850 
6851 		BUG_ON(bytenr == 0);
6852 
6853 		key.objectid = bytenr;
6854 		key.offset = 0;
6855 		key.type = BTRFS_EXTENT_REF_KEY;
6856 
6857 		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
6858 		if (ret < 0)
6859 			goto out;
6860 
6861 		leaf = path->nodes[0];
6862 		nritems = btrfs_header_nritems(leaf);
6863 		if (path->slots[0] >= nritems) {
6864 			ret = btrfs_next_leaf(extent_root, path);
6865 			if (ret < 0)
6866 				goto out;
6867 			if (ret > 0) {
6868 				/* the extent was freed by someone */
6869 				if (ref_path->lowest_level == level)
6870 					goto out;
6871 				btrfs_release_path(extent_root, path);
6872 				goto walk_down;
6873 			}
6874 			leaf = path->nodes[0];
6875 		}
6876 
6877 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
6878 		if (found_key.objectid != bytenr ||
6879 				found_key.type != BTRFS_EXTENT_REF_KEY) {
6880 			/* the extent was freed by someone */
6881 			if (ref_path->lowest_level == level) {
6882 				ret = 1;
6883 				goto out;
6884 			}
6885 			btrfs_release_path(extent_root, path);
6886 			goto walk_down;
6887 		}
6888 found:
6889 		ref = btrfs_item_ptr(leaf, path->slots[0],
6890 				struct btrfs_extent_ref);
6891 		ref_objectid = btrfs_ref_objectid(leaf, ref);
6892 		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
6893 			if (first_time) {
6894 				level = (int)ref_objectid;
6895 				BUG_ON(level >= BTRFS_MAX_LEVEL);
6896 				ref_path->lowest_level = level;
6897 				ref_path->current_level = level;
6898 				ref_path->nodes[level] = bytenr;
6899 			} else {
6900 				WARN_ON(ref_objectid != level);
6901 			}
6902 		} else {
6903 			WARN_ON(level != -1);
6904 		}
6905 		first_time = 0;
6906 
6907 		if (ref_path->lowest_level == level) {
6908 			ref_path->owner_objectid = ref_objectid;
6909 			ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
6910 		}
6911 
6912 		/*
6913 		 * the block is tree root or the block isn't in reference
6914 		 * counted tree.
6915 		 */
6916 		if (found_key.objectid == found_key.offset ||
6917 		    is_cowonly_root(btrfs_ref_root(leaf, ref))) {
6918 			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6919 			ref_path->root_generation =
6920 				btrfs_ref_generation(leaf, ref);
6921 			if (level < 0) {
6922 				/* special reference from the tree log */
6923 				ref_path->nodes[0] = found_key.offset;
6924 				ref_path->current_level = 0;
6925 			}
6926 			ret = 0;
6927 			goto out;
6928 		}
6929 
6930 		level++;
6931 		BUG_ON(ref_path->nodes[level] != 0);
6932 		ref_path->nodes[level] = found_key.offset;
6933 		ref_path->current_level = level;
6934 
6935 		/*
6936 		 * the reference was created in the running transaction,
6937 		 * no need to continue walking up.
6938 		 */
6939 		if (btrfs_ref_generation(leaf, ref) == trans->transid) {
6940 			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
6941 			ref_path->root_generation =
6942 				btrfs_ref_generation(leaf, ref);
6943 			ret = 0;
6944 			goto out;
6945 		}
6946 
6947 		btrfs_release_path(extent_root, path);
6948 		cond_resched();
6949 	}
6950 	/* reached max tree level, but no tree root found. */
6951 	BUG();
6952 out:
6953 	btrfs_free_path(path);
6954 	return ret;
6955 }
6956 
6957 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
6958 				struct btrfs_root *extent_root,
6959 				struct btrfs_ref_path *ref_path,
6960 				u64 extent_start)
6961 {
6962 	memset(ref_path, 0, sizeof(*ref_path));
6963 	ref_path->extent_start = extent_start;
6964 
6965 	return __next_ref_path(trans, extent_root, ref_path, 1);
6966 }
6967 
6968 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
6969 			       struct btrfs_root *extent_root,
6970 			       struct btrfs_ref_path *ref_path)
6971 {
6972 	return __next_ref_path(trans, extent_root, ref_path, 0);
6973 }
6974 
6975 static noinline int get_new_locations(struct inode *reloc_inode,
6976 				      struct btrfs_key *extent_key,
6977 				      u64 offset, int no_fragment,
6978 				      struct disk_extent **extents,
6979 				      int *nr_extents)
6980 {
6981 	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
6982 	struct btrfs_path *path;
6983 	struct btrfs_file_extent_item *fi;
6984 	struct extent_buffer *leaf;
6985 	struct disk_extent *exts = *extents;
6986 	struct btrfs_key found_key;
6987 	u64 cur_pos;
6988 	u64 last_byte;
6989 	u32 nritems;
6990 	int nr = 0;
6991 	int max = *nr_extents;
6992 	int ret;
6993 
6994 	WARN_ON(!no_fragment && *extents);
6995 	if (!exts) {
6996 		max = 1;
6997 		exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
6998 		if (!exts)
6999 			return -ENOMEM;
7000 	}
7001 
7002 	path = btrfs_alloc_path();
7003 	if (!path) {
7004 		if (exts != *extents)
7005 			kfree(exts);
7006 		return -ENOMEM;
7007 	}
7008 
7009 	cur_pos = extent_key->objectid - offset;
7010 	last_byte = extent_key->objectid + extent_key->offset;
7011 	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
7012 				       cur_pos, 0);
7013 	if (ret < 0)
7014 		goto out;
7015 	if (ret > 0) {
7016 		ret = -ENOENT;
7017 		goto out;
7018 	}
7019 
7020 	while (1) {
7021 		leaf = path->nodes[0];
7022 		nritems = btrfs_header_nritems(leaf);
7023 		if (path->slots[0] >= nritems) {
7024 			ret = btrfs_next_leaf(root, path);
7025 			if (ret < 0)
7026 				goto out;
7027 			if (ret > 0)
7028 				break;
7029 			leaf = path->nodes[0];
7030 		}
7031 
7032 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
7033 		if (found_key.offset != cur_pos ||
7034 		    found_key.type != BTRFS_EXTENT_DATA_KEY ||
7035 		    found_key.objectid != reloc_inode->i_ino)
7036 			break;
7037 
7038 		fi = btrfs_item_ptr(leaf, path->slots[0],
7039 				    struct btrfs_file_extent_item);
7040 		if (btrfs_file_extent_type(leaf, fi) !=
7041 		    BTRFS_FILE_EXTENT_REG ||
7042 		    btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
7043 			break;
7044 
7045 		if (nr == max) {
7046 			struct disk_extent *old = exts;
7047 			max *= 2;
7048 			exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
7049 			if (!exts) {
7050 				ret = -ENOMEM;
7051 				goto out;
7052 			}
7053 			memcpy(exts, old, sizeof(*exts) * nr);
7054 			if (old != *extents)
7055 				kfree(old);
7056 		}
7057 
7058 		exts[nr].disk_bytenr =
7059 			btrfs_file_extent_disk_bytenr(leaf, fi);
7060 		exts[nr].disk_num_bytes =
7061 			btrfs_file_extent_disk_num_bytes(leaf, fi);
7062 		exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
7063 		exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7064 		exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
7065 		exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
7066 		exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
7067 		exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
7068 									   fi);
7069 		BUG_ON(exts[nr].offset > 0);
7070 		BUG_ON(exts[nr].compression || exts[nr].encryption);
7071 		BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
7072 
7073 		cur_pos += exts[nr].num_bytes;
7074 		nr++;
7075 
7076 		if (cur_pos + offset >= last_byte)
7077 			break;
7078 
7079 		if (no_fragment) {
7080 			ret = 1;
7081 			goto out;
7082 		}
7083 		path->slots[0]++;
7084 	}
7085 
7086 	BUG_ON(cur_pos + offset > last_byte);
7087 	if (cur_pos + offset < last_byte) {
7088 		ret = -ENOENT;
7089 		goto out;
7090 	}
7091 	ret = 0;
7092 out:
7093 	btrfs_free_path(path);
7094 	if (ret) {
7095 		if (exts != *extents)
7096 			kfree(exts);
7097 	} else {
7098 		*extents = exts;
7099 		*nr_extents = nr;
7100 	}
7101 	return ret;
7102 }
7103 
7104 static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
7105 					struct btrfs_root *root,
7106 					struct btrfs_path *path,
7107 					struct btrfs_key *extent_key,
7108 					struct btrfs_key *leaf_key,
7109 					struct btrfs_ref_path *ref_path,
7110 					struct disk_extent *new_extents,
7111 					int nr_extents)
7112 {
7113 	struct extent_buffer *leaf;
7114 	struct btrfs_file_extent_item *fi;
7115 	struct inode *inode = NULL;
7116 	struct btrfs_key key;
7117 	u64 lock_start = 0;
7118 	u64 lock_end = 0;
7119 	u64 num_bytes;
7120 	u64 ext_offset;
7121 	u64 search_end = (u64)-1;
7122 	u32 nritems;
7123 	int nr_scaned = 0;
7124 	int extent_locked = 0;
7125 	int extent_type;
7126 	int ret;
7127 
7128 	memcpy(&key, leaf_key, sizeof(key));
7129 	if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7130 		if (key.objectid < ref_path->owner_objectid ||
7131 		    (key.objectid == ref_path->owner_objectid &&
7132 		     key.type < BTRFS_EXTENT_DATA_KEY)) {
7133 			key.objectid = ref_path->owner_objectid;
7134 			key.type = BTRFS_EXTENT_DATA_KEY;
7135 			key.offset = 0;
7136 		}
7137 	}
7138 
7139 	while (1) {
7140 		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
7141 		if (ret < 0)
7142 			goto out;
7143 
7144 		leaf = path->nodes[0];
7145 		nritems = btrfs_header_nritems(leaf);
7146 next:
7147 		if (extent_locked && ret > 0) {
7148 			/*
7149 			 * the file extent item was modified by someone
7150 			 * before the extent got locked.
7151 			 */
7152 			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7153 				      lock_end, GFP_NOFS);
7154 			extent_locked = 0;
7155 		}
7156 
7157 		if (path->slots[0] >= nritems) {
7158 			if (++nr_scaned > 2)
7159 				break;
7160 
7161 			BUG_ON(extent_locked);
7162 			ret = btrfs_next_leaf(root, path);
7163 			if (ret < 0)
7164 				goto out;
7165 			if (ret > 0)
7166 				break;
7167 			leaf = path->nodes[0];
7168 			nritems = btrfs_header_nritems(leaf);
7169 		}
7170 
7171 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
7172 
7173 		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
7174 			if ((key.objectid > ref_path->owner_objectid) ||
7175 			    (key.objectid == ref_path->owner_objectid &&
7176 			     key.type > BTRFS_EXTENT_DATA_KEY) ||
7177 			    key.offset >= search_end)
7178 				break;
7179 		}
7180 
7181 		if (inode && key.objectid != inode->i_ino) {
7182 			BUG_ON(extent_locked);
7183 			btrfs_release_path(root, path);
7184 			mutex_unlock(&inode->i_mutex);
7185 			iput(inode);
7186 			inode = NULL;
7187 			continue;
7188 		}
7189 
7190 		if (key.type != BTRFS_EXTENT_DATA_KEY) {
7191 			path->slots[0]++;
7192 			ret = 1;
7193 			goto next;
7194 		}
7195 		fi = btrfs_item_ptr(leaf, path->slots[0],
7196 				    struct btrfs_file_extent_item);
7197 		extent_type = btrfs_file_extent_type(leaf, fi);
7198 		if ((extent_type != BTRFS_FILE_EXTENT_REG &&
7199 		     extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
7200 		    (btrfs_file_extent_disk_bytenr(leaf, fi) !=
7201 		     extent_key->objectid)) {
7202 			path->slots[0]++;
7203 			ret = 1;
7204 			goto next;
7205 		}
7206 
7207 		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7208 		ext_offset = btrfs_file_extent_offset(leaf, fi);
7209 
7210 		if (search_end == (u64)-1) {
7211 			search_end = key.offset - ext_offset +
7212 				btrfs_file_extent_ram_bytes(leaf, fi);
7213 		}
7214 
7215 		if (!extent_locked) {
7216 			lock_start = key.offset;
7217 			lock_end = lock_start + num_bytes - 1;
7218 		} else {
7219 			if (lock_start > key.offset ||
7220 			    lock_end + 1 < key.offset + num_bytes) {
7221 				unlock_extent(&BTRFS_I(inode)->io_tree,
7222 					      lock_start, lock_end, GFP_NOFS);
7223 				extent_locked = 0;
7224 			}
7225 		}
7226 
7227 		if (!inode) {
7228 			btrfs_release_path(root, path);
7229 
7230 			inode = btrfs_iget_locked(root->fs_info->sb,
7231 						  key.objectid, root);
7232 			if (inode->i_state & I_NEW) {
7233 				BTRFS_I(inode)->root = root;
7234 				BTRFS_I(inode)->location.objectid =
7235 					key.objectid;
7236 				BTRFS_I(inode)->location.type =
7237 					BTRFS_INODE_ITEM_KEY;
7238 				BTRFS_I(inode)->location.offset = 0;
7239 				btrfs_read_locked_inode(inode);
7240 				unlock_new_inode(inode);
7241 			}
7242 			/*
7243 			 * some code call btrfs_commit_transaction while
7244 			 * holding the i_mutex, so we can't use mutex_lock
7245 			 * here.
7246 			 */
7247 			if (is_bad_inode(inode) ||
7248 			    !mutex_trylock(&inode->i_mutex)) {
7249 				iput(inode);
7250 				inode = NULL;
7251 				key.offset = (u64)-1;
7252 				goto skip;
7253 			}
7254 		}
7255 
7256 		if (!extent_locked) {
7257 			struct btrfs_ordered_extent *ordered;
7258 
7259 			btrfs_release_path(root, path);
7260 
7261 			lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7262 				    lock_end, GFP_NOFS);
7263 			ordered = btrfs_lookup_first_ordered_extent(inode,
7264 								    lock_end);
7265 			if (ordered &&
7266 			    ordered->file_offset <= lock_end &&
7267 			    ordered->file_offset + ordered->len > lock_start) {
7268 				unlock_extent(&BTRFS_I(inode)->io_tree,
7269 					      lock_start, lock_end, GFP_NOFS);
7270 				btrfs_start_ordered_extent(inode, ordered, 1);
7271 				btrfs_put_ordered_extent(ordered);
7272 				key.offset += num_bytes;
7273 				goto skip;
7274 			}
7275 			if (ordered)
7276 				btrfs_put_ordered_extent(ordered);
7277 
7278 			extent_locked = 1;
7279 			continue;
7280 		}
7281 
7282 		if (nr_extents == 1) {
7283 			/* update extent pointer in place */
7284 			btrfs_set_file_extent_disk_bytenr(leaf, fi,
7285 						new_extents[0].disk_bytenr);
7286 			btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7287 						new_extents[0].disk_num_bytes);
7288 			btrfs_mark_buffer_dirty(leaf);
7289 
7290 			btrfs_drop_extent_cache(inode, key.offset,
7291 						key.offset + num_bytes - 1, 0);
7292 
7293 			ret = btrfs_inc_extent_ref(trans, root,
7294 						new_extents[0].disk_bytenr,
7295 						new_extents[0].disk_num_bytes,
7296 						leaf->start,
7297 						root->root_key.objectid,
7298 						trans->transid,
7299 						key.objectid);
7300 			BUG_ON(ret);
7301 
7302 			ret = btrfs_free_extent(trans, root,
7303 						extent_key->objectid,
7304 						extent_key->offset,
7305 						leaf->start,
7306 						btrfs_header_owner(leaf),
7307 						btrfs_header_generation(leaf),
7308 						key.objectid, 0);
7309 			BUG_ON(ret);
7310 
7311 			btrfs_release_path(root, path);
7312 			key.offset += num_bytes;
7313 		} else {
7314 			BUG_ON(1);
7315 #if 0
7316 			u64 alloc_hint;
7317 			u64 extent_len;
7318 			int i;
7319 			/*
7320 			 * drop old extent pointer at first, then insert the
7321 			 * new pointers one bye one
7322 			 */
7323 			btrfs_release_path(root, path);
7324 			ret = btrfs_drop_extents(trans, root, inode, key.offset,
7325 						 key.offset + num_bytes,
7326 						 key.offset, &alloc_hint);
7327 			BUG_ON(ret);
7328 
7329 			for (i = 0; i < nr_extents; i++) {
7330 				if (ext_offset >= new_extents[i].num_bytes) {
7331 					ext_offset -= new_extents[i].num_bytes;
7332 					continue;
7333 				}
7334 				extent_len = min(new_extents[i].num_bytes -
7335 						 ext_offset, num_bytes);
7336 
7337 				ret = btrfs_insert_empty_item(trans, root,
7338 							      path, &key,
7339 							      sizeof(*fi));
7340 				BUG_ON(ret);
7341 
7342 				leaf = path->nodes[0];
7343 				fi = btrfs_item_ptr(leaf, path->slots[0],
7344 						struct btrfs_file_extent_item);
7345 				btrfs_set_file_extent_generation(leaf, fi,
7346 							trans->transid);
7347 				btrfs_set_file_extent_type(leaf, fi,
7348 							BTRFS_FILE_EXTENT_REG);
7349 				btrfs_set_file_extent_disk_bytenr(leaf, fi,
7350 						new_extents[i].disk_bytenr);
7351 				btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7352 						new_extents[i].disk_num_bytes);
7353 				btrfs_set_file_extent_ram_bytes(leaf, fi,
7354 						new_extents[i].ram_bytes);
7355 
7356 				btrfs_set_file_extent_compression(leaf, fi,
7357 						new_extents[i].compression);
7358 				btrfs_set_file_extent_encryption(leaf, fi,
7359 						new_extents[i].encryption);
7360 				btrfs_set_file_extent_other_encoding(leaf, fi,
7361 						new_extents[i].other_encoding);
7362 
7363 				btrfs_set_file_extent_num_bytes(leaf, fi,
7364 							extent_len);
7365 				ext_offset += new_extents[i].offset;
7366 				btrfs_set_file_extent_offset(leaf, fi,
7367 							ext_offset);
7368 				btrfs_mark_buffer_dirty(leaf);
7369 
7370 				btrfs_drop_extent_cache(inode, key.offset,
7371 						key.offset + extent_len - 1, 0);
7372 
7373 				ret = btrfs_inc_extent_ref(trans, root,
7374 						new_extents[i].disk_bytenr,
7375 						new_extents[i].disk_num_bytes,
7376 						leaf->start,
7377 						root->root_key.objectid,
7378 						trans->transid, key.objectid);
7379 				BUG_ON(ret);
7380 				btrfs_release_path(root, path);
7381 
7382 				inode_add_bytes(inode, extent_len);
7383 
7384 				ext_offset = 0;
7385 				num_bytes -= extent_len;
7386 				key.offset += extent_len;
7387 
7388 				if (num_bytes == 0)
7389 					break;
7390 			}
7391 			BUG_ON(i >= nr_extents);
7392 #endif
7393 		}
7394 
7395 		if (extent_locked) {
7396 			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7397 				      lock_end, GFP_NOFS);
7398 			extent_locked = 0;
7399 		}
7400 skip:
7401 		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
7402 		    key.offset >= search_end)
7403 			break;
7404 
7405 		cond_resched();
7406 	}
7407 	ret = 0;
7408 out:
7409 	btrfs_release_path(root, path);
7410 	if (inode) {
7411 		mutex_unlock(&inode->i_mutex);
7412 		if (extent_locked) {
7413 			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
7414 				      lock_end, GFP_NOFS);
7415 		}
7416 		iput(inode);
7417 	}
7418 	return ret;
7419 }
7420 
7421 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
7422 			       struct btrfs_root *root,
7423 			       struct extent_buffer *buf, u64 orig_start)
7424 {
7425 	int level;
7426 	int ret;
7427 
7428 	BUG_ON(btrfs_header_generation(buf) != trans->transid);
7429 	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
7430 
7431 	level = btrfs_header_level(buf);
7432 	if (level == 0) {
7433 		struct btrfs_leaf_ref *ref;
7434 		struct btrfs_leaf_ref *orig_ref;
7435 
7436 		orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
7437 		if (!orig_ref)
7438 			return -ENOENT;
7439 
7440 		ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
7441 		if (!ref) {
7442 			btrfs_free_leaf_ref(root, orig_ref);
7443 			return -ENOMEM;
7444 		}
7445 
7446 		ref->nritems = orig_ref->nritems;
7447 		memcpy(ref->extents, orig_ref->extents,
7448 			sizeof(ref->extents[0]) * ref->nritems);
7449 
7450 		btrfs_free_leaf_ref(root, orig_ref);
7451 
7452 		ref->root_gen = trans->transid;
7453 		ref->bytenr = buf->start;
7454 		ref->owner = btrfs_header_owner(buf);
7455 		ref->generation = btrfs_header_generation(buf);
7456 
7457 		ret = btrfs_add_leaf_ref(root, ref, 0);
7458 		WARN_ON(ret);
7459 		btrfs_free_leaf_ref(root, ref);
7460 	}
7461 	return 0;
7462 }
7463 
7464 static noinline int invalidate_extent_cache(struct btrfs_root *root,
7465 					struct extent_buffer *leaf,
7466 					struct btrfs_block_group_cache *group,
7467 					struct btrfs_root *target_root)
7468 {
7469 	struct btrfs_key key;
7470 	struct inode *inode = NULL;
7471 	struct btrfs_file_extent_item *fi;
7472 	struct extent_state *cached_state = NULL;
7473 	u64 num_bytes;
7474 	u64 skip_objectid = 0;
7475 	u32 nritems;
7476 	u32 i;
7477 
7478 	nritems = btrfs_header_nritems(leaf);
7479 	for (i = 0; i < nritems; i++) {
7480 		btrfs_item_key_to_cpu(leaf, &key, i);
7481 		if (key.objectid == skip_objectid ||
7482 		    key.type != BTRFS_EXTENT_DATA_KEY)
7483 			continue;
7484 		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
7485 		if (btrfs_file_extent_type(leaf, fi) ==
7486 		    BTRFS_FILE_EXTENT_INLINE)
7487 			continue;
7488 		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
7489 			continue;
7490 		if (!inode || inode->i_ino != key.objectid) {
7491 			iput(inode);
7492 			inode = btrfs_ilookup(target_root->fs_info->sb,
7493 					      key.objectid, target_root, 1);
7494 		}
7495 		if (!inode) {
7496 			skip_objectid = key.objectid;
7497 			continue;
7498 		}
7499 		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
7500 
7501 		lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
7502 				 key.offset + num_bytes - 1, 0, &cached_state,
7503 				 GFP_NOFS);
7504 		btrfs_drop_extent_cache(inode, key.offset,
7505 					key.offset + num_bytes - 1, 1);
7506 		unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
7507 				     key.offset + num_bytes - 1, &cached_state,
7508 				     GFP_NOFS);
7509 		cond_resched();
7510 	}
7511 	iput(inode);
7512 	return 0;
7513 }
7514 
7515 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
7516 					struct btrfs_root *root,
7517 					struct extent_buffer *leaf,
7518 					struct btrfs_block_group_cache *group,
7519 					struct inode *reloc_inode)
7520 {
7521 	struct btrfs_key key;
7522 	struct btrfs_key extent_key;
7523 	struct btrfs_file_extent_item *fi;
7524 	struct btrfs_leaf_ref *ref;
7525 	struct disk_extent *new_extent;
7526 	u64 bytenr;
7527 	u64 num_bytes;
7528 	u32 nritems;
7529 	u32 i;
7530 	int ext_index;
7531 	int nr_extent;
7532 	int ret;
7533 
7534 	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
7535 	if (!new_extent)
7536 		return -ENOMEM;
7537 
7538 	ref = btrfs_lookup_leaf_ref(root, leaf->start);
7539 	BUG_ON(!ref);
7540 
7541 	ext_index = -1;
7542 	nritems = btrfs_header_nritems(leaf);
7543 	for (i = 0; i < nritems; i++) {
7544 		btrfs_item_key_to_cpu(leaf, &key, i);
7545 		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
7546 			continue;
7547 		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
7548 		if (btrfs_file_extent_type(leaf, fi) ==
7549 		    BTRFS_FILE_EXTENT_INLINE)
7550 			continue;
7551 		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
7552 		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
7553 		if (bytenr == 0)
7554 			continue;
7555 
7556 		ext_index++;
7557 		if (bytenr >= group->key.objectid + group->key.offset ||
7558 		    bytenr + num_bytes <= group->key.objectid)
7559 			continue;
7560 
7561 		extent_key.objectid = bytenr;
7562 		extent_key.offset = num_bytes;
7563 		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
7564 		nr_extent = 1;
7565 		ret = get_new_locations(reloc_inode, &extent_key,
7566 					group->key.objectid, 1,
7567 					&new_extent, &nr_extent);
7568 		if (ret > 0)
7569 			continue;
7570 		BUG_ON(ret < 0);
7571 
7572 		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
7573 		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
7574 		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
7575 		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
7576 
7577 		btrfs_set_file_extent_disk_bytenr(leaf, fi,
7578 						new_extent->disk_bytenr);
7579 		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
7580 						new_extent->disk_num_bytes);
7581 		btrfs_mark_buffer_dirty(leaf);
7582 
7583 		ret = btrfs_inc_extent_ref(trans, root,
7584 					new_extent->disk_bytenr,
7585 					new_extent->disk_num_bytes,
7586 					leaf->start,
7587 					root->root_key.objectid,
7588 					trans->transid, key.objectid);
7589 		BUG_ON(ret);
7590 
7591 		ret = btrfs_free_extent(trans, root,
7592 					bytenr, num_bytes, leaf->start,
7593 					btrfs_header_owner(leaf),
7594 					btrfs_header_generation(leaf),
7595 					key.objectid, 0);
7596 		BUG_ON(ret);
7597 		cond_resched();
7598 	}
7599 	kfree(new_extent);
7600 	BUG_ON(ext_index + 1 != ref->nritems);
7601 	btrfs_free_leaf_ref(root, ref);
7602 	return 0;
7603 }
7604 
7605 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
7606 			  struct btrfs_root *root)
7607 {
7608 	struct btrfs_root *reloc_root;
7609 	int ret;
7610 
7611 	if (root->reloc_root) {
7612 		reloc_root = root->reloc_root;
7613 		root->reloc_root = NULL;
7614 		list_add(&reloc_root->dead_list,
7615 			 &root->fs_info->dead_reloc_roots);
7616 
7617 		btrfs_set_root_bytenr(&reloc_root->root_item,
7618 				      reloc_root->node->start);
7619 		btrfs_set_root_level(&root->root_item,
7620 				     btrfs_header_level(reloc_root->node));
7621 		memset(&reloc_root->root_item.drop_progress, 0,
7622 			sizeof(struct btrfs_disk_key));
7623 		reloc_root->root_item.drop_level = 0;
7624 
7625 		ret = btrfs_update_root(trans, root->fs_info->tree_root,
7626 					&reloc_root->root_key,
7627 					&reloc_root->root_item);
7628 		BUG_ON(ret);
7629 	}
7630 	return 0;
7631 }
7632 
7633 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
7634 {
7635 	struct btrfs_trans_handle *trans;
7636 	struct btrfs_root *reloc_root;
7637 	struct btrfs_root *prev_root = NULL;
7638 	struct list_head dead_roots;
7639 	int ret;
7640 	unsigned long nr;
7641 
7642 	INIT_LIST_HEAD(&dead_roots);
7643 	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
7644 
7645 	while (!list_empty(&dead_roots)) {
7646 		reloc_root = list_entry(dead_roots.prev,
7647 					struct btrfs_root, dead_list);
7648 		list_del_init(&reloc_root->dead_list);
7649 
7650 		BUG_ON(reloc_root->commit_root != NULL);
7651 		while (1) {
7652 			trans = btrfs_join_transaction(root, 1);
7653 			BUG_ON(IS_ERR(trans));
7654 
7655 			mutex_lock(&root->fs_info->drop_mutex);
7656 			ret = btrfs_drop_snapshot(trans, reloc_root);
7657 			if (ret != -EAGAIN)
7658 				break;
7659 			mutex_unlock(&root->fs_info->drop_mutex);
7660 
7661 			nr = trans->blocks_used;
7662 			ret = btrfs_end_transaction(trans, root);
7663 			BUG_ON(ret);
7664 			btrfs_btree_balance_dirty(root, nr);
7665 		}
7666 
7667 		free_extent_buffer(reloc_root->node);
7668 
7669 		ret = btrfs_del_root(trans, root->fs_info->tree_root,
7670 				     &reloc_root->root_key);
7671 		BUG_ON(ret);
7672 		mutex_unlock(&root->fs_info->drop_mutex);
7673 
7674 		nr = trans->blocks_used;
7675 		ret = btrfs_end_transaction(trans, root);
7676 		BUG_ON(ret);
7677 		btrfs_btree_balance_dirty(root, nr);
7678 
7679 		kfree(prev_root);
7680 		prev_root = reloc_root;
7681 	}
7682 	if (prev_root) {
7683 		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
7684 		kfree(prev_root);
7685 	}
7686 	return 0;
7687 }
7688 
7689 int btrfs_add_dead_reloc_root(struct btrfs_root *root)
7690 {
7691 	list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
7692 	return 0;
7693 }
7694 
7695 int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
7696 {
7697 	struct btrfs_root *reloc_root;
7698 	struct btrfs_trans_handle *trans;
7699 	struct btrfs_key location;
7700 	int found;
7701 	int ret;
7702 
7703 	mutex_lock(&root->fs_info->tree_reloc_mutex);
7704 	ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
7705 	BUG_ON(ret);
7706 	found = !list_empty(&root->fs_info->dead_reloc_roots);
7707 	mutex_unlock(&root->fs_info->tree_reloc_mutex);
7708 
7709 	if (found) {
7710 		trans = btrfs_start_transaction(root, 1);
7711 		BUG_ON(IS_ERR(trans));
7712 		ret = btrfs_commit_transaction(trans, root);
7713 		BUG_ON(ret);
7714 	}
7715 
7716 	location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
7717 	location.offset = (u64)-1;
7718 	location.type = BTRFS_ROOT_ITEM_KEY;
7719 
7720 	reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
7721 	BUG_ON(!reloc_root);
7722 	ret = btrfs_orphan_cleanup(reloc_root);
7723 	BUG_ON(ret);
7724 	return 0;
7725 }
7726 
7727 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
7728 				    struct btrfs_root *root)
7729 {
7730 	struct btrfs_root *reloc_root;
7731 	struct extent_buffer *eb;
7732 	struct btrfs_root_item *root_item;
7733 	struct btrfs_key root_key;
7734 	int ret;
7735 
7736 	BUG_ON(!root->ref_cows);
7737 	if (root->reloc_root)
7738 		return 0;
7739 
7740 	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
7741 	if (!root_item)
7742 		return -ENOMEM;
7743 
7744 	ret = btrfs_copy_root(trans, root, root->commit_root,
7745 			      &eb, BTRFS_TREE_RELOC_OBJECTID);
7746 	BUG_ON(ret);
7747 
7748 	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
7749 	root_key.offset = root->root_key.objectid;
7750 	root_key.type = BTRFS_ROOT_ITEM_KEY;
7751 
7752 	memcpy(root_item, &root->root_item, sizeof(root_item));
7753 	btrfs_set_root_refs(root_item, 0);
7754 	btrfs_set_root_bytenr(root_item, eb->start);
7755 	btrfs_set_root_level(root_item, btrfs_header_level(eb));
7756 	btrfs_set_root_generation(root_item, trans->transid);
7757 
7758 	btrfs_tree_unlock(eb);
7759 	free_extent_buffer(eb);
7760 
7761 	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
7762 				&root_key, root_item);
7763 	BUG_ON(ret);
7764 	kfree(root_item);
7765 
7766 	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
7767 						 &root_key);
7768 	BUG_ON(IS_ERR(reloc_root));
7769 	reloc_root->last_trans = trans->transid;
7770 	reloc_root->commit_root = NULL;
7771 	reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
7772 
7773 	root->reloc_root = reloc_root;
7774 	return 0;
7775 }
7776 
7777 /*
7778  * Core function of space balance.
7779  *
7780  * The idea is using reloc trees to relocate tree blocks in reference
7781  * counted roots. There is one reloc tree for each subvol, and all
7782  * reloc trees share same root key objectid. Reloc trees are snapshots
7783  * of the latest committed roots of subvols (root->commit_root).
7784  *
7785  * To relocate a tree block referenced by a subvol, there are two steps.
7786  * COW the block through subvol's reloc tree, then update block pointer
7787  * in the subvol to point to the new block. Since all reloc trees share
7788  * same root key objectid, doing special handing for tree blocks owned
7789  * by them is easy. Once a tree block has been COWed in one reloc tree,
7790  * we can use the resulting new block directly when the same block is
7791  * required to COW again through other reloc trees. By this way, relocated
7792  * tree blocks are shared between reloc trees, so they are also shared
7793  * between subvols.
7794  */
7795 static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
7796 				      struct btrfs_root *root,
7797 				      struct btrfs_path *path,
7798 				      struct btrfs_key *first_key,
7799 				      struct btrfs_ref_path *ref_path,
7800 				      struct btrfs_block_group_cache *group,
7801 				      struct inode *reloc_inode)
7802 {
7803 	struct btrfs_root *reloc_root;
7804 	struct extent_buffer *eb = NULL;
7805 	struct btrfs_key *keys;
7806 	u64 *nodes;
7807 	int level;
7808 	int shared_level;
7809 	int lowest_level = 0;
7810 	int ret;
7811 
7812 	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
7813 		lowest_level = ref_path->owner_objectid;
7814 
7815 	if (!root->ref_cows) {
7816 		path->lowest_level = lowest_level;
7817 		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
7818 		BUG_ON(ret < 0);
7819 		path->lowest_level = 0;
7820 		btrfs_release_path(root, path);
7821 		return 0;
7822 	}
7823 
7824 	mutex_lock(&root->fs_info->tree_reloc_mutex);
7825 	ret = init_reloc_tree(trans, root);
7826 	BUG_ON(ret);
7827 	reloc_root = root->reloc_root;
7828 
7829 	shared_level = ref_path->shared_level;
7830 	ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
7831 
7832 	keys = ref_path->node_keys;
7833 	nodes = ref_path->new_nodes;
7834 	memset(&keys[shared_level + 1], 0,
7835 	       sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
7836 	memset(&nodes[shared_level + 1], 0,
7837 	       sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
7838 
7839 	if (nodes[lowest_level] == 0) {
7840 		path->lowest_level = lowest_level;
7841 		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7842 					0, 1);
7843 		BUG_ON(ret);
7844 		for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
7845 			eb = path->nodes[level];
7846 			if (!eb || eb == reloc_root->node)
7847 				break;
7848 			nodes[level] = eb->start;
7849 			if (level == 0)
7850 				btrfs_item_key_to_cpu(eb, &keys[level], 0);
7851 			else
7852 				btrfs_node_key_to_cpu(eb, &keys[level], 0);
7853 		}
7854 		if (nodes[0] &&
7855 		    ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7856 			eb = path->nodes[0];
7857 			ret = replace_extents_in_leaf(trans, reloc_root, eb,
7858 						      group, reloc_inode);
7859 			BUG_ON(ret);
7860 		}
7861 		btrfs_release_path(reloc_root, path);
7862 	} else {
7863 		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
7864 				       lowest_level);
7865 		BUG_ON(ret);
7866 	}
7867 
7868 	/*
7869 	 * replace tree blocks in the fs tree with tree blocks in
7870 	 * the reloc tree.
7871 	 */
7872 	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
7873 	BUG_ON(ret < 0);
7874 
7875 	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7876 		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
7877 					0, 0);
7878 		BUG_ON(ret);
7879 		extent_buffer_get(path->nodes[0]);
7880 		eb = path->nodes[0];
7881 		btrfs_release_path(reloc_root, path);
7882 		ret = invalidate_extent_cache(reloc_root, eb, group, root);
7883 		BUG_ON(ret);
7884 		free_extent_buffer(eb);
7885 	}
7886 
7887 	mutex_unlock(&root->fs_info->tree_reloc_mutex);
7888 	path->lowest_level = 0;
7889 	return 0;
7890 }
7891 
7892 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
7893 					struct btrfs_root *root,
7894 					struct btrfs_path *path,
7895 					struct btrfs_key *first_key,
7896 					struct btrfs_ref_path *ref_path)
7897 {
7898 	int ret;
7899 
7900 	ret = relocate_one_path(trans, root, path, first_key,
7901 				ref_path, NULL, NULL);
7902 	BUG_ON(ret);
7903 
7904 	return 0;
7905 }
7906 
7907 static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
7908 				    struct btrfs_root *extent_root,
7909 				    struct btrfs_path *path,
7910 				    struct btrfs_key *extent_key)
7911 {
7912 	int ret;
7913 
7914 	ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
7915 	if (ret)
7916 		goto out;
7917 	ret = btrfs_del_item(trans, extent_root, path);
7918 out:
7919 	btrfs_release_path(extent_root, path);
7920 	return ret;
7921 }
7922 
7923 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
7924 						struct btrfs_ref_path *ref_path)
7925 {
7926 	struct btrfs_key root_key;
7927 
7928 	root_key.objectid = ref_path->root_objectid;
7929 	root_key.type = BTRFS_ROOT_ITEM_KEY;
7930 	if (is_cowonly_root(ref_path->root_objectid))
7931 		root_key.offset = 0;
7932 	else
7933 		root_key.offset = (u64)-1;
7934 
7935 	return btrfs_read_fs_root_no_name(fs_info, &root_key);
7936 }
7937 
7938 static noinline int relocate_one_extent(struct btrfs_root *extent_root,
7939 					struct btrfs_path *path,
7940 					struct btrfs_key *extent_key,
7941 					struct btrfs_block_group_cache *group,
7942 					struct inode *reloc_inode, int pass)
7943 {
7944 	struct btrfs_trans_handle *trans;
7945 	struct btrfs_root *found_root;
7946 	struct btrfs_ref_path *ref_path = NULL;
7947 	struct disk_extent *new_extents = NULL;
7948 	int nr_extents = 0;
7949 	int loops;
7950 	int ret;
7951 	int level;
7952 	struct btrfs_key first_key;
7953 	u64 prev_block = 0;
7954 
7955 
7956 	trans = btrfs_start_transaction(extent_root, 1);
7957 	BUG_ON(IS_ERR(trans));
7958 
7959 	if (extent_key->objectid == 0) {
7960 		ret = del_extent_zero(trans, extent_root, path, extent_key);
7961 		goto out;
7962 	}
7963 
7964 	ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
7965 	if (!ref_path) {
7966 		ret = -ENOMEM;
7967 		goto out;
7968 	}
7969 
7970 	for (loops = 0; ; loops++) {
7971 		if (loops == 0) {
7972 			ret = btrfs_first_ref_path(trans, extent_root, ref_path,
7973 						   extent_key->objectid);
7974 		} else {
7975 			ret = btrfs_next_ref_path(trans, extent_root, ref_path);
7976 		}
7977 		if (ret < 0)
7978 			goto out;
7979 		if (ret > 0)
7980 			break;
7981 
7982 		if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
7983 		    ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
7984 			continue;
7985 
7986 		found_root = read_ref_root(extent_root->fs_info, ref_path);
7987 		BUG_ON(!found_root);
7988 		/*
7989 		 * for reference counted tree, only process reference paths
7990 		 * rooted at the latest committed root.
7991 		 */
7992 		if (found_root->ref_cows &&
7993 		    ref_path->root_generation != found_root->root_key.offset)
7994 			continue;
7995 
7996 		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
7997 			if (pass == 0) {
7998 				/*
7999 				 * copy data extents to new locations
8000 				 */
8001 				u64 group_start = group->key.objectid;
8002 				ret = relocate_data_extent(reloc_inode,
8003 							   extent_key,
8004 							   group_start);
8005 				if (ret < 0)
8006 					goto out;
8007 				break;
8008 			}
8009 			level = 0;
8010 		} else {
8011 			level = ref_path->owner_objectid;
8012 		}
8013 
8014 		if (prev_block != ref_path->nodes[level]) {
8015 			struct extent_buffer *eb;
8016 			u64 block_start = ref_path->nodes[level];
8017 			u64 block_size = btrfs_level_size(found_root, level);
8018 
8019 			eb = read_tree_block(found_root, block_start,
8020 					     block_size, 0);
8021 			if (!eb) {
8022 				ret = -EIO;
8023 				goto out;
8024 			}
8025 			btrfs_tree_lock(eb);
8026 			BUG_ON(level != btrfs_header_level(eb));
8027 
8028 			if (level == 0)
8029 				btrfs_item_key_to_cpu(eb, &first_key, 0);
8030 			else
8031 				btrfs_node_key_to_cpu(eb, &first_key, 0);
8032 
8033 			btrfs_tree_unlock(eb);
8034 			free_extent_buffer(eb);
8035 			prev_block = block_start;
8036 		}
8037 
8038 		mutex_lock(&extent_root->fs_info->trans_mutex);
8039 		btrfs_record_root_in_trans(found_root);
8040 		mutex_unlock(&extent_root->fs_info->trans_mutex);
8041 		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
8042 			/*
8043 			 * try to update data extent references while
8044 			 * keeping metadata shared between snapshots.
8045 			 */
8046 			if (pass == 1) {
8047 				ret = relocate_one_path(trans, found_root,
8048 						path, &first_key, ref_path,
8049 						group, reloc_inode);
8050 				if (ret < 0)
8051 					goto out;
8052 				continue;
8053 			}
8054 			/*
8055 			 * use fallback method to process the remaining
8056 			 * references.
8057 			 */
8058 			if (!new_extents) {
8059 				u64 group_start = group->key.objectid;
8060 				new_extents = kmalloc(sizeof(*new_extents),
8061 						      GFP_NOFS);
8062 				if (!new_extents) {
8063 					ret = -ENOMEM;
8064 					goto out;
8065 				}
8066 				nr_extents = 1;
8067 				ret = get_new_locations(reloc_inode,
8068 							extent_key,
8069 							group_start, 1,
8070 							&new_extents,
8071 							&nr_extents);
8072 				if (ret)
8073 					goto out;
8074 			}
8075 			ret = replace_one_extent(trans, found_root,
8076 						path, extent_key,
8077 						&first_key, ref_path,
8078 						new_extents, nr_extents);
8079 		} else {
8080 			ret = relocate_tree_block(trans, found_root, path,
8081 						  &first_key, ref_path);
8082 		}
8083 		if (ret < 0)
8084 			goto out;
8085 	}
8086 	ret = 0;
8087 out:
8088 	btrfs_end_transaction(trans, extent_root);
8089 	kfree(new_extents);
8090 	kfree(ref_path);
8091 	return ret;
8092 }
8093 #endif
8094 
update_block_group_flags(struct btrfs_root * root,u64 flags)8095 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
8096 {
8097 	u64 num_devices;
8098 	u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
8099 		BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
8100 
8101 	/*
8102 	 * we add in the count of missing devices because we want
8103 	 * to make sure that any RAID levels on a degraded FS
8104 	 * continue to be honored.
8105 	 */
8106 	num_devices = root->fs_info->fs_devices->rw_devices +
8107 		root->fs_info->fs_devices->missing_devices;
8108 
8109 	if (num_devices == 1) {
8110 		stripped |= BTRFS_BLOCK_GROUP_DUP;
8111 		stripped = flags & ~stripped;
8112 
8113 		/* turn raid0 into single device chunks */
8114 		if (flags & BTRFS_BLOCK_GROUP_RAID0)
8115 			return stripped;
8116 
8117 		/* turn mirroring into duplication */
8118 		if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
8119 			     BTRFS_BLOCK_GROUP_RAID10))
8120 			return stripped | BTRFS_BLOCK_GROUP_DUP;
8121 		return flags;
8122 	} else {
8123 		/* they already had raid on here, just return */
8124 		if (flags & stripped)
8125 			return flags;
8126 
8127 		stripped |= BTRFS_BLOCK_GROUP_DUP;
8128 		stripped = flags & ~stripped;
8129 
8130 		/* switch duplicated blocks with raid1 */
8131 		if (flags & BTRFS_BLOCK_GROUP_DUP)
8132 			return stripped | BTRFS_BLOCK_GROUP_RAID1;
8133 
8134 		/* turn single device chunks into raid0 */
8135 		return stripped | BTRFS_BLOCK_GROUP_RAID0;
8136 	}
8137 	return flags;
8138 }
8139 
set_block_group_ro(struct btrfs_block_group_cache * cache)8140 static int set_block_group_ro(struct btrfs_block_group_cache *cache)
8141 {
8142 	struct btrfs_space_info *sinfo = cache->space_info;
8143 	u64 num_bytes;
8144 	int ret = -ENOSPC;
8145 
8146 	if (cache->ro)
8147 		return 0;
8148 
8149 	spin_lock(&sinfo->lock);
8150 	spin_lock(&cache->lock);
8151 	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
8152 		    cache->bytes_super - btrfs_block_group_used(&cache->item);
8153 
8154 	if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
8155 	    sinfo->bytes_may_use + sinfo->bytes_readonly +
8156 	    cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
8157 		sinfo->bytes_readonly += num_bytes;
8158 		sinfo->bytes_reserved += cache->reserved_pinned;
8159 		cache->reserved_pinned = 0;
8160 		cache->ro = 1;
8161 		ret = 0;
8162 	}
8163 
8164 	spin_unlock(&cache->lock);
8165 	spin_unlock(&sinfo->lock);
8166 	return ret;
8167 }
8168 
btrfs_set_block_group_ro(struct btrfs_root * root,struct btrfs_block_group_cache * cache)8169 int btrfs_set_block_group_ro(struct btrfs_root *root,
8170 			     struct btrfs_block_group_cache *cache)
8171 
8172 {
8173 	struct btrfs_trans_handle *trans;
8174 	u64 alloc_flags;
8175 	int ret;
8176 
8177 	BUG_ON(cache->ro);
8178 
8179 	trans = btrfs_join_transaction(root, 1);
8180 	BUG_ON(IS_ERR(trans));
8181 
8182 	alloc_flags = update_block_group_flags(root, cache->flags);
8183 	if (alloc_flags != cache->flags)
8184 		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8185 			       CHUNK_ALLOC_FORCE);
8186 
8187 	ret = set_block_group_ro(cache);
8188 	if (!ret)
8189 		goto out;
8190 	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
8191 	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8192 			     CHUNK_ALLOC_FORCE);
8193 	if (ret < 0)
8194 		goto out;
8195 	ret = set_block_group_ro(cache);
8196 out:
8197 	btrfs_end_transaction(trans, root);
8198 	return ret;
8199 }
8200 
btrfs_force_chunk_alloc(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 type)8201 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
8202 			    struct btrfs_root *root, u64 type)
8203 {
8204 	u64 alloc_flags = get_alloc_profile(root, type);
8205 	return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
8206 			      CHUNK_ALLOC_FORCE);
8207 }
8208 
8209 /*
8210  * helper to account the unused space of all the readonly block group in the
8211  * list. takes mirrors into account.
8212  */
__btrfs_get_ro_block_group_free_space(struct list_head * groups_list)8213 static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
8214 {
8215 	struct btrfs_block_group_cache *block_group;
8216 	u64 free_bytes = 0;
8217 	int factor;
8218 
8219 	list_for_each_entry(block_group, groups_list, list) {
8220 		spin_lock(&block_group->lock);
8221 
8222 		if (!block_group->ro) {
8223 			spin_unlock(&block_group->lock);
8224 			continue;
8225 		}
8226 
8227 		if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
8228 					  BTRFS_BLOCK_GROUP_RAID10 |
8229 					  BTRFS_BLOCK_GROUP_DUP))
8230 			factor = 2;
8231 		else
8232 			factor = 1;
8233 
8234 		free_bytes += (block_group->key.offset -
8235 			       btrfs_block_group_used(&block_group->item)) *
8236 			       factor;
8237 
8238 		spin_unlock(&block_group->lock);
8239 	}
8240 
8241 	return free_bytes;
8242 }
8243 
8244 /*
8245  * helper to account the unused space of all the readonly block group in the
8246  * space_info. takes mirrors into account.
8247  */
btrfs_account_ro_block_groups_free_space(struct btrfs_space_info * sinfo)8248 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
8249 {
8250 	int i;
8251 	u64 free_bytes = 0;
8252 
8253 	spin_lock(&sinfo->lock);
8254 
8255 	for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
8256 		if (!list_empty(&sinfo->block_groups[i]))
8257 			free_bytes += __btrfs_get_ro_block_group_free_space(
8258 						&sinfo->block_groups[i]);
8259 
8260 	spin_unlock(&sinfo->lock);
8261 
8262 	return free_bytes;
8263 }
8264 
btrfs_set_block_group_rw(struct btrfs_root * root,struct btrfs_block_group_cache * cache)8265 int btrfs_set_block_group_rw(struct btrfs_root *root,
8266 			      struct btrfs_block_group_cache *cache)
8267 {
8268 	struct btrfs_space_info *sinfo = cache->space_info;
8269 	u64 num_bytes;
8270 
8271 	BUG_ON(!cache->ro);
8272 
8273 	spin_lock(&sinfo->lock);
8274 	spin_lock(&cache->lock);
8275 	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
8276 		    cache->bytes_super - btrfs_block_group_used(&cache->item);
8277 	sinfo->bytes_readonly -= num_bytes;
8278 	cache->ro = 0;
8279 	spin_unlock(&cache->lock);
8280 	spin_unlock(&sinfo->lock);
8281 	return 0;
8282 }
8283 
8284 /*
8285  * checks to see if its even possible to relocate this block group.
8286  *
8287  * @return - -1 if it's not a good idea to relocate this block group, 0 if its
8288  * ok to go ahead and try.
8289  */
btrfs_can_relocate(struct btrfs_root * root,u64 bytenr)8290 int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
8291 {
8292 	struct btrfs_block_group_cache *block_group;
8293 	struct btrfs_space_info *space_info;
8294 	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
8295 	struct btrfs_device *device;
8296 	int full = 0;
8297 	int ret = 0;
8298 
8299 	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
8300 
8301 	/* odd, couldn't find the block group, leave it alone */
8302 	if (!block_group)
8303 		return -1;
8304 
8305 	/* no bytes used, we're good */
8306 	if (!btrfs_block_group_used(&block_group->item))
8307 		goto out;
8308 
8309 	space_info = block_group->space_info;
8310 	spin_lock(&space_info->lock);
8311 
8312 	full = space_info->full;
8313 
8314 	/*
8315 	 * if this is the last block group we have in this space, we can't
8316 	 * relocate it unless we're able to allocate a new chunk below.
8317 	 *
8318 	 * Otherwise, we need to make sure we have room in the space to handle
8319 	 * all of the extents from this block group.  If we can, we're good
8320 	 */
8321 	if ((space_info->total_bytes != block_group->key.offset) &&
8322 	   (space_info->bytes_used + space_info->bytes_reserved +
8323 	    space_info->bytes_pinned + space_info->bytes_readonly +
8324 	    btrfs_block_group_used(&block_group->item) <
8325 	    space_info->total_bytes)) {
8326 		spin_unlock(&space_info->lock);
8327 		goto out;
8328 	}
8329 	spin_unlock(&space_info->lock);
8330 
8331 	/*
8332 	 * ok we don't have enough space, but maybe we have free space on our
8333 	 * devices to allocate new chunks for relocation, so loop through our
8334 	 * alloc devices and guess if we have enough space.  However, if we
8335 	 * were marked as full, then we know there aren't enough chunks, and we
8336 	 * can just return.
8337 	 */
8338 	ret = -1;
8339 	if (full)
8340 		goto out;
8341 
8342 	mutex_lock(&root->fs_info->chunk_mutex);
8343 	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
8344 		u64 min_free = btrfs_block_group_used(&block_group->item);
8345 		u64 dev_offset;
8346 
8347 		/*
8348 		 * check to make sure we can actually find a chunk with enough
8349 		 * space to fit our block group in.
8350 		 */
8351 		if (device->total_bytes > device->bytes_used + min_free) {
8352 			ret = find_free_dev_extent(NULL, device, min_free,
8353 						   &dev_offset, NULL);
8354 			if (!ret)
8355 				break;
8356 			ret = -1;
8357 		}
8358 	}
8359 	mutex_unlock(&root->fs_info->chunk_mutex);
8360 out:
8361 	btrfs_put_block_group(block_group);
8362 	return ret;
8363 }
8364 
find_first_block_group(struct btrfs_root * root,struct btrfs_path * path,struct btrfs_key * key)8365 static int find_first_block_group(struct btrfs_root *root,
8366 		struct btrfs_path *path, struct btrfs_key *key)
8367 {
8368 	int ret = 0;
8369 	struct btrfs_key found_key;
8370 	struct extent_buffer *leaf;
8371 	int slot;
8372 
8373 	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
8374 	if (ret < 0)
8375 		goto out;
8376 
8377 	while (1) {
8378 		slot = path->slots[0];
8379 		leaf = path->nodes[0];
8380 		if (slot >= btrfs_header_nritems(leaf)) {
8381 			ret = btrfs_next_leaf(root, path);
8382 			if (ret == 0)
8383 				continue;
8384 			if (ret < 0)
8385 				goto out;
8386 			break;
8387 		}
8388 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
8389 
8390 		if (found_key.objectid >= key->objectid &&
8391 		    found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
8392 			ret = 0;
8393 			goto out;
8394 		}
8395 		path->slots[0]++;
8396 	}
8397 out:
8398 	return ret;
8399 }
8400 
btrfs_put_block_group_cache(struct btrfs_fs_info * info)8401 void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
8402 {
8403 	struct btrfs_block_group_cache *block_group;
8404 	u64 last = 0;
8405 
8406 	while (1) {
8407 		struct inode *inode;
8408 
8409 		block_group = btrfs_lookup_first_block_group(info, last);
8410 		while (block_group) {
8411 			spin_lock(&block_group->lock);
8412 			if (block_group->iref)
8413 				break;
8414 			spin_unlock(&block_group->lock);
8415 			block_group = next_block_group(info->tree_root,
8416 						       block_group);
8417 		}
8418 		if (!block_group) {
8419 			if (last == 0)
8420 				break;
8421 			last = 0;
8422 			continue;
8423 		}
8424 
8425 		inode = block_group->inode;
8426 		block_group->iref = 0;
8427 		block_group->inode = NULL;
8428 		spin_unlock(&block_group->lock);
8429 		iput(inode);
8430 		last = block_group->key.objectid + block_group->key.offset;
8431 		btrfs_put_block_group(block_group);
8432 	}
8433 }
8434 
btrfs_free_block_groups(struct btrfs_fs_info * info)8435 int btrfs_free_block_groups(struct btrfs_fs_info *info)
8436 {
8437 	struct btrfs_block_group_cache *block_group;
8438 	struct btrfs_space_info *space_info;
8439 	struct btrfs_caching_control *caching_ctl;
8440 	struct rb_node *n;
8441 
8442 	down_write(&info->extent_commit_sem);
8443 	while (!list_empty(&info->caching_block_groups)) {
8444 		caching_ctl = list_entry(info->caching_block_groups.next,
8445 					 struct btrfs_caching_control, list);
8446 		list_del(&caching_ctl->list);
8447 		put_caching_control(caching_ctl);
8448 	}
8449 	up_write(&info->extent_commit_sem);
8450 
8451 	spin_lock(&info->block_group_cache_lock);
8452 	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
8453 		block_group = rb_entry(n, struct btrfs_block_group_cache,
8454 				       cache_node);
8455 		rb_erase(&block_group->cache_node,
8456 			 &info->block_group_cache_tree);
8457 		spin_unlock(&info->block_group_cache_lock);
8458 
8459 		down_write(&block_group->space_info->groups_sem);
8460 		list_del(&block_group->list);
8461 		up_write(&block_group->space_info->groups_sem);
8462 
8463 		if (block_group->cached == BTRFS_CACHE_STARTED)
8464 			wait_block_group_cache_done(block_group);
8465 
8466 		/*
8467 		 * We haven't cached this block group, which means we could
8468 		 * possibly have excluded extents on this block group.
8469 		 */
8470 		if (block_group->cached == BTRFS_CACHE_NO)
8471 			free_excluded_extents(info->extent_root, block_group);
8472 
8473 		btrfs_remove_free_space_cache(block_group);
8474 		btrfs_put_block_group(block_group);
8475 
8476 		spin_lock(&info->block_group_cache_lock);
8477 	}
8478 	spin_unlock(&info->block_group_cache_lock);
8479 
8480 	/* now that all the block groups are freed, go through and
8481 	 * free all the space_info structs.  This is only called during
8482 	 * the final stages of unmount, and so we know nobody is
8483 	 * using them.  We call synchronize_rcu() once before we start,
8484 	 * just to be on the safe side.
8485 	 */
8486 	synchronize_rcu();
8487 
8488 	release_global_block_rsv(info);
8489 
8490 	while(!list_empty(&info->space_info)) {
8491 		space_info = list_entry(info->space_info.next,
8492 					struct btrfs_space_info,
8493 					list);
8494 		if (space_info->bytes_pinned > 0 ||
8495 		    space_info->bytes_reserved > 0) {
8496 			WARN_ON(1);
8497 			dump_space_info(space_info, 0, 0);
8498 		}
8499 		list_del(&space_info->list);
8500 		kfree(space_info);
8501 	}
8502 	return 0;
8503 }
8504 
__link_block_group(struct btrfs_space_info * space_info,struct btrfs_block_group_cache * cache)8505 static void __link_block_group(struct btrfs_space_info *space_info,
8506 			       struct btrfs_block_group_cache *cache)
8507 {
8508 	int index = get_block_group_index(cache);
8509 
8510 	down_write(&space_info->groups_sem);
8511 	list_add_tail(&cache->list, &space_info->block_groups[index]);
8512 	up_write(&space_info->groups_sem);
8513 }
8514 
btrfs_read_block_groups(struct btrfs_root * root)8515 int btrfs_read_block_groups(struct btrfs_root *root)
8516 {
8517 	struct btrfs_path *path;
8518 	int ret;
8519 	struct btrfs_block_group_cache *cache;
8520 	struct btrfs_fs_info *info = root->fs_info;
8521 	struct btrfs_space_info *space_info;
8522 	struct btrfs_key key;
8523 	struct btrfs_key found_key;
8524 	struct extent_buffer *leaf;
8525 	int need_clear = 0;
8526 	u64 cache_gen;
8527 
8528 	root = info->extent_root;
8529 	key.objectid = 0;
8530 	key.offset = 0;
8531 	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
8532 	path = btrfs_alloc_path();
8533 	if (!path)
8534 		return -ENOMEM;
8535 
8536 	cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
8537 	if (cache_gen != 0 &&
8538 	    btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
8539 		need_clear = 1;
8540 	if (btrfs_test_opt(root, CLEAR_CACHE))
8541 		need_clear = 1;
8542 	if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
8543 		printk(KERN_INFO "btrfs: disk space caching is enabled\n");
8544 
8545 	while (1) {
8546 		ret = find_first_block_group(root, path, &key);
8547 		if (ret > 0)
8548 			break;
8549 		if (ret != 0)
8550 			goto error;
8551 		leaf = path->nodes[0];
8552 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8553 		cache = kzalloc(sizeof(*cache), GFP_NOFS);
8554 		if (!cache) {
8555 			ret = -ENOMEM;
8556 			goto error;
8557 		}
8558 
8559 		atomic_set(&cache->count, 1);
8560 		spin_lock_init(&cache->lock);
8561 		spin_lock_init(&cache->tree_lock);
8562 		cache->fs_info = info;
8563 		INIT_LIST_HEAD(&cache->list);
8564 		INIT_LIST_HEAD(&cache->cluster_list);
8565 
8566 		if (need_clear)
8567 			cache->disk_cache_state = BTRFS_DC_CLEAR;
8568 
8569 		/*
8570 		 * we only want to have 32k of ram per block group for keeping
8571 		 * track of free space, and if we pass 1/2 of that we want to
8572 		 * start converting things over to using bitmaps
8573 		 */
8574 		cache->extents_thresh = ((1024 * 32) / 2) /
8575 			sizeof(struct btrfs_free_space);
8576 
8577 		read_extent_buffer(leaf, &cache->item,
8578 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
8579 				   sizeof(cache->item));
8580 		memcpy(&cache->key, &found_key, sizeof(found_key));
8581 
8582 		key.objectid = found_key.objectid + found_key.offset;
8583 		btrfs_release_path(root, path);
8584 		cache->flags = btrfs_block_group_flags(&cache->item);
8585 		cache->sectorsize = root->sectorsize;
8586 
8587 		/*
8588 		 * We need to exclude the super stripes now so that the space
8589 		 * info has super bytes accounted for, otherwise we'll think
8590 		 * we have more space than we actually do.
8591 		 */
8592 		exclude_super_stripes(root, cache);
8593 
8594 		/*
8595 		 * check for two cases, either we are full, and therefore
8596 		 * don't need to bother with the caching work since we won't
8597 		 * find any space, or we are empty, and we can just add all
8598 		 * the space in and be done with it.  This saves us _alot_ of
8599 		 * time, particularly in the full case.
8600 		 */
8601 		if (found_key.offset == btrfs_block_group_used(&cache->item)) {
8602 			cache->last_byte_to_unpin = (u64)-1;
8603 			cache->cached = BTRFS_CACHE_FINISHED;
8604 			free_excluded_extents(root, cache);
8605 		} else if (btrfs_block_group_used(&cache->item) == 0) {
8606 			cache->last_byte_to_unpin = (u64)-1;
8607 			cache->cached = BTRFS_CACHE_FINISHED;
8608 			add_new_free_space(cache, root->fs_info,
8609 					   found_key.objectid,
8610 					   found_key.objectid +
8611 					   found_key.offset);
8612 			free_excluded_extents(root, cache);
8613 		}
8614 
8615 		ret = update_space_info(info, cache->flags, found_key.offset,
8616 					btrfs_block_group_used(&cache->item),
8617 					&space_info);
8618 		BUG_ON(ret);
8619 		cache->space_info = space_info;
8620 		spin_lock(&cache->space_info->lock);
8621 		cache->space_info->bytes_readonly += cache->bytes_super;
8622 		spin_unlock(&cache->space_info->lock);
8623 
8624 		__link_block_group(space_info, cache);
8625 
8626 		ret = btrfs_add_block_group_cache(root->fs_info, cache);
8627 		BUG_ON(ret);
8628 
8629 		set_avail_alloc_bits(root->fs_info, cache->flags);
8630 		if (btrfs_chunk_readonly(root, cache->key.objectid))
8631 			set_block_group_ro(cache);
8632 	}
8633 
8634 	list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
8635 		if (!(get_alloc_profile(root, space_info->flags) &
8636 		      (BTRFS_BLOCK_GROUP_RAID10 |
8637 		       BTRFS_BLOCK_GROUP_RAID1 |
8638 		       BTRFS_BLOCK_GROUP_DUP)))
8639 			continue;
8640 		/*
8641 		 * avoid allocating from un-mirrored block group if there are
8642 		 * mirrored block groups.
8643 		 */
8644 		list_for_each_entry(cache, &space_info->block_groups[3], list)
8645 			set_block_group_ro(cache);
8646 		list_for_each_entry(cache, &space_info->block_groups[4], list)
8647 			set_block_group_ro(cache);
8648 	}
8649 
8650 	init_global_block_rsv(info);
8651 	ret = 0;
8652 error:
8653 	btrfs_free_path(path);
8654 	return ret;
8655 }
8656 
btrfs_make_block_group(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 bytes_used,u64 type,u64 chunk_objectid,u64 chunk_offset,u64 size)8657 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8658 			   struct btrfs_root *root, u64 bytes_used,
8659 			   u64 type, u64 chunk_objectid, u64 chunk_offset,
8660 			   u64 size)
8661 {
8662 	int ret;
8663 	struct btrfs_root *extent_root;
8664 	struct btrfs_block_group_cache *cache;
8665 
8666 	extent_root = root->fs_info->extent_root;
8667 
8668 	root->fs_info->last_trans_log_full_commit = trans->transid;
8669 
8670 	cache = kzalloc(sizeof(*cache), GFP_NOFS);
8671 	if (!cache)
8672 		return -ENOMEM;
8673 
8674 	cache->key.objectid = chunk_offset;
8675 	cache->key.offset = size;
8676 	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8677 	cache->sectorsize = root->sectorsize;
8678 	cache->fs_info = root->fs_info;
8679 
8680 	/*
8681 	 * we only want to have 32k of ram per block group for keeping track
8682 	 * of free space, and if we pass 1/2 of that we want to start
8683 	 * converting things over to using bitmaps
8684 	 */
8685 	cache->extents_thresh = ((1024 * 32) / 2) /
8686 		sizeof(struct btrfs_free_space);
8687 	atomic_set(&cache->count, 1);
8688 	spin_lock_init(&cache->lock);
8689 	spin_lock_init(&cache->tree_lock);
8690 	INIT_LIST_HEAD(&cache->list);
8691 	INIT_LIST_HEAD(&cache->cluster_list);
8692 
8693 	btrfs_set_block_group_used(&cache->item, bytes_used);
8694 	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
8695 	cache->flags = type;
8696 	btrfs_set_block_group_flags(&cache->item, type);
8697 
8698 	cache->last_byte_to_unpin = (u64)-1;
8699 	cache->cached = BTRFS_CACHE_FINISHED;
8700 	exclude_super_stripes(root, cache);
8701 
8702 	add_new_free_space(cache, root->fs_info, chunk_offset,
8703 			   chunk_offset + size);
8704 
8705 	free_excluded_extents(root, cache);
8706 
8707 	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
8708 				&cache->space_info);
8709 	BUG_ON(ret);
8710 
8711 	spin_lock(&cache->space_info->lock);
8712 	cache->space_info->bytes_readonly += cache->bytes_super;
8713 	spin_unlock(&cache->space_info->lock);
8714 
8715 	__link_block_group(cache->space_info, cache);
8716 
8717 	ret = btrfs_add_block_group_cache(root->fs_info, cache);
8718 	BUG_ON(ret);
8719 
8720 	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
8721 				sizeof(cache->item));
8722 	BUG_ON(ret);
8723 
8724 	set_avail_alloc_bits(extent_root->fs_info, type);
8725 
8726 	return 0;
8727 }
8728 
btrfs_remove_block_group(struct btrfs_trans_handle * trans,struct btrfs_root * root,u64 group_start)8729 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8730 			     struct btrfs_root *root, u64 group_start)
8731 {
8732 	struct btrfs_path *path;
8733 	struct btrfs_block_group_cache *block_group;
8734 	struct btrfs_free_cluster *cluster;
8735 	struct btrfs_root *tree_root = root->fs_info->tree_root;
8736 	struct btrfs_key key;
8737 	struct inode *inode;
8738 	int ret;
8739 	int factor;
8740 
8741 	root = root->fs_info->extent_root;
8742 
8743 	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
8744 	BUG_ON(!block_group);
8745 	BUG_ON(!block_group->ro);
8746 
8747 	/*
8748 	 * Free the reserved super bytes from this block group before
8749 	 * remove it.
8750 	 */
8751 	free_excluded_extents(root, block_group);
8752 
8753 	memcpy(&key, &block_group->key, sizeof(key));
8754 	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
8755 				  BTRFS_BLOCK_GROUP_RAID1 |
8756 				  BTRFS_BLOCK_GROUP_RAID10))
8757 		factor = 2;
8758 	else
8759 		factor = 1;
8760 
8761 	/* make sure this block group isn't part of an allocation cluster */
8762 	cluster = &root->fs_info->data_alloc_cluster;
8763 	spin_lock(&cluster->refill_lock);
8764 	btrfs_return_cluster_to_free_space(block_group, cluster);
8765 	spin_unlock(&cluster->refill_lock);
8766 
8767 	/*
8768 	 * make sure this block group isn't part of a metadata
8769 	 * allocation cluster
8770 	 */
8771 	cluster = &root->fs_info->meta_alloc_cluster;
8772 	spin_lock(&cluster->refill_lock);
8773 	btrfs_return_cluster_to_free_space(block_group, cluster);
8774 	spin_unlock(&cluster->refill_lock);
8775 
8776 	path = btrfs_alloc_path();
8777 	BUG_ON(!path);
8778 
8779 	inode = lookup_free_space_inode(root, block_group, path);
8780 	if (!IS_ERR(inode)) {
8781 		btrfs_orphan_add(trans, inode);
8782 		clear_nlink(inode);
8783 		/* One for the block groups ref */
8784 		spin_lock(&block_group->lock);
8785 		if (block_group->iref) {
8786 			block_group->iref = 0;
8787 			block_group->inode = NULL;
8788 			spin_unlock(&block_group->lock);
8789 			iput(inode);
8790 		} else {
8791 			spin_unlock(&block_group->lock);
8792 		}
8793 		/* One for our lookup ref */
8794 		iput(inode);
8795 	}
8796 
8797 	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
8798 	key.offset = block_group->key.objectid;
8799 	key.type = 0;
8800 
8801 	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
8802 	if (ret < 0)
8803 		goto out;
8804 	if (ret > 0)
8805 		btrfs_release_path(tree_root, path);
8806 	if (ret == 0) {
8807 		ret = btrfs_del_item(trans, tree_root, path);
8808 		if (ret)
8809 			goto out;
8810 		btrfs_release_path(tree_root, path);
8811 	}
8812 
8813 	spin_lock(&root->fs_info->block_group_cache_lock);
8814 	rb_erase(&block_group->cache_node,
8815 		 &root->fs_info->block_group_cache_tree);
8816 	spin_unlock(&root->fs_info->block_group_cache_lock);
8817 
8818 	down_write(&block_group->space_info->groups_sem);
8819 	/*
8820 	 * we must use list_del_init so people can check to see if they
8821 	 * are still on the list after taking the semaphore
8822 	 */
8823 	list_del_init(&block_group->list);
8824 	up_write(&block_group->space_info->groups_sem);
8825 
8826 	if (block_group->cached == BTRFS_CACHE_STARTED)
8827 		wait_block_group_cache_done(block_group);
8828 
8829 	btrfs_remove_free_space_cache(block_group);
8830 
8831 	spin_lock(&block_group->space_info->lock);
8832 	block_group->space_info->total_bytes -= block_group->key.offset;
8833 	block_group->space_info->bytes_readonly -= block_group->key.offset;
8834 	block_group->space_info->disk_total -= block_group->key.offset * factor;
8835 	spin_unlock(&block_group->space_info->lock);
8836 
8837 	memcpy(&key, &block_group->key, sizeof(key));
8838 
8839 	btrfs_clear_space_info_full(root->fs_info);
8840 
8841 	btrfs_put_block_group(block_group);
8842 	btrfs_put_block_group(block_group);
8843 
8844 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
8845 	if (ret > 0)
8846 		ret = -EIO;
8847 	if (ret < 0)
8848 		goto out;
8849 
8850 	ret = btrfs_del_item(trans, root, path);
8851 out:
8852 	btrfs_free_path(path);
8853 	return ret;
8854 }
8855 
btrfs_init_space_info(struct btrfs_fs_info * fs_info)8856 int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
8857 {
8858 	struct btrfs_space_info *space_info;
8859 	struct btrfs_super_block *disk_super;
8860 	u64 features;
8861 	u64 flags;
8862 	int mixed = 0;
8863 	int ret;
8864 
8865 	disk_super = &fs_info->super_copy;
8866 	if (!btrfs_super_root(disk_super))
8867 		return 1;
8868 
8869 	features = btrfs_super_incompat_flags(disk_super);
8870 	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
8871 		mixed = 1;
8872 
8873 	flags = BTRFS_BLOCK_GROUP_SYSTEM;
8874 	ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8875 	if (ret)
8876 		goto out;
8877 
8878 	if (mixed) {
8879 		flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
8880 		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8881 	} else {
8882 		flags = BTRFS_BLOCK_GROUP_METADATA;
8883 		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8884 		if (ret)
8885 			goto out;
8886 
8887 		flags = BTRFS_BLOCK_GROUP_DATA;
8888 		ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8889 	}
8890 out:
8891 	return ret;
8892 }
8893 
btrfs_error_unpin_extent_range(struct btrfs_root * root,u64 start,u64 end)8894 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
8895 {
8896 	return unpin_extent_range(root, start, end);
8897 }
8898 
btrfs_error_discard_extent(struct btrfs_root * root,u64 bytenr,u64 num_bytes,u64 * actual_bytes)8899 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
8900 			       u64 num_bytes, u64 *actual_bytes)
8901 {
8902 	return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
8903 }
8904 
btrfs_trim_fs(struct btrfs_root * root,struct fstrim_range * range)8905 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
8906 {
8907 	struct btrfs_fs_info *fs_info = root->fs_info;
8908 	struct btrfs_block_group_cache *cache = NULL;
8909 	u64 group_trimmed;
8910 	u64 start;
8911 	u64 end;
8912 	u64 trimmed = 0;
8913 	int ret = 0;
8914 
8915 	cache = btrfs_lookup_block_group(fs_info, range->start);
8916 
8917 	while (cache) {
8918 		if (cache->key.objectid >= (range->start + range->len)) {
8919 			btrfs_put_block_group(cache);
8920 			break;
8921 		}
8922 
8923 		start = max(range->start, cache->key.objectid);
8924 		end = min(range->start + range->len,
8925 				cache->key.objectid + cache->key.offset);
8926 
8927 		if (end - start >= range->minlen) {
8928 			if (!block_group_cache_done(cache)) {
8929 				ret = cache_block_group(cache, NULL, root, 0);
8930 				if (!ret)
8931 					wait_block_group_cache_done(cache);
8932 			}
8933 			ret = btrfs_trim_block_group(cache,
8934 						     &group_trimmed,
8935 						     start,
8936 						     end,
8937 						     range->minlen);
8938 
8939 			trimmed += group_trimmed;
8940 			if (ret) {
8941 				btrfs_put_block_group(cache);
8942 				break;
8943 			}
8944 		}
8945 
8946 		cache = next_block_group(fs_info->tree_root, cache);
8947 	}
8948 
8949 	range->len = trimmed;
8950 	return ret;
8951 }
8952