1 /*
2  *  linux/fs/ext3/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13 
14 #include <linux/config.h>
15 #include <linux/sched.h>
16 #include <linux/fs.h>
17 #include <linux/jbd.h>
18 #include <linux/ext3_fs.h>
19 #include <linux/ext3_jbd.h>
20 #include <linux/locks.h>
21 #include <linux/quotaops.h>
22 
23 /*
24  * balloc.c contains the blocks allocation and deallocation routines
25  */
26 
27 /*
28  * The free blocks are managed by bitmaps.  A file system contains several
29  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
30  * block for inodes, N blocks for the inode table and data blocks.
31  *
32  * The file system contains group descriptors which are located after the
33  * super block.  Each descriptor contains the number of the bitmap block and
34  * the free blocks count in the block.  The descriptors are loaded in memory
35  * when a file system is mounted (see ext3_read_super).
36  */
37 
38 
39 #define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
40 
ext3_get_group_desc(struct super_block * sb,unsigned int block_group,struct buffer_head ** bh)41 struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
42 					     unsigned int block_group,
43 					     struct buffer_head ** bh)
44 {
45 	unsigned long group_desc;
46 	unsigned long desc;
47 	struct ext3_group_desc * gdp;
48 
49 	if (block_group >= sb->u.ext3_sb.s_groups_count) {
50 		ext3_error (sb, "ext3_get_group_desc",
51 			    "block_group >= groups_count - "
52 			    "block_group = %d, groups_count = %lu",
53 			    block_group, sb->u.ext3_sb.s_groups_count);
54 
55 		return NULL;
56 	}
57 
58 	group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
59 	desc = block_group % EXT3_DESC_PER_BLOCK(sb);
60 	if (!sb->u.ext3_sb.s_group_desc[group_desc]) {
61 		ext3_error (sb, "ext3_get_group_desc",
62 			    "Group descriptor not loaded - "
63 			    "block_group = %d, group_desc = %lu, desc = %lu",
64 			     block_group, group_desc, desc);
65 		return NULL;
66 	}
67 
68 	gdp = (struct ext3_group_desc *)
69 	      sb->u.ext3_sb.s_group_desc[group_desc]->b_data;
70 	if (bh)
71 		*bh = sb->u.ext3_sb.s_group_desc[group_desc];
72 	return gdp + desc;
73 }
74 
75 /*
76  * Read the bitmap for a given block_group, reading into the specified
77  * slot in the superblock's bitmap cache.
78  *
79  * Return >=0 on success or a -ve error code.
80  */
81 
read_block_bitmap(struct super_block * sb,unsigned int block_group,unsigned long bitmap_nr)82 static int read_block_bitmap (struct super_block * sb,
83 			       unsigned int block_group,
84 			       unsigned long bitmap_nr)
85 {
86 	struct ext3_group_desc * gdp;
87 	struct buffer_head * bh = NULL;
88 	int retval = -EIO;
89 
90 	gdp = ext3_get_group_desc (sb, block_group, NULL);
91 	if (!gdp)
92 		goto error_out;
93 	retval = 0;
94 	bh = sb_bread(sb, le32_to_cpu(gdp->bg_block_bitmap));
95 	if (!bh) {
96 		ext3_error (sb, "read_block_bitmap",
97 			    "Cannot read block bitmap - "
98 			    "block_group = %d, block_bitmap = %lu",
99 			    block_group, (unsigned long) gdp->bg_block_bitmap);
100 		retval = -EIO;
101 	}
102 	/*
103 	 * On IO error, just leave a zero in the superblock's block pointer for
104 	 * this group.  The IO will be retried next time.
105 	 */
106 error_out:
107 	sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group;
108 	sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh;
109 	return retval;
110 }
111 
112 /*
113  * load_block_bitmap loads the block bitmap for a blocks group
114  *
115  * It maintains a cache for the last bitmaps loaded.  This cache is managed
116  * with a LRU algorithm.
117  *
118  * Notes:
119  * 1/ There is one cache per mounted file system.
120  * 2/ If the file system contains less than EXT3_MAX_GROUP_LOADED groups,
121  *    this function reads the bitmap without maintaining a LRU cache.
122  *
123  * Return the slot used to store the bitmap, or a -ve error code.
124  */
__load_block_bitmap(struct super_block * sb,unsigned int block_group)125 static int __load_block_bitmap (struct super_block * sb,
126 			        unsigned int block_group)
127 {
128 	int i, j, retval = 0;
129 	unsigned long block_bitmap_number;
130 	struct buffer_head * block_bitmap;
131 
132 	if (block_group >= sb->u.ext3_sb.s_groups_count)
133 		ext3_panic (sb, "load_block_bitmap",
134 			    "block_group >= groups_count - "
135 			    "block_group = %d, groups_count = %lu",
136 			    block_group, sb->u.ext3_sb.s_groups_count);
137 
138 	if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) {
139 		if (sb->u.ext3_sb.s_block_bitmap[block_group]) {
140 			if (sb->u.ext3_sb.s_block_bitmap_number[block_group] ==
141 			    block_group)
142 				return block_group;
143 			ext3_error (sb, "__load_block_bitmap",
144 				    "block_group != block_bitmap_number");
145 		}
146 		retval = read_block_bitmap (sb, block_group, block_group);
147 		if (retval < 0)
148 			return retval;
149 		return block_group;
150 	}
151 
152 	for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
153 		    sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++)
154 		;
155 	if (i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
156   	    sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) {
157 		block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i];
158 		block_bitmap = sb->u.ext3_sb.s_block_bitmap[i];
159 		for (j = i; j > 0; j--) {
160 			sb->u.ext3_sb.s_block_bitmap_number[j] =
161 				sb->u.ext3_sb.s_block_bitmap_number[j - 1];
162 			sb->u.ext3_sb.s_block_bitmap[j] =
163 				sb->u.ext3_sb.s_block_bitmap[j - 1];
164 		}
165 		sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number;
166 		sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap;
167 
168 		/*
169 		 * There's still one special case here --- if block_bitmap == 0
170 		 * then our last attempt to read the bitmap failed and we have
171 		 * just ended up caching that failure.  Try again to read it.
172 		 */
173 		if (!block_bitmap)
174 			retval = read_block_bitmap (sb, block_group, 0);
175 	} else {
176 		if (sb->u.ext3_sb.s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
177 			sb->u.ext3_sb.s_loaded_block_bitmaps++;
178 		else
179 			brelse (sb->u.ext3_sb.s_block_bitmap
180 					[EXT3_MAX_GROUP_LOADED - 1]);
181 		for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1;
182 					j > 0;  j--) {
183 			sb->u.ext3_sb.s_block_bitmap_number[j] =
184 				sb->u.ext3_sb.s_block_bitmap_number[j - 1];
185 			sb->u.ext3_sb.s_block_bitmap[j] =
186 				sb->u.ext3_sb.s_block_bitmap[j - 1];
187 		}
188 		retval = read_block_bitmap (sb, block_group, 0);
189 	}
190 	return retval;
191 }
192 
193 /*
194  * Load the block bitmap for a given block group.  First of all do a couple
195  * of fast lookups for common cases and then pass the request onto the guts
196  * of the bitmap loader.
197  *
198  * Return the slot number of the group in the superblock bitmap cache's on
199  * success, or a -ve error code.
200  *
201  * There is still one inconsistency here --- if the number of groups in this
202  * filesystems is <= EXT3_MAX_GROUP_LOADED, then we have no way of
203  * differentiating between a group for which we have never performed a bitmap
204  * IO request, and a group for which the last bitmap read request failed.
205  */
load_block_bitmap(struct super_block * sb,unsigned int block_group)206 static inline int load_block_bitmap (struct super_block * sb,
207 				     unsigned int block_group)
208 {
209 	int slot;
210 
211 	/*
212 	 * Do the lookup for the slot.  First of all, check if we're asking
213 	 * for the same slot as last time, and did we succeed that last time?
214 	 */
215 	if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 &&
216 	    sb->u.ext3_sb.s_block_bitmap_number[0] == block_group &&
217 	    sb->u.ext3_sb.s_block_bitmap[0]) {
218 		return 0;
219 	}
220 	/*
221 	 * Or can we do a fast lookup based on a loaded group on a filesystem
222 	 * small enough to be mapped directly into the superblock?
223 	 */
224 	else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED &&
225 		 sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group
226 			&& sb->u.ext3_sb.s_block_bitmap[block_group]) {
227 		slot = block_group;
228 	}
229 	/*
230 	 * If not, then do a full lookup for this block group.
231 	 */
232 	else {
233 		slot = __load_block_bitmap (sb, block_group);
234 	}
235 
236 	/*
237 	 * <0 means we just got an error
238 	 */
239 	if (slot < 0)
240 		return slot;
241 
242 	/*
243 	 * If it's a valid slot, we may still have cached a previous IO error,
244 	 * in which case the bh in the superblock cache will be zero.
245 	 */
246 	if (!sb->u.ext3_sb.s_block_bitmap[slot])
247 		return -EIO;
248 
249 	/*
250 	 * Must have been read in OK to get this far.
251 	 */
252 	return slot;
253 }
254 
255 /* Free given blocks, update quota and i_blocks field */
ext3_free_blocks(handle_t * handle,struct inode * inode,unsigned long block,unsigned long count)256 void ext3_free_blocks (handle_t *handle, struct inode * inode,
257 			unsigned long block, unsigned long count)
258 {
259 	struct buffer_head *bitmap_bh;
260 	struct buffer_head *gd_bh;
261 	unsigned long block_group;
262 	unsigned long bit;
263 	unsigned long i;
264 	int bitmap_nr;
265 	unsigned long overflow;
266 	struct super_block * sb;
267 	struct ext3_group_desc * gdp;
268 	struct ext3_super_block * es;
269 	int err = 0, ret;
270 	int dquot_freed_blocks = 0;
271 
272 	sb = inode->i_sb;
273 	if (!sb) {
274 		printk ("ext3_free_blocks: nonexistent device");
275 		return;
276 	}
277 	lock_super (sb);
278 	es = sb->u.ext3_sb.s_es;
279 	if (block < le32_to_cpu(es->s_first_data_block) ||
280 	    block + count < block ||
281 	    (block + count) > le32_to_cpu(es->s_blocks_count)) {
282 		ext3_error (sb, "ext3_free_blocks",
283 			    "Freeing blocks not in datazone - "
284 			    "block = %lu, count = %lu", block, count);
285 		goto error_return;
286 	}
287 
288 	ext3_debug ("freeing block %lu\n", block);
289 
290 do_more:
291 	overflow = 0;
292 	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
293 		      EXT3_BLOCKS_PER_GROUP(sb);
294 	bit = (block - le32_to_cpu(es->s_first_data_block)) %
295 		      EXT3_BLOCKS_PER_GROUP(sb);
296 	/*
297 	 * Check to see if we are freeing blocks across a group
298 	 * boundary.
299 	 */
300 	if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
301 		overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
302 		count -= overflow;
303 	}
304 	bitmap_nr = load_block_bitmap (sb, block_group);
305 	if (bitmap_nr < 0)
306 		goto error_return;
307 
308 	bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
309 	gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
310 	if (!gdp)
311 		goto error_return;
312 
313 	/*
314 	 * We are about to start releasing blocks in the bitmap,
315 	 * so we need undo access.
316 	 */
317 	/* @@@ check errors */
318 	BUFFER_TRACE(bitmap_bh, "getting undo access");
319 	err = ext3_journal_get_undo_access(handle, bitmap_bh);
320 	if (err)
321 		goto error_return;
322 
323 	/*
324 	 * We are about to modify some metadata.  Call the journal APIs
325 	 * to unshare ->b_data if a currently-committing transaction is
326 	 * using it
327 	 */
328 	BUFFER_TRACE(gd_bh, "get_write_access");
329 	err = ext3_journal_get_write_access(handle, gd_bh);
330 	if (err)
331 		goto error_return;
332 
333 	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
334 	err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
335 	if (err)
336 		goto error_return;
337 
338 	for (i = 0; i < count; i++, block++) {
339 		if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
340 		    block == le32_to_cpu(gdp->bg_inode_bitmap) ||
341 		    in_range(block, le32_to_cpu(gdp->bg_inode_table),
342 			     EXT3_SB(sb)->s_itb_per_group)) {
343 			ext3_error(sb, __FUNCTION__,
344 				   "Freeing block in system zone - block = %lu",
345 				   block);
346 			continue;
347 		}
348 
349 		/*
350 		 * An HJ special.  This is expensive...
351 		 */
352 #ifdef CONFIG_JBD_DEBUG
353 		{
354 			struct buffer_head *debug_bh;
355 			debug_bh = sb_get_hash_table(sb, block);
356 			if (debug_bh) {
357 				BUFFER_TRACE(debug_bh, "Deleted!");
358 				if (!bh2jh(bitmap_bh)->b_committed_data)
359 					BUFFER_TRACE(debug_bh,
360 						"No commited data in bitmap");
361 				BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
362 				__brelse(debug_bh);
363 			}
364 		}
365 #endif
366 		BUFFER_TRACE(bitmap_bh, "clear bit");
367 		if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
368 			ext3_error(sb, __FUNCTION__,
369 				   "bit already cleared for block %lu", block);
370 			BUFFER_TRACE(bitmap_bh, "bit already cleared");
371 		} else {
372 			dquot_freed_blocks++;
373 			gdp->bg_free_blocks_count =
374 			  cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)+1);
375 			es->s_free_blocks_count =
376 			  cpu_to_le32(le32_to_cpu(es->s_free_blocks_count)+1);
377 		}
378 		/* @@@ This prevents newly-allocated data from being
379 		 * freed and then reallocated within the same
380 		 * transaction.
381 		 *
382 		 * Ideally we would want to allow that to happen, but to
383 		 * do so requires making journal_forget() capable of
384 		 * revoking the queued write of a data block, which
385 		 * implies blocking on the journal lock.  *forget()
386 		 * cannot block due to truncate races.
387 		 *
388 		 * Eventually we can fix this by making journal_forget()
389 		 * return a status indicating whether or not it was able
390 		 * to revoke the buffer.  On successful revoke, it is
391 		 * safe not to set the allocation bit in the committed
392 		 * bitmap, because we know that there is no outstanding
393 		 * activity on the buffer any more and so it is safe to
394 		 * reallocate it.
395 		 */
396 		BUFFER_TRACE(bitmap_bh, "clear in b_committed_data");
397 		J_ASSERT_BH(bitmap_bh,
398 				bh2jh(bitmap_bh)->b_committed_data != NULL);
399 		ext3_set_bit(bit + i, bh2jh(bitmap_bh)->b_committed_data);
400 	}
401 
402 	/* We dirtied the bitmap block */
403 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
404 	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
405 
406 	/* And the group descriptor block */
407 	BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
408 	ret = ext3_journal_dirty_metadata(handle, gd_bh);
409 	if (!err) err = ret;
410 
411 	/* And the superblock */
412 	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock");
413 	ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
414 	if (!err) err = ret;
415 
416 	if (overflow && !err) {
417 		count = overflow;
418 		goto do_more;
419 	}
420 	sb->s_dirt = 1;
421 error_return:
422 	ext3_std_error(sb, err);
423 	unlock_super(sb);
424 	if (dquot_freed_blocks)
425 		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
426 	return;
427 }
428 
429 /* For ext3 allocations, we must not reuse any blocks which are
430  * allocated in the bitmap buffer's "last committed data" copy.  This
431  * prevents deletes from freeing up the page for reuse until we have
432  * committed the delete transaction.
433  *
434  * If we didn't do this, then deleting something and reallocating it as
435  * data would allow the old block to be overwritten before the
436  * transaction committed (because we force data to disk before commit).
437  * This would lead to corruption if we crashed between overwriting the
438  * data and committing the delete.
439  *
440  * @@@ We may want to make this allocation behaviour conditional on
441  * data-writes at some point, and disable it for metadata allocations or
442  * sync-data inodes.
443  */
ext3_test_allocatable(int nr,struct buffer_head * bh)444 static int ext3_test_allocatable(int nr, struct buffer_head *bh)
445 {
446 	if (ext3_test_bit(nr, bh->b_data))
447 		return 0;
448 	if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data)
449 		return 1;
450 	return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data);
451 }
452 
453 /*
454  * Find an allocatable block in a bitmap.  We honour both the bitmap and
455  * its last-committed copy (if that exists), and perform the "most
456  * appropriate allocation" algorithm of looking for a free block near
457  * the initial goal; then for a free byte somewhere in the bitmap; then
458  * for any free bit in the bitmap.
459  */
find_next_usable_block(int start,struct buffer_head * bh,int maxblocks)460 static int find_next_usable_block(int start,
461 			struct buffer_head *bh, int maxblocks)
462 {
463 	int here, next;
464 	char *p, *r;
465 
466 	if (start > 0) {
467 		/*
468 		 * The goal was occupied; search forward for a free
469 		 * block within the next XX blocks.
470 		 *
471 		 * end_goal is more or less random, but it has to be
472 		 * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the
473 		 * next 64-bit boundary is simple..
474 		 */
475 		int end_goal = (start + 63) & ~63;
476 		here = ext3_find_next_zero_bit(bh->b_data, end_goal, start);
477 		if (here < end_goal && ext3_test_allocatable(here, bh))
478 			return here;
479 
480 		ext3_debug ("Bit not found near goal\n");
481 
482 	}
483 
484 	here = start;
485 	if (here < 0)
486 		here = 0;
487 
488 	/*
489 	 * There has been no free block found in the near vicinity of
490 	 * the goal: do a search forward through the block groups,
491 	 * searching in each group first for an entire free byte in the
492 	 * bitmap and then for any free bit.
493 	 *
494 	 * Search first in the remainder of the current group
495 	 */
496 	p = ((char *) bh->b_data) + (here >> 3);
497 	r = memscan(p, 0, (maxblocks - here + 7) >> 3);
498 	next = (r - ((char *) bh->b_data)) << 3;
499 
500 	if (next < maxblocks && ext3_test_allocatable(next, bh))
501 		return next;
502 
503 	/* The bitmap search --- search forward alternately
504 	 * through the actual bitmap and the last-committed copy
505 	 * until we find a bit free in both. */
506 
507 	while (here < maxblocks) {
508 		next  = ext3_find_next_zero_bit ((unsigned long *) bh->b_data,
509 						 maxblocks, here);
510 		if (next >= maxblocks)
511 			return -1;
512 		if (ext3_test_allocatable(next, bh))
513 			return next;
514 
515 		J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data);
516 		here = ext3_find_next_zero_bit
517 			((unsigned long *) bh2jh(bh)->b_committed_data,
518 			 maxblocks, next);
519 	}
520 	return -1;
521 }
522 
523 /*
524  * ext3_new_block uses a goal block to assist allocation.  If the goal is
525  * free, or there is a free block within 32 blocks of the goal, that block
526  * is allocated.  Otherwise a forward search is made for a free block; within
527  * each block group the search first looks for an entire free byte in the block
528  * bitmap, and then for any free bit if that fails.
529  * This function also updates quota and i_blocks field.
530  */
ext3_new_block(handle_t * handle,struct inode * inode,unsigned long goal,u32 * prealloc_count,u32 * prealloc_block,int * errp)531 int ext3_new_block (handle_t *handle, struct inode * inode,
532 		unsigned long goal, u32 * prealloc_count,
533 		u32 * prealloc_block, int * errp)
534 {
535 	struct buffer_head * bh, *bhtmp;
536 	struct buffer_head * bh2;
537 #if 0
538 	char * p, * r;
539 #endif
540 	int i, j, k, tmp, alloctmp;
541 	int bitmap_nr;
542 	int fatal = 0, err;
543 	int performed_allocation = 0;
544 	struct super_block * sb;
545 	struct ext3_group_desc * gdp;
546 	struct ext3_super_block * es;
547 #ifdef EXT3FS_DEBUG
548 	static int goal_hits = 0, goal_attempts = 0;
549 #endif
550 	*errp = -ENOSPC;
551 	sb = inode->i_sb;
552 	if (!sb) {
553 		printk ("ext3_new_block: nonexistent device");
554 		return 0;
555 	}
556 
557 	/*
558 	 * Check quota for allocation of this block.
559 	 */
560 	if (DQUOT_ALLOC_BLOCK(inode, 1)) {
561 		*errp = -EDQUOT;
562 		return 0;
563 	}
564 
565 	lock_super (sb);
566 	es = sb->u.ext3_sb.s_es;
567 	if (le32_to_cpu(es->s_free_blocks_count) <=
568 			le32_to_cpu(es->s_r_blocks_count) &&
569 	    ((sb->u.ext3_sb.s_resuid != current->fsuid) &&
570 	     (sb->u.ext3_sb.s_resgid == 0 ||
571 	      !in_group_p (sb->u.ext3_sb.s_resgid)) &&
572 	     !capable(CAP_SYS_RESOURCE)))
573 		goto out;
574 
575 	ext3_debug ("goal=%lu.\n", goal);
576 
577 repeat:
578 	/*
579 	 * First, test whether the goal block is free.
580 	 */
581 	if (goal < le32_to_cpu(es->s_first_data_block) ||
582 	    goal >= le32_to_cpu(es->s_blocks_count))
583 		goal = le32_to_cpu(es->s_first_data_block);
584 	i = (goal - le32_to_cpu(es->s_first_data_block)) /
585 			EXT3_BLOCKS_PER_GROUP(sb);
586 	gdp = ext3_get_group_desc (sb, i, &bh2);
587 	if (!gdp)
588 		goto io_error;
589 
590 	if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
591 		j = ((goal - le32_to_cpu(es->s_first_data_block)) %
592 				EXT3_BLOCKS_PER_GROUP(sb));
593 #ifdef EXT3FS_DEBUG
594 		if (j)
595 			goal_attempts++;
596 #endif
597 		bitmap_nr = load_block_bitmap (sb, i);
598 		if (bitmap_nr < 0)
599 			goto io_error;
600 
601 		bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
602 
603 		ext3_debug ("goal is at %d:%d.\n", i, j);
604 
605 		if (ext3_test_allocatable(j, bh)) {
606 #ifdef EXT3FS_DEBUG
607 			goal_hits++;
608 			ext3_debug ("goal bit allocated.\n");
609 #endif
610 			goto got_block;
611 		}
612 
613 		j = find_next_usable_block(j, bh, EXT3_BLOCKS_PER_GROUP(sb));
614 		if (j >= 0)
615 			goto search_back;
616 	}
617 
618 	ext3_debug ("Bit not found in block group %d.\n", i);
619 
620 	/*
621 	 * Now search the rest of the groups.  We assume that
622 	 * i and gdp correctly point to the last group visited.
623 	 */
624 	for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) {
625 		i++;
626 		if (i >= sb->u.ext3_sb.s_groups_count)
627 			i = 0;
628 		gdp = ext3_get_group_desc (sb, i, &bh2);
629 		if (!gdp) {
630 			*errp = -EIO;
631 			goto out;
632 		}
633 		if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
634 			bitmap_nr = load_block_bitmap (sb, i);
635 			if (bitmap_nr < 0)
636 				goto io_error;
637 
638 			bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
639 			j = find_next_usable_block(-1, bh,
640 						   EXT3_BLOCKS_PER_GROUP(sb));
641 			if (j >= 0)
642 				goto search_back;
643 		}
644 	}
645 
646 	/* No space left on the device */
647 	goto out;
648 
649 search_back:
650 	/*
651 	 * We have succeeded in finding a free byte in the block
652 	 * bitmap.  Now search backwards up to 7 bits to find the
653 	 * start of this group of free blocks.
654 	 */
655 	for (	k = 0;
656 		k < 7 && j > 0 && ext3_test_allocatable(j - 1, bh);
657 		k++, j--)
658 		;
659 
660 got_block:
661 
662 	ext3_debug ("using block group %d(%d)\n", i, gdp->bg_free_blocks_count);
663 
664 	/* Make sure we use undo access for the bitmap, because it is
665            critical that we do the frozen_data COW on bitmap buffers in
666            all cases even if the buffer is in BJ_Forget state in the
667            committing transaction.  */
668 	BUFFER_TRACE(bh, "get undo access for marking new block");
669 	fatal = ext3_journal_get_undo_access(handle, bh);
670 	if (fatal) goto out;
671 
672 	BUFFER_TRACE(bh2, "get_write_access");
673 	fatal = ext3_journal_get_write_access(handle, bh2);
674 	if (fatal) goto out;
675 
676 	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
677 	fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
678 	if (fatal) goto out;
679 
680 	tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb)
681 				+ le32_to_cpu(es->s_first_data_block);
682 
683 	if (tmp == le32_to_cpu(gdp->bg_block_bitmap) ||
684 	    tmp == le32_to_cpu(gdp->bg_inode_bitmap) ||
685 	    in_range (tmp, le32_to_cpu(gdp->bg_inode_table),
686 		      EXT3_SB(sb)->s_itb_per_group)) {
687 		ext3_error(sb, __FUNCTION__,
688 			   "Allocating block in system zone - block = %u", tmp);
689 
690 		/* Note: This will potentially use up one of the handle's
691 		 * buffer credits.  Normally we have way too many credits,
692 		 * so that is OK.  In _very_ rare cases it might not be OK.
693 		 * We will trigger an assertion if we run out of credits,
694 		 * and we will have to do a full fsck of the filesystem -
695 		 * better than randomly corrupting filesystem metadata.
696 		 */
697 		ext3_set_bit(j, bh->b_data);
698 		goto repeat;
699 	}
700 
701 
702 	/* The superblock lock should guard against anybody else beating
703 	 * us to this point! */
704 	J_ASSERT_BH(bh, !ext3_test_bit(j, bh->b_data));
705 	BUFFER_TRACE(bh, "setting bitmap bit");
706 	ext3_set_bit(j, bh->b_data);
707 	performed_allocation = 1;
708 
709 #ifdef CONFIG_JBD_DEBUG
710 	{
711 		struct buffer_head *debug_bh;
712 
713 		/* Record bitmap buffer state in the newly allocated block */
714 		debug_bh = sb_get_hash_table(sb, tmp);
715 		if (debug_bh) {
716 			BUFFER_TRACE(debug_bh, "state when allocated");
717 			BUFFER_TRACE2(debug_bh, bh, "bitmap state");
718 			brelse(debug_bh);
719 		}
720 	}
721 #endif
722 	if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data)
723 		J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data));
724 	bhtmp = bh;
725 	alloctmp = j;
726 
727 	ext3_debug ("found bit %d\n", j);
728 
729 	/*
730 	 * Do block preallocation now if required.
731 	 */
732 #ifdef EXT3_PREALLOCATE
733 	/*
734 	 * akpm: this is not enabled for ext3.  Need to use
735 	 * ext3_test_allocatable()
736 	 */
737 	/* Writer: ->i_prealloc* */
738 	if (prealloc_count && !*prealloc_count) {
739 		int	prealloc_goal;
740 		unsigned long next_block = tmp + 1;
741 
742 		prealloc_goal = es->s_prealloc_blocks ?
743 			es->s_prealloc_blocks : EXT3_DEFAULT_PREALLOC_BLOCKS;
744 
745 		*prealloc_block = next_block;
746 		/* Writer: end */
747 		for (k = 1;
748 		     k < prealloc_goal && (j + k) < EXT3_BLOCKS_PER_GROUP(sb);
749 		     k++, next_block++) {
750 			if (DQUOT_PREALLOC_BLOCK(inode, 1))
751 				break;
752 			/* Writer: ->i_prealloc* */
753 			if (*prealloc_block + *prealloc_count != next_block ||
754 			    ext3_set_bit (j + k, bh->b_data)) {
755 				/* Writer: end */
756 				DQUOT_FREE_BLOCK(inode, 1);
757  				break;
758 			}
759 			(*prealloc_count)++;
760 			/* Writer: end */
761 		}
762 		/*
763 		 * As soon as we go for per-group spinlocks we'll need these
764 		 * done inside the loop above.
765 		 */
766 		gdp->bg_free_blocks_count =
767 			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
768 			       (k - 1));
769 		es->s_free_blocks_count =
770 			cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) -
771 			       (k - 1));
772 		ext3_debug ("Preallocated a further %lu bits.\n",
773 			       (k - 1));
774 	}
775 #endif
776 
777 	j = tmp;
778 
779 	BUFFER_TRACE(bh, "journal_dirty_metadata for bitmap block");
780 	err = ext3_journal_dirty_metadata(handle, bh);
781 	if (!fatal) fatal = err;
782 
783 	if (j >= le32_to_cpu(es->s_blocks_count)) {
784 		ext3_error (sb, "ext3_new_block",
785 			    "block(%d) >= blocks count(%d) - "
786 			    "block_group = %d, es == %p ",j,
787 			le32_to_cpu(es->s_blocks_count), i, es);
788 		goto out;
789 	}
790 
791 	/*
792 	 * It is up to the caller to add the new buffer to a journal
793 	 * list of some description.  We don't know in advance whether
794 	 * the caller wants to use it as metadata or data.
795 	 */
796 
797 	ext3_debug ("allocating block %d. "
798 		    "Goal hits %d of %d.\n", j, goal_hits, goal_attempts);
799 
800 	gdp->bg_free_blocks_count =
801 			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
802 	es->s_free_blocks_count =
803 			cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - 1);
804 
805 	BUFFER_TRACE(bh2, "journal_dirty_metadata for group descriptor");
806 	err = ext3_journal_dirty_metadata(handle, bh2);
807 	if (!fatal) fatal = err;
808 
809 	BUFFER_TRACE(bh, "journal_dirty_metadata for superblock");
810 	err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
811 	if (!fatal) fatal = err;
812 
813 	sb->s_dirt = 1;
814 	if (fatal)
815 		goto out;
816 
817 	unlock_super (sb);
818 	*errp = 0;
819 	return j;
820 
821 io_error:
822 	*errp = -EIO;
823 out:
824 	if (fatal) {
825 		*errp = fatal;
826 		ext3_std_error(sb, fatal);
827 	}
828 	unlock_super (sb);
829 	/*
830 	 * Undo the block allocation
831 	 */
832 	if (!performed_allocation)
833 		DQUOT_FREE_BLOCK(inode, 1);
834 	return 0;
835 
836 }
837 
ext3_count_free_blocks(struct super_block * sb)838 unsigned long ext3_count_free_blocks (struct super_block * sb)
839 {
840 #ifdef EXT3FS_DEBUG
841 	struct ext3_super_block * es;
842 	unsigned long desc_count, bitmap_count, x;
843 	int bitmap_nr;
844 	struct ext3_group_desc * gdp;
845 	int i;
846 
847 	lock_super (sb);
848 	es = sb->u.ext3_sb.s_es;
849 	desc_count = 0;
850 	bitmap_count = 0;
851 	gdp = NULL;
852 	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
853 		gdp = ext3_get_group_desc (sb, i, NULL);
854 		if (!gdp)
855 			continue;
856 		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
857 		bitmap_nr = load_block_bitmap (sb, i);
858 		if (bitmap_nr < 0)
859 			continue;
860 
861 		x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr],
862 				     sb->s_blocksize);
863 		printk ("group %d: stored = %d, counted = %lu\n",
864 			i, le16_to_cpu(gdp->bg_free_blocks_count), x);
865 		bitmap_count += x;
866 	}
867 	printk("ext3_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
868 	       le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count);
869 	unlock_super (sb);
870 	return bitmap_count;
871 #else
872 	return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
873 #endif
874 }
875 
block_in_use(unsigned long block,struct super_block * sb,unsigned char * map)876 static inline int block_in_use (unsigned long block,
877 				struct super_block * sb,
878 				unsigned char * map)
879 {
880 	return ext3_test_bit ((block -
881 		le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) %
882 			 EXT3_BLOCKS_PER_GROUP(sb), map);
883 }
884 
test_root(int a,int b)885 static inline int test_root(int a, int b)
886 {
887 	if (a == 0)
888 		return 1;
889 	while (1) {
890 		if (a == 1)
891 			return 1;
892 		if (a % b)
893 			return 0;
894 		a = a / b;
895 	}
896 }
897 
ext3_group_sparse(int group)898 int ext3_group_sparse(int group)
899 {
900 	return (test_root(group, 3) || test_root(group, 5) ||
901 		test_root(group, 7));
902 }
903 
904 /**
905  *	ext3_bg_has_super - number of blocks used by the superblock in group
906  *	@sb: superblock for filesystem
907  *	@group: group number to check
908  *
909  *	Return the number of blocks used by the superblock (primary or backup)
910  *	in this group.  Currently this will be only 0 or 1.
911  */
ext3_bg_has_super(struct super_block * sb,int group)912 int ext3_bg_has_super(struct super_block *sb, int group)
913 {
914 	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
915 	    !ext3_group_sparse(group))
916 		return 0;
917 	return 1;
918 }
919 
920 /**
921  *	ext3_bg_num_gdb - number of blocks used by the group table in group
922  *	@sb: superblock for filesystem
923  *	@group: group number to check
924  *
925  *	Return the number of blocks used by the group descriptor table
926  *	(primary or backup) in this group.  In the future there may be a
927  *	different number of descriptor blocks in each group.
928  */
ext3_bg_num_gdb(struct super_block * sb,int group)929 unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
930 {
931 	if (EXT3_HAS_RO_COMPAT_FEATURE(sb,EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
932 	    !ext3_group_sparse(group))
933 		return 0;
934 	return EXT3_SB(sb)->s_gdb_count;
935 }
936 
937 #ifdef CONFIG_EXT3_CHECK
938 /* Called at mount-time, super-block is locked */
ext3_check_blocks_bitmap(struct super_block * sb)939 void ext3_check_blocks_bitmap (struct super_block * sb)
940 {
941 	struct buffer_head * bh;
942 	struct ext3_super_block * es;
943 	unsigned long desc_count, bitmap_count, x, j;
944 	unsigned long desc_blocks;
945 	int bitmap_nr;
946 	struct ext3_group_desc * gdp;
947 	int i;
948 
949 	es = sb->u.ext3_sb.s_es;
950 	desc_count = 0;
951 	bitmap_count = 0;
952 	gdp = NULL;
953 	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
954 		gdp = ext3_get_group_desc (sb, i, NULL);
955 		if (!gdp)
956 			continue;
957 		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
958 		bitmap_nr = load_block_bitmap (sb, i);
959 		if (bitmap_nr < 0)
960 			continue;
961 
962 		bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
963 
964 		if (ext3_bg_has_super(sb, i) && !ext3_test_bit(0, bh->b_data))
965 			ext3_error(sb, __FUNCTION__,
966 				   "Superblock in group %d is marked free", i);
967 
968 		desc_blocks = ext3_bg_num_gdb(sb, i);
969 		for (j = 0; j < desc_blocks; j++)
970 			if (!ext3_test_bit(j + 1, bh->b_data))
971 				ext3_error(sb, __FUNCTION__,
972 					   "Descriptor block #%ld in group "
973 					   "%d is marked free", j, i);
974 
975 		if (!block_in_use (le32_to_cpu(gdp->bg_block_bitmap),
976 						sb, bh->b_data))
977 			ext3_error (sb, "ext3_check_blocks_bitmap",
978 				    "Block bitmap for group %d is marked free",
979 				    i);
980 
981 		if (!block_in_use (le32_to_cpu(gdp->bg_inode_bitmap),
982 						sb, bh->b_data))
983 			ext3_error (sb, "ext3_check_blocks_bitmap",
984 				    "Inode bitmap for group %d is marked free",
985 				    i);
986 
987 		for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++)
988 			if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
989 							sb, bh->b_data))
990 				ext3_error (sb, "ext3_check_blocks_bitmap",
991 					    "Block #%d of the inode table in "
992 					    "group %d is marked free", j, i);
993 
994 		x = ext3_count_free (bh, sb->s_blocksize);
995 		if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
996 			ext3_error (sb, "ext3_check_blocks_bitmap",
997 				    "Wrong free blocks count for group %d, "
998 				    "stored = %d, counted = %lu", i,
999 				    le16_to_cpu(gdp->bg_free_blocks_count), x);
1000 		bitmap_count += x;
1001 	}
1002 	if (le32_to_cpu(es->s_free_blocks_count) != bitmap_count)
1003 		ext3_error (sb, "ext3_check_blocks_bitmap",
1004 			"Wrong free blocks count in super block, "
1005 			"stored = %lu, counted = %lu",
1006 			(unsigned long)le32_to_cpu(es->s_free_blocks_count),
1007 			bitmap_count);
1008 }
1009 #endif
1010