1 /*
2  * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README
3  */
4 
5 /* Reiserfs block (de)allocator, bitmap-based. */
6 
7 #include <linux/config.h>
8 #include <linux/sched.h>
9 #include <linux/vmalloc.h>
10 #include <linux/errno.h>
11 #include <linux/locks.h>
12 #include <linux/kernel.h>
13 
14 #include <linux/reiserfs_fs.h>
15 #include <linux/reiserfs_fs_sb.h>
16 #include <linux/reiserfs_fs_i.h>
17 
18 #define PREALLOCATION_SIZE 9
19 
20 #define INODE_INFO(inode) (&(inode)->u.reiserfs_i)
21 
22 /* different reiserfs block allocator options */
23 
24 #define SB_ALLOC_OPTS(s) ((s)->u.reiserfs_sb.s_alloc_options.bits)
25 
26 #define  _ALLOC_concentrating_formatted_nodes 0
27 #define  _ALLOC_displacing_large_files 1
28 #define  _ALLOC_displacing_new_packing_localities 2
29 #define  _ALLOC_old_hashed_relocation 3
30 #define  _ALLOC_new_hashed_relocation 4
31 #define  _ALLOC_skip_busy 5
32 #define  _ALLOC_displace_based_on_dirid 6
33 #define  _ALLOC_hashed_formatted_nodes 7
34 #define  _ALLOC_old_way 8
35 #define  _ALLOC_hundredth_slices 9
36 
37 #define  concentrating_formatted_nodes(s)     test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
38 #define  displacing_large_files(s)            test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
39 #define  displacing_new_packing_localities(s) test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
40 
41 #define SET_OPTION(optname) \
42    do { \
43         reiserfs_warning(s, "reiserfs: option \"%s\" is set\n", #optname); \
44         set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
45     } while(0)
46 #define TEST_OPTION(optname, s) \
47     test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
48 
49 
50 /* #define LIMIT(a,b) do { if ((a) > (b)) (a) = (b); } while(0) */
51 
get_bit_address(struct super_block * s,unsigned long block,int * bmap_nr,int * offset)52 static inline void get_bit_address (struct super_block * s,
53 				    unsigned long block, int * bmap_nr, int * offset)
54 {
55     /* It is in the bitmap block number equal to the block
56      * number divided by the number of bits in a block. */
57     *bmap_nr = block / (s->s_blocksize << 3);
58     /* Within that bitmap block it is located at bit offset *offset. */
59     *offset = block & ((s->s_blocksize << 3) - 1 );
60     return;
61 }
62 
63 #ifdef CONFIG_REISERFS_CHECK
is_reusable(struct super_block * s,unsigned long block,int bit_value)64 int is_reusable (struct super_block * s, unsigned long block, int bit_value)
65 {
66     int i, j;
67 
68     if (block == 0 || block >= SB_BLOCK_COUNT (s)) {
69 	reiserfs_warning (s, "vs-4010: is_reusable: block number is out of range %lu (%u)\n",
70 			  block, SB_BLOCK_COUNT (s));
71 	return 0;
72     }
73 
74     /* it can't be one of the bitmap blocks */
75     for (i = 0; i < SB_BMAP_NR (s); i ++)
76 	if (block == SB_AP_BITMAP (s)[i].bh->b_blocknr) {
77 	    reiserfs_warning (s, "vs: 4020: is_reusable: "
78 			      "bitmap block %lu(%u) can't be freed or reused\n",
79 			      block, SB_BMAP_NR (s));
80 	    return 0;
81 	}
82 
83     get_bit_address (s, block, &i, &j);
84 
85     if (i >= SB_BMAP_NR (s)) {
86 	reiserfs_warning (s, "vs-4030: is_reusable: there is no so many bitmap blocks: "
87 			  "block=%lu, bitmap_nr=%d\n", block, i);
88 	return 0;
89     }
90 
91     if ((bit_value == 0 &&
92          reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) ||
93 	(bit_value == 1 &&
94 	 reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i].bh->b_data) == 0)) {
95 	reiserfs_warning (s, "vs-4040: is_reusable: corresponding bit of block %lu does not "
96 			  "match required value (i==%d, j==%d) test_bit==%d\n",
97 		block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i].bh->b_data));
98 
99 	return 0;
100     }
101 
102     if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) {
103 	reiserfs_warning (s, "vs-4050: is_reusable: this is root block (%u), "
104 			  "it must be busy\n", SB_ROOT_BLOCK (s));
105 	return 0;
106     }
107 
108     return 1;
109 }
110 #endif /* CONFIG_REISERFS_CHECK */
111 
112 /* searches in journal structures for a given block number (bmap, off). If block
113    is found in reiserfs journal it suggests next free block candidate to test. */
is_block_in_journal(struct super_block * s,int bmap,int off,int * next)114 static inline  int is_block_in_journal (struct super_block * s, int bmap, int off, int *next)
115 {
116     unsigned int tmp;
117 
118     if (reiserfs_in_journal (s, s->s_dev, bmap, off, s->s_blocksize, 1, &tmp)) {
119 	if (tmp) {		/* hint supplied */
120 	    *next = tmp;
121 	    PROC_INFO_INC( s, scan_bitmap.in_journal_hint );
122 	} else {
123 	    (*next) = off + 1;		/* inc offset to avoid looping. */
124 	    PROC_INFO_INC( s, scan_bitmap.in_journal_nohint );
125 	}
126 	PROC_INFO_INC( s, scan_bitmap.retry );
127 	return 1;
128     }
129     return 0;
130 }
131 
132 /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
133  * block; */
scan_bitmap_block(struct reiserfs_transaction_handle * th,int bmap_n,int * beg,int boundary,int min,int max,int unfm)134 static int scan_bitmap_block (struct reiserfs_transaction_handle *th,
135 			      int bmap_n, int *beg, int boundary, int min, int max, int unfm)
136 {
137     struct super_block *s = th->t_super;
138     struct reiserfs_bitmap_info *bi=&SB_AP_BITMAP(s)[bmap_n];
139     int end, next;
140     int org = *beg;
141 
142     RFALSE(bmap_n >= SB_BMAP_NR (s), "Bitmap %d is out of range (0..%d)\n",bmap_n, SB_BMAP_NR (s) - 1);
143     PROC_INFO_INC( s, scan_bitmap.bmap );
144 /* this is unclear and lacks comments, explain how journal bitmaps
145    work here for the reader.  Convey a sense of the design here. What
146    is a window? */
147 /* - I mean `a window of zero bits' as in description of this function - Zam. */
148 
149     if ( !bi ) {
150 	printk("Hey, bitmap info pointer is zero for bitmap %d!\n",bmap_n);
151 	return 0;
152     }
153     if (buffer_locked (bi->bh)) {
154        PROC_INFO_INC( s, scan_bitmap.wait );
155        __wait_on_buffer (bi->bh);
156     }
157 
158     /* If we know that first zero bit is only one or first zero bit is
159        closer to the end of bitmap than our start pointer */
160     if (bi->first_zero_hint > *beg || bi->free_count == 1)
161 	*beg = bi->first_zero_hint;
162 
163     while (1) {
164 	cont:
165 	if (bi->free_count < min)
166 		return 0; // No free blocks in this bitmap
167 
168 	/* search for a first zero bit -- beggining of a window */
169 	*beg = reiserfs_find_next_zero_le_bit
170 	        ((unsigned long*)(bi->bh->b_data), boundary, *beg);
171 
172 	if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
173 				      * cannot contain a zero window of minimum size */
174 	    return 0;
175 	}
176 
177 	if (unfm && is_block_in_journal(s,bmap_n, *beg, beg))
178 	    continue;
179 	/* first zero bit found; we check next bits */
180 	for (end = *beg + 1;; end ++) {
181 	    if (end >= *beg + max || end >= boundary || reiserfs_test_le_bit (end, bi->bh->b_data)) {
182 		next = end;
183 		break;
184 	    }
185 	    /* finding the other end of zero bit window requires looking into journal structures (in
186 	     * case of searching for free blocks for unformatted nodes) */
187 	    if (unfm && is_block_in_journal(s, bmap_n, end, &next))
188 		break;
189 	}
190 
191 	/* now (*beg) points to beginning of zero bits window,
192 	 * (end) points to one bit after the window end */
193 	if (end - *beg >= min) { /* it seems we have found window of proper size */
194 	    int i;
195 	    reiserfs_prepare_for_journal (s, bi->bh, 1);
196 	    /* try to set all blocks used checking are they still free */
197 	    for (i = *beg; i < end; i++) {
198 		/* It seems that we should not check in journal again. */
199 		if (reiserfs_test_and_set_le_bit (i, bi->bh->b_data)) {
200 		    /* bit was set by another process
201 		     * while we slept in prepare_for_journal() */
202 		    PROC_INFO_INC( s, scan_bitmap.stolen );
203 		    if (i >= *beg + min)	{ /* we can continue with smaller set of allocated blocks,
204 					   * if length of this set is more or equal to `min' */
205 			end = i;
206 			break;
207 		    }
208 		    /* otherwise we clear all bit were set ... */
209 		    while (--i >= *beg)
210 			reiserfs_test_and_clear_le_bit (i, bi->bh->b_data);
211 		    reiserfs_restore_prepared_buffer (s, bi->bh);
212 		    *beg = max(org, (int)bi->first_zero_hint);
213 		    /* ... and search again in current block from beginning */
214 		    goto cont;
215 		}
216 	    }
217 	    bi->free_count -= (end - *beg);
218 
219 	    /* if search started from zero_hint bit, and zero hint have not
220                 changed since, then we need to update first_zero_hint */
221 	    if ( bi->first_zero_hint >= *beg)
222 		/* no point in looking for free bit if there is not any */
223 		bi->first_zero_hint = (bi->free_count > 0 ) ?
224 			reiserfs_find_next_zero_le_bit
225 			((unsigned long*)(bi->bh->b_data), s->s_blocksize << 3, end) : (s->s_blocksize << 3);
226 
227 	    journal_mark_dirty (th, s, bi->bh);
228 
229 	    /* free block count calculation */
230 	    reiserfs_prepare_for_journal (s, SB_BUFFER_WITH_SB(s), 1);
231 	    PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
232 	    journal_mark_dirty (th, s, SB_BUFFER_WITH_SB(s));
233 
234 	    return end - (*beg);
235 	} else {
236 	    *beg = next;
237 	}
238     }
239 }
240 
241 /* Tries to find contiguous zero bit window (given size) in given region of
242  * bitmap and place new blocks there. Returns number of allocated blocks. */
scan_bitmap(struct reiserfs_transaction_handle * th,unsigned long * start,unsigned long finish,int min,int max,int unfm,unsigned long file_block)243 static int scan_bitmap (struct reiserfs_transaction_handle *th,
244 			unsigned long *start, unsigned long finish,
245 			int min, int max, int unfm, unsigned long file_block)
246 {
247     int nr_allocated=0;
248     struct super_block * s = th->t_super;
249     /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
250      * - Hans, it is not a block number - Zam. */
251 
252     int bm, off;
253     int end_bm, end_off;
254     int off_max = s->s_blocksize << 3;
255 
256     PROC_INFO_INC( s, scan_bitmap.call );
257     if ( SB_FREE_BLOCKS(s) <= 0)
258 	return 0; // No point in looking for more free blocks
259 
260     get_bit_address (s, *start, &bm, &off);
261     get_bit_address (s, finish, &end_bm, &end_off);
262 
263     // With this option set first we try to find a bitmap that is at least 10%
264     // free, and if that fails, then we fall back to old whole bitmap scanning
265     if ( TEST_OPTION(skip_busy, s) && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s)/20 ) {
266 	for (;bm < end_bm; bm++, off = 0) {
267 	    if ( ( off && (!unfm || (file_block != 0))) || SB_AP_BITMAP(s)[bm].free_count > (s->s_blocksize << 3) / 10 )
268 		nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
269 	    if (nr_allocated)
270 		goto ret;
271         }
272 	get_bit_address (s, *start, &bm, &off);
273     }
274 
275     for (;bm < end_bm; bm++, off = 0) {
276 	nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
277 	if (nr_allocated)
278 	    goto ret;
279     }
280 
281     nr_allocated = scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
282 
283  ret:
284     *start = bm * off_max + off;
285     return nr_allocated;
286 
287 }
288 
_reiserfs_free_block(struct reiserfs_transaction_handle * th,b_blocknr_t block)289 static void _reiserfs_free_block (struct reiserfs_transaction_handle *th,
290 			  b_blocknr_t block)
291 {
292     struct super_block * s = th->t_super;
293     struct reiserfs_super_block * rs;
294     struct buffer_head * sbh;
295     struct reiserfs_bitmap_info *apbi;
296     int nr, offset;
297 
298     PROC_INFO_INC( s, free_block );
299 
300     rs = SB_DISK_SUPER_BLOCK (s);
301     sbh = SB_BUFFER_WITH_SB (s);
302     apbi = SB_AP_BITMAP(s);
303 
304     get_bit_address (s, block, &nr, &offset);
305 
306     if (nr >= sb_bmap_nr (rs)) {
307 	reiserfs_warning (s, "vs-4075: reiserfs_free_block: "
308 			  "block %lu is out of range on %s\n",
309 			  block, bdevname(s->s_dev));
310 	return;
311     }
312 
313     reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ;
314 
315     /* clear bit for the given block in bit map */
316     if (!reiserfs_test_and_clear_le_bit (offset, apbi[nr].bh->b_data)) {
317 	reiserfs_warning (s, "vs-4080: reiserfs_free_block: "
318 			  "free_block (%04x:%lu)[dev:blocknr]: bit already cleared\n",
319 			  s->s_dev, block);
320     }
321     if (offset < apbi[nr].first_zero_hint) {
322 	apbi[nr].first_zero_hint = offset;
323     }
324     apbi[nr].free_count ++;
325     journal_mark_dirty (th, s, apbi[nr].bh);
326 
327     reiserfs_prepare_for_journal(s, sbh, 1) ;
328     /* update super block */
329     set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 );
330 
331     journal_mark_dirty (th, s, sbh);
332 }
333 
reiserfs_free_block(struct reiserfs_transaction_handle * th,unsigned long block)334 void reiserfs_free_block (struct reiserfs_transaction_handle *th,
335 			  unsigned long block) {
336     struct super_block * s = th->t_super;
337 
338     RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
339     RFALSE(is_reusable (s, block, 1) == 0, "vs-4071: can not free such block");
340     /* mark it before we clear it, just in case */
341     journal_mark_freed(th, s, block) ;
342     _reiserfs_free_block(th, block) ;
343 }
344 
345 /* preallocated blocks don't need to be run through journal_mark_freed */
reiserfs_free_prealloc_block(struct reiserfs_transaction_handle * th,unsigned long block)346 void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th,
347                           unsigned long block) {
348     RFALSE(!th->t_super, "vs-4060: trying to free block on nonexistent device");
349     RFALSE(is_reusable (th->t_super, block, 1) == 0, "vs-4070: can not free such block");
350     _reiserfs_free_block(th, block) ;
351 }
352 
__discard_prealloc(struct reiserfs_transaction_handle * th,struct inode * inode)353 static void __discard_prealloc (struct reiserfs_transaction_handle * th,
354 				struct inode * inode)
355 {
356     unsigned long save = inode->u.reiserfs_i.i_prealloc_block ;
357 #ifdef CONFIG_REISERFS_CHECK
358     if (inode->u.reiserfs_i.i_prealloc_count < 0)
359 	reiserfs_warning(th->t_super, "zam-4001:%s: inode has negative prealloc blocks count.\n", __FUNCTION__ );
360 #endif
361     while (inode->u.reiserfs_i.i_prealloc_count > 0) {
362 	reiserfs_free_prealloc_block(th,inode->u.reiserfs_i.i_prealloc_block);
363 	inode->u.reiserfs_i.i_prealloc_block++;
364 	inode->u.reiserfs_i.i_prealloc_count --;
365     }
366     inode->u.reiserfs_i.i_prealloc_block = save ;
367     list_del (&(inode->u.reiserfs_i.i_prealloc_list));
368 }
369 
370 /* FIXME: It should be inline function */
reiserfs_discard_prealloc(struct reiserfs_transaction_handle * th,struct inode * inode)371 void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th,
372 				struct inode * inode)
373 {
374     if (inode->u.reiserfs_i.i_prealloc_count) {
375 	__discard_prealloc(th, inode);
376     }
377 }
378 
reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle * th)379 void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th)
380 {
381   struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
382   struct inode * inode;
383 
384   while (!list_empty(plist)) {
385     inode = list_entry(plist->next, struct inode, u.reiserfs_i.i_prealloc_list);
386 #ifdef CONFIG_REISERFS_CHECK
387     if (!inode->u.reiserfs_i.i_prealloc_count) {
388       reiserfs_warning(th->t_super, "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.\n", __FUNCTION__ );
389     }
390 #endif
391     __discard_prealloc(th, inode);
392   }
393 }
394 
395 /* block allocator related options are parsed here */
reiserfs_parse_alloc_options(struct super_block * s,char * options)396 int reiserfs_parse_alloc_options(struct super_block * s, char * options)
397 {
398     char * this_char, * value;
399 
400     s->u.reiserfs_sb.s_alloc_options.bits = 0; /* clear default settings */
401 
402     for (this_char = strtok (options, ":"); this_char != NULL; this_char = strtok (NULL, ":")) {
403 	if ((value = strchr (this_char, '=')) != NULL)
404 	    *value++ = 0;
405 
406 	if (!strcmp(this_char, "concentrating_formatted_nodes")) {
407 	    int temp;
408 	    SET_OPTION(concentrating_formatted_nodes);
409 	    temp = (value && *value) ? simple_strtoul (value, &value, 0) : 10;
410 	    if (temp <= 0 || temp > 100) {
411 		s->u.reiserfs_sb.s_alloc_options.border = 10;
412 	    } else {
413 		s->u.reiserfs_sb.s_alloc_options.border = 100 / temp;
414 	   }
415 	    continue;
416 	}
417 	if (!strcmp(this_char, "displacing_large_files")) {
418 	    SET_OPTION(displacing_large_files);
419 	    s->u.reiserfs_sb.s_alloc_options.large_file_size =
420 		(value && *value) ? simple_strtoul (value, &value, 0) : 16;
421 	    continue;
422 	}
423 	if (!strcmp(this_char, "displacing_new_packing_localities")) {
424 	    SET_OPTION(displacing_new_packing_localities);
425 	    continue;
426 	};
427 
428 	if (!strcmp(this_char, "old_hashed_relocation")) {
429 	    SET_OPTION(old_hashed_relocation);
430 	    continue;
431 	}
432 
433 	if (!strcmp(this_char, "new_hashed_relocation")) {
434 	    SET_OPTION(new_hashed_relocation);
435 	    continue;
436 	}
437 
438 	if (!strcmp(this_char, "hashed_formatted_nodes")) {
439 	    SET_OPTION(hashed_formatted_nodes);
440 	    continue;
441 	}
442 
443 	if (!strcmp(this_char, "skip_busy")) {
444 	    SET_OPTION(skip_busy);
445 	    continue;
446 	}
447 
448 	if (!strcmp(this_char, "hundredth_slices")) {
449 	    SET_OPTION(hundredth_slices);
450 	    continue;
451 	}
452 
453 	if (!strcmp(this_char, "old_way")) {
454 	    SET_OPTION(old_way);
455 	    continue;
456 	}
457 
458 	if (!strcmp(this_char, "displace_based_on_dirid")) {
459 	    SET_OPTION(displace_based_on_dirid);
460 	    continue;
461 	}
462 
463 	if (!strcmp(this_char, "preallocmin")) {
464 	    s->u.reiserfs_sb.s_alloc_options.preallocmin =
465 		(value && *value) ? simple_strtoul (value, &value, 0) : 4;
466 	    continue;
467 	}
468 
469 	if (!strcmp(this_char, "preallocsize")) {
470 	    s->u.reiserfs_sb.s_alloc_options.preallocsize =
471 		(value && *value) ? simple_strtoul (value, &value, 0) : PREALLOCATION_SIZE;
472 	    continue;
473 	}
474 
475 	reiserfs_warning(s, "zam-4001: %s : unknown option - %s\n", __FUNCTION__ , this_char);
476 	return 1;
477     }
478 
479     return 0;
480 }
481 
new_hashed_relocation(reiserfs_blocknr_hint_t * hint)482 static void inline new_hashed_relocation (reiserfs_blocknr_hint_t * hint)
483 {
484     char * hash_in;
485     if (hint->formatted_node) {
486 	    hash_in = (char*)&hint->key.k_dir_id;
487     } else {
488 	if (!hint->inode) {
489 	    //hint->search_start = hint->beg;
490 	    hash_in = (char*)&hint->key.k_dir_id;
491 	} else
492 	    if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
493 		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
494 	    else
495 		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
496     }
497 
498     hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
499 }
500 
get_left_neighbor(reiserfs_blocknr_hint_t * hint)501 static void inline get_left_neighbor(reiserfs_blocknr_hint_t *hint)
502 {
503     struct path * path;
504     struct buffer_head * bh;
505     struct item_head * ih;
506     int pos_in_item;
507     __u32 * item;
508 
509     if (!hint->path)		/* reiserfs code can call this function w/o pointer to path
510 				 * structure supplied; then we rely on supplied search_start */
511 	return;
512 
513     path = hint->path;
514     bh = get_last_bh(path);
515     RFALSE( !bh, "green-4002: Illegal path specified to get_left_neighbor\n");
516     ih = get_ih(path);
517     pos_in_item = path->pos_in_item;
518     item = get_item (path);
519 
520     hint->search_start = bh->b_blocknr;
521 
522     if (!hint->formatted_node && is_indirect_le_ih (ih)) {
523 	/* for indirect item: go to left and look for the first non-hole entry
524 	   in the indirect item */
525 	if (pos_in_item == I_UNFM_NUM (ih))
526 	    pos_in_item--;
527 //	    pos_in_item = I_UNFM_NUM (ih) - 1;
528 	while (pos_in_item >= 0) {
529 	    int t=get_block_num(item,pos_in_item);
530 	    if (t) {
531 		hint->search_start = t;
532 		break;
533 	    }
534 	    pos_in_item --;
535 	}
536     } else {
537     }
538 
539     /* does result value fit into specified region? */
540     return;
541 }
542 
543 /* should be, if formatted node, then try to put on first part of the device
544    specified as number of percent with mount option device, else try to put
545    on last of device.  This is not to say it is good code to do so,
546    but the effect should be measured.  */
set_border_in_hint(struct super_block * s,reiserfs_blocknr_hint_t * hint)547 static void inline set_border_in_hint(struct super_block *s, reiserfs_blocknr_hint_t *hint)
548 {
549     b_blocknr_t border = SB_BLOCK_COUNT(hint->th->t_super) / s->u.reiserfs_sb.s_alloc_options.border;
550 
551     if (hint->formatted_node)
552 	hint->end = border - 1;
553     else
554 	hint->beg = border;
555 }
556 
displace_large_file(reiserfs_blocknr_hint_t * hint)557 static void inline displace_large_file(reiserfs_blocknr_hint_t *hint)
558 {
559     if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
560 	hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), 4) % (hint->end - hint->beg);
561     else
562 	hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), 4) % (hint->end - hint->beg);
563 }
564 
hash_formatted_node(reiserfs_blocknr_hint_t * hint)565 static void inline hash_formatted_node(reiserfs_blocknr_hint_t *hint)
566 {
567    char * hash_in;
568 
569    if (!hint->inode)
570 	hash_in = (char*)&hint->key.k_dir_id;
571     else if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
572 	hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
573     else
574 	hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
575 
576 	hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
577 }
578 
this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t * hint)579 static int inline this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *hint)
580 {
581     return hint->block == hint->th->t_super->u.reiserfs_sb.s_alloc_options.large_file_size;
582 }
583 
584 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)585 static void inline displace_new_packing_locality (reiserfs_blocknr_hint_t *hint)
586 {
587     struct key * key = &hint->key;
588 
589     hint->th->displace_new_blocks = 0;
590     hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg);
591 }
592 #endif
593 
old_hashed_relocation(reiserfs_blocknr_hint_t * hint)594 static int inline old_hashed_relocation (reiserfs_blocknr_hint_t * hint)
595 {
596     unsigned long border;
597     unsigned long hash_in;
598 
599     if (hint->formatted_node || hint->inode == NULL) {
600 	return 0;
601     }
602 
603     hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
604     border = hint->beg + (unsigned long) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1);
605     if (border > hint->search_start)
606 	hint->search_start = border;
607 
608     return 1;
609 }
610 
old_way(reiserfs_blocknr_hint_t * hint)611 static int inline old_way (reiserfs_blocknr_hint_t * hint)
612 {
613     unsigned long border;
614 
615     if (hint->formatted_node || hint->inode == NULL) {
616 	return 0;
617     }
618 
619       border = hint->beg + le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end  - hint->beg);
620     if (border > hint->search_start)
621 	hint->search_start = border;
622 
623     return 1;
624 }
625 
hundredth_slices(reiserfs_blocknr_hint_t * hint)626 static void inline hundredth_slices (reiserfs_blocknr_hint_t * hint)
627 {
628     struct key * key = &hint->key;
629     unsigned long slice_start;
630 
631     slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100);
632     if ( slice_start > hint->search_start || slice_start + (hint->end / 100) <= hint->search_start) {
633 	hint->search_start = slice_start;
634     }
635 }
636 
determine_search_start(reiserfs_blocknr_hint_t * hint,int amount_needed)637 static void inline determine_search_start(reiserfs_blocknr_hint_t *hint,
638 					  int amount_needed)
639 {
640     struct super_block *s = hint->th->t_super;
641     hint->beg = 0;
642     hint->end = SB_BLOCK_COUNT(s) - 1;
643 
644     /* This is former border algorithm. Now with tunable border offset */
645     if (concentrating_formatted_nodes(s))
646 	set_border_in_hint(s, hint);
647 
648 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
649     /* whenever we create a new directory, we displace it.  At first we will
650        hash for location, later we might look for a moderately empty place for
651        it */
652     if (displacing_new_packing_localities(s)
653 	&& hint->th->displace_new_blocks) {
654 	displace_new_packing_locality(hint);
655 
656 	/* we do not continue determine_search_start,
657 	 * if new packing locality is being displaced */
658 	return;
659     }
660 #endif
661 
662     /* all persons should feel encouraged to add more special cases here and
663      * test them */
664 
665     if (displacing_large_files(s) && !hint->formatted_node
666 	&& this_blocknr_allocation_would_make_it_a_large_file(hint)) {
667 	displace_large_file(hint);
668 	return;
669     }
670 
671     /* attempt to copy a feature from old block allocator code */
672     if (TEST_OPTION(old_hashed_relocation, s) && !hint->formatted_node) {
673 	old_hashed_relocation(hint);
674     }
675 
676     /* if none of our special cases is relevant, use the left neighbor in the
677        tree order of the new node we are allocating for */
678     if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes,s)) {
679 	hash_formatted_node(hint);
680 	return;
681     }
682 
683     get_left_neighbor(hint);
684 
685     /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
686        new blocks are displaced based on directory ID. Also, if suggested search_start
687        is less than last preallocated block, we start searching from it, assuming that
688        HDD dataflow is faster in forward direction */
689     if ( TEST_OPTION(old_way, s)) {
690 	if (!hint->formatted_node) {
691 	    if ( !reiserfs_hashed_relocation(s))
692 		old_way(hint);
693 	    else if (!reiserfs_no_unhashed_relocation(s))
694 		old_hashed_relocation(hint);
695 
696 	    if ( hint->inode && hint->search_start < hint->inode->u.reiserfs_i.i_prealloc_block)
697 		hint->search_start = hint->inode->u.reiserfs_i.i_prealloc_block;
698 	}
699 	return;
700     }
701 
702     /* This is an approach proposed by Hans */
703     if ( TEST_OPTION(hundredth_slices, s) && ! (displacing_large_files(s) && !hint->formatted_node)) {
704 	hundredth_slices(hint);
705 	return;
706     }
707 
708     if (TEST_OPTION(old_hashed_relocation, s))
709 	old_hashed_relocation(hint);
710     if (TEST_OPTION(new_hashed_relocation, s))
711 	new_hashed_relocation(hint);
712     return;
713 }
714 
determine_prealloc_size(reiserfs_blocknr_hint_t * hint)715 static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
716 {
717     /* make minimum size a mount option and benchmark both ways */
718     /* we preallocate blocks only for regular files, specific size */
719     /* benchmark preallocating always and see what happens */
720 
721     hint->prealloc_size = 0;
722 
723     if (!hint->formatted_node && hint->preallocate) {
724 	if (S_ISREG(hint->inode->i_mode)
725 	    && hint->inode->i_size >= hint->th->t_super->u.reiserfs_sb.s_alloc_options.preallocmin * hint->inode->i_sb->s_blocksize)
726 	    hint->prealloc_size = hint->th->t_super->u.reiserfs_sb.s_alloc_options.preallocsize - 1;
727     }
728     return CARRY_ON;
729 }
730 
731 /* XXX I know it could be merged with upper-level function;
732    but may be result function would be too complex. */
allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,b_blocknr_t start,b_blocknr_t finish,int amount_needed,int prealloc_size)733 static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint,
734 					 b_blocknr_t * new_blocknrs,
735 					 b_blocknr_t start, b_blocknr_t finish,
736 					 int amount_needed, int prealloc_size)
737 {
738     int rest = amount_needed;
739     int nr_allocated;
740 
741     while (rest > 0) {
742 	nr_allocated = scan_bitmap (hint->th, &start, finish, 1,
743 				    rest + prealloc_size, !hint->formatted_node,
744 				    hint->block);
745 
746 	if (nr_allocated == 0)	/* no new blocks allocated, return */
747 	    break;
748 
749 	/* fill free_blocknrs array first */
750 	while (rest > 0 && nr_allocated > 0) {
751 	    * new_blocknrs ++ = start ++;
752 	    rest --; nr_allocated --;
753 	}
754 
755 	/* do we have something to fill prealloc. array also ? */
756 	if (nr_allocated > 0) {
757 	    /* it means prealloc_size was greater that 0 and we do preallocation */
758 	    list_add(&INODE_INFO(hint->inode)->i_prealloc_list,
759 		     &SB_JOURNAL(hint->th->t_super)->j_prealloc_list);
760 	    INODE_INFO(hint->inode)->i_prealloc_block = start;
761 	    INODE_INFO(hint->inode)->i_prealloc_count = nr_allocated;
762 	    break;
763 	}
764     }
765 
766     return (amount_needed - rest);
767 }
768 
blocknrs_and_prealloc_arrays_from_search_start(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,int amount_needed)769 static inline int blocknrs_and_prealloc_arrays_from_search_start
770     (reiserfs_blocknr_hint_t *hint, b_blocknr_t *new_blocknrs, int amount_needed)
771 {
772     struct super_block *s = hint->th->t_super;
773     b_blocknr_t start = hint->search_start;
774     b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
775     int second_pass = 0;
776     int nr_allocated = 0;
777 
778     determine_prealloc_size(hint);
779     while((nr_allocated
780 	  += allocate_without_wrapping_disk(hint, new_blocknrs + nr_allocated, start, finish,
781 					  amount_needed - nr_allocated, hint->prealloc_size))
782 	  < amount_needed) {
783 
784 	/* not all blocks were successfully allocated yet*/
785 	if (second_pass) {	/* it was a second pass; we must free all blocks */
786 	    while (nr_allocated --)
787 		reiserfs_free_block(hint->th, new_blocknrs[nr_allocated]);
788 
789 	    return NO_DISK_SPACE;
790 	} else {		/* refine search parameters for next pass */
791 	    second_pass = 1;
792 	    finish = start;
793 	    start = 0;
794 	    continue;
795 	}
796     }
797     return CARRY_ON;
798 }
799 
800 /* grab new blocknrs from preallocated list */
801 /* return amount still needed after using them */
use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,int amount_needed)802 static int use_preallocated_list_if_available (reiserfs_blocknr_hint_t *hint,
803 					       b_blocknr_t *new_blocknrs, int amount_needed)
804 {
805     struct inode * inode = hint->inode;
806 
807     if (INODE_INFO(inode)->i_prealloc_count > 0) {
808 	while (amount_needed) {
809 
810 	    *new_blocknrs ++ = INODE_INFO(inode)->i_prealloc_block ++;
811 	    INODE_INFO(inode)->i_prealloc_count --;
812 
813 	    amount_needed --;
814 
815 	    if (INODE_INFO(inode)->i_prealloc_count <= 0) {
816 		list_del(&inode->u.reiserfs_i.i_prealloc_list);
817 		break;
818 	    }
819 	}
820     }
821     /* return amount still needed after using preallocated blocks */
822     return amount_needed;
823 }
824 
reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,int amount_needed,int reserved_by_us)825 int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
826 			       b_blocknr_t * new_blocknrs, int amount_needed,
827 			       int reserved_by_us /* Amount of blocks we have
828 						      already reserved */)
829 {
830     int initial_amount_needed = amount_needed;
831     int ret;
832 
833     /* Check if there is enough space, taking into account reserved space */
834     if ( SB_FREE_BLOCKS(hint->th->t_super) - hint->th->t_super->u.reiserfs_sb.reserved_blocks <
835 	 amount_needed - reserved_by_us)
836         return NO_DISK_SPACE;
837     /* should this be if !hint->inode &&  hint->preallocate? */
838     /* do you mean hint->formatted_node can be removed ? - Zam */
839     /* hint->formatted_node cannot be removed because we try to access
840        inode information here, and there is often no inode assotiated with
841        metadata allocations - green */
842 
843     if (!hint->formatted_node && hint->preallocate) {
844 	amount_needed = use_preallocated_list_if_available
845 	    (hint, new_blocknrs, amount_needed);
846 	if (amount_needed == 0)	/* all blocknrs we need we got from
847                                    prealloc. list */
848 	    return CARRY_ON;
849 	new_blocknrs += (initial_amount_needed - amount_needed);
850     }
851 
852     /* find search start and save it in hint structure */
853     determine_search_start(hint, amount_needed);
854 
855     /* allocation itself; fill new_blocknrs and preallocation arrays */
856     ret = blocknrs_and_prealloc_arrays_from_search_start
857 	(hint, new_blocknrs, amount_needed);
858 
859     /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
860      * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
861      * variant) */
862 
863     if (ret != CARRY_ON) {
864 	while (amount_needed ++ < initial_amount_needed) {
865 	    reiserfs_free_block(hint->th, *(--new_blocknrs));
866 	}
867     }
868     return ret;
869 }
870 
871 /* These 2 functions are here to provide blocks reservation to the rest of kernel */
872 /* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure
873    there are actually this much blocks on the FS available */
reiserfs_claim_blocks_to_be_allocated(struct super_block * sb,int blocks)874 void reiserfs_claim_blocks_to_be_allocated(
875 				      struct super_block *sb, /* super block of
876 							        filesystem where
877 								blocks should be
878 								reserved */
879 				      int blocks /* How much to reserve */
880 					  )
881 {
882 
883     /* Fast case, if reservation is zero - exit immediately. */
884     if ( !blocks )
885 	return;
886 
887     sb->u.reiserfs_sb.reserved_blocks += blocks;
888 }
889 
890 /* Unreserve @blocks amount of blocks in fs pointed by @sb */
reiserfs_release_claimed_blocks(struct super_block * sb,int blocks)891 void reiserfs_release_claimed_blocks(
892 				struct super_block *sb, /* super block of
893 							  filesystem where
894 							  blocks should be
895 							  reserved */
896 				int blocks /* How much to unreserve */
897 					  )
898 {
899 
900     /* Fast case, if unreservation is zero - exit immediately. */
901     if ( !blocks )
902 	return;
903 
904     sb->u.reiserfs_sb.reserved_blocks -= blocks;
905     RFALSE( sb->u.reiserfs_sb.reserved_blocks < 0, "amount of blocks reserved became zero?");
906 }
907