1 /*
2 * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5 /* Reiserfs block (de)allocator, bitmap-based. */
6
7 #include <linux/config.h>
8 #include <linux/sched.h>
9 #include <linux/vmalloc.h>
10 #include <linux/errno.h>
11 #include <linux/locks.h>
12 #include <linux/kernel.h>
13
14 #include <linux/reiserfs_fs.h>
15 #include <linux/reiserfs_fs_sb.h>
16 #include <linux/reiserfs_fs_i.h>
17
18 #define PREALLOCATION_SIZE 9
19
20 #define INODE_INFO(inode) (&(inode)->u.reiserfs_i)
21
22 /* different reiserfs block allocator options */
23
24 #define SB_ALLOC_OPTS(s) ((s)->u.reiserfs_sb.s_alloc_options.bits)
25
26 #define _ALLOC_concentrating_formatted_nodes 0
27 #define _ALLOC_displacing_large_files 1
28 #define _ALLOC_displacing_new_packing_localities 2
29 #define _ALLOC_old_hashed_relocation 3
30 #define _ALLOC_new_hashed_relocation 4
31 #define _ALLOC_skip_busy 5
32 #define _ALLOC_displace_based_on_dirid 6
33 #define _ALLOC_hashed_formatted_nodes 7
34 #define _ALLOC_old_way 8
35 #define _ALLOC_hundredth_slices 9
36
37 #define concentrating_formatted_nodes(s) test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
38 #define displacing_large_files(s) test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
39 #define displacing_new_packing_localities(s) test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))
40
41 #define SET_OPTION(optname) \
42 do { \
43 reiserfs_warning(s, "reiserfs: option \"%s\" is set\n", #optname); \
44 set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
45 } while(0)
46 #define TEST_OPTION(optname, s) \
47 test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))
48
49
50 /* #define LIMIT(a,b) do { if ((a) > (b)) (a) = (b); } while(0) */
51
get_bit_address(struct super_block * s,unsigned long block,int * bmap_nr,int * offset)52 static inline void get_bit_address (struct super_block * s,
53 unsigned long block, int * bmap_nr, int * offset)
54 {
55 /* It is in the bitmap block number equal to the block
56 * number divided by the number of bits in a block. */
57 *bmap_nr = block / (s->s_blocksize << 3);
58 /* Within that bitmap block it is located at bit offset *offset. */
59 *offset = block & ((s->s_blocksize << 3) - 1 );
60 return;
61 }
62
63 #ifdef CONFIG_REISERFS_CHECK
is_reusable(struct super_block * s,unsigned long block,int bit_value)64 int is_reusable (struct super_block * s, unsigned long block, int bit_value)
65 {
66 int i, j;
67
68 if (block == 0 || block >= SB_BLOCK_COUNT (s)) {
69 reiserfs_warning (s, "vs-4010: is_reusable: block number is out of range %lu (%u)\n",
70 block, SB_BLOCK_COUNT (s));
71 return 0;
72 }
73
74 /* it can't be one of the bitmap blocks */
75 for (i = 0; i < SB_BMAP_NR (s); i ++)
76 if (block == SB_AP_BITMAP (s)[i].bh->b_blocknr) {
77 reiserfs_warning (s, "vs: 4020: is_reusable: "
78 "bitmap block %lu(%u) can't be freed or reused\n",
79 block, SB_BMAP_NR (s));
80 return 0;
81 }
82
83 get_bit_address (s, block, &i, &j);
84
85 if (i >= SB_BMAP_NR (s)) {
86 reiserfs_warning (s, "vs-4030: is_reusable: there is no so many bitmap blocks: "
87 "block=%lu, bitmap_nr=%d\n", block, i);
88 return 0;
89 }
90
91 if ((bit_value == 0 &&
92 reiserfs_test_le_bit(j, SB_AP_BITMAP(s)[i].bh->b_data)) ||
93 (bit_value == 1 &&
94 reiserfs_test_le_bit(j, SB_AP_BITMAP (s)[i].bh->b_data) == 0)) {
95 reiserfs_warning (s, "vs-4040: is_reusable: corresponding bit of block %lu does not "
96 "match required value (i==%d, j==%d) test_bit==%d\n",
97 block, i, j, reiserfs_test_le_bit (j, SB_AP_BITMAP (s)[i].bh->b_data));
98
99 return 0;
100 }
101
102 if (bit_value == 0 && block == SB_ROOT_BLOCK (s)) {
103 reiserfs_warning (s, "vs-4050: is_reusable: this is root block (%u), "
104 "it must be busy\n", SB_ROOT_BLOCK (s));
105 return 0;
106 }
107
108 return 1;
109 }
110 #endif /* CONFIG_REISERFS_CHECK */
111
112 /* searches in journal structures for a given block number (bmap, off). If block
113 is found in reiserfs journal it suggests next free block candidate to test. */
is_block_in_journal(struct super_block * s,int bmap,int off,int * next)114 static inline int is_block_in_journal (struct super_block * s, int bmap, int off, int *next)
115 {
116 unsigned int tmp;
117
118 if (reiserfs_in_journal (s, s->s_dev, bmap, off, s->s_blocksize, 1, &tmp)) {
119 if (tmp) { /* hint supplied */
120 *next = tmp;
121 PROC_INFO_INC( s, scan_bitmap.in_journal_hint );
122 } else {
123 (*next) = off + 1; /* inc offset to avoid looping. */
124 PROC_INFO_INC( s, scan_bitmap.in_journal_nohint );
125 }
126 PROC_INFO_INC( s, scan_bitmap.retry );
127 return 1;
128 }
129 return 0;
130 }
131
132 /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
133 * block; */
scan_bitmap_block(struct reiserfs_transaction_handle * th,int bmap_n,int * beg,int boundary,int min,int max,int unfm)134 static int scan_bitmap_block (struct reiserfs_transaction_handle *th,
135 int bmap_n, int *beg, int boundary, int min, int max, int unfm)
136 {
137 struct super_block *s = th->t_super;
138 struct reiserfs_bitmap_info *bi=&SB_AP_BITMAP(s)[bmap_n];
139 int end, next;
140 int org = *beg;
141
142 RFALSE(bmap_n >= SB_BMAP_NR (s), "Bitmap %d is out of range (0..%d)\n",bmap_n, SB_BMAP_NR (s) - 1);
143 PROC_INFO_INC( s, scan_bitmap.bmap );
144 /* this is unclear and lacks comments, explain how journal bitmaps
145 work here for the reader. Convey a sense of the design here. What
146 is a window? */
147 /* - I mean `a window of zero bits' as in description of this function - Zam. */
148
149 if ( !bi ) {
150 printk("Hey, bitmap info pointer is zero for bitmap %d!\n",bmap_n);
151 return 0;
152 }
153 if (buffer_locked (bi->bh)) {
154 PROC_INFO_INC( s, scan_bitmap.wait );
155 __wait_on_buffer (bi->bh);
156 }
157
158 /* If we know that first zero bit is only one or first zero bit is
159 closer to the end of bitmap than our start pointer */
160 if (bi->first_zero_hint > *beg || bi->free_count == 1)
161 *beg = bi->first_zero_hint;
162
163 while (1) {
164 cont:
165 if (bi->free_count < min)
166 return 0; // No free blocks in this bitmap
167
168 /* search for a first zero bit -- beggining of a window */
169 *beg = reiserfs_find_next_zero_le_bit
170 ((unsigned long*)(bi->bh->b_data), boundary, *beg);
171
172 if (*beg + min > boundary) { /* search for a zero bit fails or the rest of bitmap block
173 * cannot contain a zero window of minimum size */
174 return 0;
175 }
176
177 if (unfm && is_block_in_journal(s,bmap_n, *beg, beg))
178 continue;
179 /* first zero bit found; we check next bits */
180 for (end = *beg + 1;; end ++) {
181 if (end >= *beg + max || end >= boundary || reiserfs_test_le_bit (end, bi->bh->b_data)) {
182 next = end;
183 break;
184 }
185 /* finding the other end of zero bit window requires looking into journal structures (in
186 * case of searching for free blocks for unformatted nodes) */
187 if (unfm && is_block_in_journal(s, bmap_n, end, &next))
188 break;
189 }
190
191 /* now (*beg) points to beginning of zero bits window,
192 * (end) points to one bit after the window end */
193 if (end - *beg >= min) { /* it seems we have found window of proper size */
194 int i;
195 reiserfs_prepare_for_journal (s, bi->bh, 1);
196 /* try to set all blocks used checking are they still free */
197 for (i = *beg; i < end; i++) {
198 /* It seems that we should not check in journal again. */
199 if (reiserfs_test_and_set_le_bit (i, bi->bh->b_data)) {
200 /* bit was set by another process
201 * while we slept in prepare_for_journal() */
202 PROC_INFO_INC( s, scan_bitmap.stolen );
203 if (i >= *beg + min) { /* we can continue with smaller set of allocated blocks,
204 * if length of this set is more or equal to `min' */
205 end = i;
206 break;
207 }
208 /* otherwise we clear all bit were set ... */
209 while (--i >= *beg)
210 reiserfs_test_and_clear_le_bit (i, bi->bh->b_data);
211 reiserfs_restore_prepared_buffer (s, bi->bh);
212 *beg = max(org, (int)bi->first_zero_hint);
213 /* ... and search again in current block from beginning */
214 goto cont;
215 }
216 }
217 bi->free_count -= (end - *beg);
218
219 /* if search started from zero_hint bit, and zero hint have not
220 changed since, then we need to update first_zero_hint */
221 if ( bi->first_zero_hint >= *beg)
222 /* no point in looking for free bit if there is not any */
223 bi->first_zero_hint = (bi->free_count > 0 ) ?
224 reiserfs_find_next_zero_le_bit
225 ((unsigned long*)(bi->bh->b_data), s->s_blocksize << 3, end) : (s->s_blocksize << 3);
226
227 journal_mark_dirty (th, s, bi->bh);
228
229 /* free block count calculation */
230 reiserfs_prepare_for_journal (s, SB_BUFFER_WITH_SB(s), 1);
231 PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
232 journal_mark_dirty (th, s, SB_BUFFER_WITH_SB(s));
233
234 return end - (*beg);
235 } else {
236 *beg = next;
237 }
238 }
239 }
240
241 /* Tries to find contiguous zero bit window (given size) in given region of
242 * bitmap and place new blocks there. Returns number of allocated blocks. */
scan_bitmap(struct reiserfs_transaction_handle * th,unsigned long * start,unsigned long finish,int min,int max,int unfm,unsigned long file_block)243 static int scan_bitmap (struct reiserfs_transaction_handle *th,
244 unsigned long *start, unsigned long finish,
245 int min, int max, int unfm, unsigned long file_block)
246 {
247 int nr_allocated=0;
248 struct super_block * s = th->t_super;
249 /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
250 * - Hans, it is not a block number - Zam. */
251
252 int bm, off;
253 int end_bm, end_off;
254 int off_max = s->s_blocksize << 3;
255
256 PROC_INFO_INC( s, scan_bitmap.call );
257 if ( SB_FREE_BLOCKS(s) <= 0)
258 return 0; // No point in looking for more free blocks
259
260 get_bit_address (s, *start, &bm, &off);
261 get_bit_address (s, finish, &end_bm, &end_off);
262
263 // With this option set first we try to find a bitmap that is at least 10%
264 // free, and if that fails, then we fall back to old whole bitmap scanning
265 if ( TEST_OPTION(skip_busy, s) && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s)/20 ) {
266 for (;bm < end_bm; bm++, off = 0) {
267 if ( ( off && (!unfm || (file_block != 0))) || SB_AP_BITMAP(s)[bm].free_count > (s->s_blocksize << 3) / 10 )
268 nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
269 if (nr_allocated)
270 goto ret;
271 }
272 get_bit_address (s, *start, &bm, &off);
273 }
274
275 for (;bm < end_bm; bm++, off = 0) {
276 nr_allocated = scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
277 if (nr_allocated)
278 goto ret;
279 }
280
281 nr_allocated = scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);
282
283 ret:
284 *start = bm * off_max + off;
285 return nr_allocated;
286
287 }
288
_reiserfs_free_block(struct reiserfs_transaction_handle * th,b_blocknr_t block)289 static void _reiserfs_free_block (struct reiserfs_transaction_handle *th,
290 b_blocknr_t block)
291 {
292 struct super_block * s = th->t_super;
293 struct reiserfs_super_block * rs;
294 struct buffer_head * sbh;
295 struct reiserfs_bitmap_info *apbi;
296 int nr, offset;
297
298 PROC_INFO_INC( s, free_block );
299
300 rs = SB_DISK_SUPER_BLOCK (s);
301 sbh = SB_BUFFER_WITH_SB (s);
302 apbi = SB_AP_BITMAP(s);
303
304 get_bit_address (s, block, &nr, &offset);
305
306 if (nr >= sb_bmap_nr (rs)) {
307 reiserfs_warning (s, "vs-4075: reiserfs_free_block: "
308 "block %lu is out of range on %s\n",
309 block, bdevname(s->s_dev));
310 return;
311 }
312
313 reiserfs_prepare_for_journal(s, apbi[nr].bh, 1 ) ;
314
315 /* clear bit for the given block in bit map */
316 if (!reiserfs_test_and_clear_le_bit (offset, apbi[nr].bh->b_data)) {
317 reiserfs_warning (s, "vs-4080: reiserfs_free_block: "
318 "free_block (%04x:%lu)[dev:blocknr]: bit already cleared\n",
319 s->s_dev, block);
320 }
321 if (offset < apbi[nr].first_zero_hint) {
322 apbi[nr].first_zero_hint = offset;
323 }
324 apbi[nr].free_count ++;
325 journal_mark_dirty (th, s, apbi[nr].bh);
326
327 reiserfs_prepare_for_journal(s, sbh, 1) ;
328 /* update super block */
329 set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 );
330
331 journal_mark_dirty (th, s, sbh);
332 }
333
reiserfs_free_block(struct reiserfs_transaction_handle * th,unsigned long block)334 void reiserfs_free_block (struct reiserfs_transaction_handle *th,
335 unsigned long block) {
336 struct super_block * s = th->t_super;
337
338 RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
339 RFALSE(is_reusable (s, block, 1) == 0, "vs-4071: can not free such block");
340 /* mark it before we clear it, just in case */
341 journal_mark_freed(th, s, block) ;
342 _reiserfs_free_block(th, block) ;
343 }
344
345 /* preallocated blocks don't need to be run through journal_mark_freed */
reiserfs_free_prealloc_block(struct reiserfs_transaction_handle * th,unsigned long block)346 void reiserfs_free_prealloc_block (struct reiserfs_transaction_handle *th,
347 unsigned long block) {
348 RFALSE(!th->t_super, "vs-4060: trying to free block on nonexistent device");
349 RFALSE(is_reusable (th->t_super, block, 1) == 0, "vs-4070: can not free such block");
350 _reiserfs_free_block(th, block) ;
351 }
352
__discard_prealloc(struct reiserfs_transaction_handle * th,struct inode * inode)353 static void __discard_prealloc (struct reiserfs_transaction_handle * th,
354 struct inode * inode)
355 {
356 unsigned long save = inode->u.reiserfs_i.i_prealloc_block ;
357 #ifdef CONFIG_REISERFS_CHECK
358 if (inode->u.reiserfs_i.i_prealloc_count < 0)
359 reiserfs_warning(th->t_super, "zam-4001:%s: inode has negative prealloc blocks count.\n", __FUNCTION__ );
360 #endif
361 while (inode->u.reiserfs_i.i_prealloc_count > 0) {
362 reiserfs_free_prealloc_block(th,inode->u.reiserfs_i.i_prealloc_block);
363 inode->u.reiserfs_i.i_prealloc_block++;
364 inode->u.reiserfs_i.i_prealloc_count --;
365 }
366 inode->u.reiserfs_i.i_prealloc_block = save ;
367 list_del (&(inode->u.reiserfs_i.i_prealloc_list));
368 }
369
370 /* FIXME: It should be inline function */
reiserfs_discard_prealloc(struct reiserfs_transaction_handle * th,struct inode * inode)371 void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th,
372 struct inode * inode)
373 {
374 if (inode->u.reiserfs_i.i_prealloc_count) {
375 __discard_prealloc(th, inode);
376 }
377 }
378
reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle * th)379 void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th)
380 {
381 struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
382 struct inode * inode;
383
384 while (!list_empty(plist)) {
385 inode = list_entry(plist->next, struct inode, u.reiserfs_i.i_prealloc_list);
386 #ifdef CONFIG_REISERFS_CHECK
387 if (!inode->u.reiserfs_i.i_prealloc_count) {
388 reiserfs_warning(th->t_super, "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.\n", __FUNCTION__ );
389 }
390 #endif
391 __discard_prealloc(th, inode);
392 }
393 }
394
395 /* block allocator related options are parsed here */
reiserfs_parse_alloc_options(struct super_block * s,char * options)396 int reiserfs_parse_alloc_options(struct super_block * s, char * options)
397 {
398 char * this_char, * value;
399
400 s->u.reiserfs_sb.s_alloc_options.bits = 0; /* clear default settings */
401
402 for (this_char = strtok (options, ":"); this_char != NULL; this_char = strtok (NULL, ":")) {
403 if ((value = strchr (this_char, '=')) != NULL)
404 *value++ = 0;
405
406 if (!strcmp(this_char, "concentrating_formatted_nodes")) {
407 int temp;
408 SET_OPTION(concentrating_formatted_nodes);
409 temp = (value && *value) ? simple_strtoul (value, &value, 0) : 10;
410 if (temp <= 0 || temp > 100) {
411 s->u.reiserfs_sb.s_alloc_options.border = 10;
412 } else {
413 s->u.reiserfs_sb.s_alloc_options.border = 100 / temp;
414 }
415 continue;
416 }
417 if (!strcmp(this_char, "displacing_large_files")) {
418 SET_OPTION(displacing_large_files);
419 s->u.reiserfs_sb.s_alloc_options.large_file_size =
420 (value && *value) ? simple_strtoul (value, &value, 0) : 16;
421 continue;
422 }
423 if (!strcmp(this_char, "displacing_new_packing_localities")) {
424 SET_OPTION(displacing_new_packing_localities);
425 continue;
426 };
427
428 if (!strcmp(this_char, "old_hashed_relocation")) {
429 SET_OPTION(old_hashed_relocation);
430 continue;
431 }
432
433 if (!strcmp(this_char, "new_hashed_relocation")) {
434 SET_OPTION(new_hashed_relocation);
435 continue;
436 }
437
438 if (!strcmp(this_char, "hashed_formatted_nodes")) {
439 SET_OPTION(hashed_formatted_nodes);
440 continue;
441 }
442
443 if (!strcmp(this_char, "skip_busy")) {
444 SET_OPTION(skip_busy);
445 continue;
446 }
447
448 if (!strcmp(this_char, "hundredth_slices")) {
449 SET_OPTION(hundredth_slices);
450 continue;
451 }
452
453 if (!strcmp(this_char, "old_way")) {
454 SET_OPTION(old_way);
455 continue;
456 }
457
458 if (!strcmp(this_char, "displace_based_on_dirid")) {
459 SET_OPTION(displace_based_on_dirid);
460 continue;
461 }
462
463 if (!strcmp(this_char, "preallocmin")) {
464 s->u.reiserfs_sb.s_alloc_options.preallocmin =
465 (value && *value) ? simple_strtoul (value, &value, 0) : 4;
466 continue;
467 }
468
469 if (!strcmp(this_char, "preallocsize")) {
470 s->u.reiserfs_sb.s_alloc_options.preallocsize =
471 (value && *value) ? simple_strtoul (value, &value, 0) : PREALLOCATION_SIZE;
472 continue;
473 }
474
475 reiserfs_warning(s, "zam-4001: %s : unknown option - %s\n", __FUNCTION__ , this_char);
476 return 1;
477 }
478
479 return 0;
480 }
481
new_hashed_relocation(reiserfs_blocknr_hint_t * hint)482 static void inline new_hashed_relocation (reiserfs_blocknr_hint_t * hint)
483 {
484 char * hash_in;
485 if (hint->formatted_node) {
486 hash_in = (char*)&hint->key.k_dir_id;
487 } else {
488 if (!hint->inode) {
489 //hint->search_start = hint->beg;
490 hash_in = (char*)&hint->key.k_dir_id;
491 } else
492 if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
493 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
494 else
495 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
496 }
497
498 hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
499 }
500
get_left_neighbor(reiserfs_blocknr_hint_t * hint)501 static void inline get_left_neighbor(reiserfs_blocknr_hint_t *hint)
502 {
503 struct path * path;
504 struct buffer_head * bh;
505 struct item_head * ih;
506 int pos_in_item;
507 __u32 * item;
508
509 if (!hint->path) /* reiserfs code can call this function w/o pointer to path
510 * structure supplied; then we rely on supplied search_start */
511 return;
512
513 path = hint->path;
514 bh = get_last_bh(path);
515 RFALSE( !bh, "green-4002: Illegal path specified to get_left_neighbor\n");
516 ih = get_ih(path);
517 pos_in_item = path->pos_in_item;
518 item = get_item (path);
519
520 hint->search_start = bh->b_blocknr;
521
522 if (!hint->formatted_node && is_indirect_le_ih (ih)) {
523 /* for indirect item: go to left and look for the first non-hole entry
524 in the indirect item */
525 if (pos_in_item == I_UNFM_NUM (ih))
526 pos_in_item--;
527 // pos_in_item = I_UNFM_NUM (ih) - 1;
528 while (pos_in_item >= 0) {
529 int t=get_block_num(item,pos_in_item);
530 if (t) {
531 hint->search_start = t;
532 break;
533 }
534 pos_in_item --;
535 }
536 } else {
537 }
538
539 /* does result value fit into specified region? */
540 return;
541 }
542
543 /* should be, if formatted node, then try to put on first part of the device
544 specified as number of percent with mount option device, else try to put
545 on last of device. This is not to say it is good code to do so,
546 but the effect should be measured. */
set_border_in_hint(struct super_block * s,reiserfs_blocknr_hint_t * hint)547 static void inline set_border_in_hint(struct super_block *s, reiserfs_blocknr_hint_t *hint)
548 {
549 b_blocknr_t border = SB_BLOCK_COUNT(hint->th->t_super) / s->u.reiserfs_sb.s_alloc_options.border;
550
551 if (hint->formatted_node)
552 hint->end = border - 1;
553 else
554 hint->beg = border;
555 }
556
displace_large_file(reiserfs_blocknr_hint_t * hint)557 static void inline displace_large_file(reiserfs_blocknr_hint_t *hint)
558 {
559 if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
560 hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_dir_id), 4) % (hint->end - hint->beg);
561 else
562 hint->search_start = hint->beg + keyed_hash((char *)(&INODE_PKEY(hint->inode)->k_objectid), 4) % (hint->end - hint->beg);
563 }
564
hash_formatted_node(reiserfs_blocknr_hint_t * hint)565 static void inline hash_formatted_node(reiserfs_blocknr_hint_t *hint)
566 {
567 char * hash_in;
568
569 if (!hint->inode)
570 hash_in = (char*)&hint->key.k_dir_id;
571 else if ( TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
572 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
573 else
574 hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);
575
576 hint->search_start = hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
577 }
578
this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t * hint)579 static int inline this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *hint)
580 {
581 return hint->block == hint->th->t_super->u.reiserfs_sb.s_alloc_options.large_file_size;
582 }
583
584 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)585 static void inline displace_new_packing_locality (reiserfs_blocknr_hint_t *hint)
586 {
587 struct key * key = &hint->key;
588
589 hint->th->displace_new_blocks = 0;
590 hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg);
591 }
592 #endif
593
old_hashed_relocation(reiserfs_blocknr_hint_t * hint)594 static int inline old_hashed_relocation (reiserfs_blocknr_hint_t * hint)
595 {
596 unsigned long border;
597 unsigned long hash_in;
598
599 if (hint->formatted_node || hint->inode == NULL) {
600 return 0;
601 }
602
603 hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
604 border = hint->beg + (unsigned long) keyed_hash(((char *) (&hash_in)), 4) % (hint->end - hint->beg - 1);
605 if (border > hint->search_start)
606 hint->search_start = border;
607
608 return 1;
609 }
610
old_way(reiserfs_blocknr_hint_t * hint)611 static int inline old_way (reiserfs_blocknr_hint_t * hint)
612 {
613 unsigned long border;
614
615 if (hint->formatted_node || hint->inode == NULL) {
616 return 0;
617 }
618
619 border = hint->beg + le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end - hint->beg);
620 if (border > hint->search_start)
621 hint->search_start = border;
622
623 return 1;
624 }
625
hundredth_slices(reiserfs_blocknr_hint_t * hint)626 static void inline hundredth_slices (reiserfs_blocknr_hint_t * hint)
627 {
628 struct key * key = &hint->key;
629 unsigned long slice_start;
630
631 slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100);
632 if ( slice_start > hint->search_start || slice_start + (hint->end / 100) <= hint->search_start) {
633 hint->search_start = slice_start;
634 }
635 }
636
determine_search_start(reiserfs_blocknr_hint_t * hint,int amount_needed)637 static void inline determine_search_start(reiserfs_blocknr_hint_t *hint,
638 int amount_needed)
639 {
640 struct super_block *s = hint->th->t_super;
641 hint->beg = 0;
642 hint->end = SB_BLOCK_COUNT(s) - 1;
643
644 /* This is former border algorithm. Now with tunable border offset */
645 if (concentrating_formatted_nodes(s))
646 set_border_in_hint(s, hint);
647
648 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
649 /* whenever we create a new directory, we displace it. At first we will
650 hash for location, later we might look for a moderately empty place for
651 it */
652 if (displacing_new_packing_localities(s)
653 && hint->th->displace_new_blocks) {
654 displace_new_packing_locality(hint);
655
656 /* we do not continue determine_search_start,
657 * if new packing locality is being displaced */
658 return;
659 }
660 #endif
661
662 /* all persons should feel encouraged to add more special cases here and
663 * test them */
664
665 if (displacing_large_files(s) && !hint->formatted_node
666 && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
667 displace_large_file(hint);
668 return;
669 }
670
671 /* attempt to copy a feature from old block allocator code */
672 if (TEST_OPTION(old_hashed_relocation, s) && !hint->formatted_node) {
673 old_hashed_relocation(hint);
674 }
675
676 /* if none of our special cases is relevant, use the left neighbor in the
677 tree order of the new node we are allocating for */
678 if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes,s)) {
679 hash_formatted_node(hint);
680 return;
681 }
682
683 get_left_neighbor(hint);
684
685 /* Mimic old block allocator behaviour, that is if VFS allowed for preallocation,
686 new blocks are displaced based on directory ID. Also, if suggested search_start
687 is less than last preallocated block, we start searching from it, assuming that
688 HDD dataflow is faster in forward direction */
689 if ( TEST_OPTION(old_way, s)) {
690 if (!hint->formatted_node) {
691 if ( !reiserfs_hashed_relocation(s))
692 old_way(hint);
693 else if (!reiserfs_no_unhashed_relocation(s))
694 old_hashed_relocation(hint);
695
696 if ( hint->inode && hint->search_start < hint->inode->u.reiserfs_i.i_prealloc_block)
697 hint->search_start = hint->inode->u.reiserfs_i.i_prealloc_block;
698 }
699 return;
700 }
701
702 /* This is an approach proposed by Hans */
703 if ( TEST_OPTION(hundredth_slices, s) && ! (displacing_large_files(s) && !hint->formatted_node)) {
704 hundredth_slices(hint);
705 return;
706 }
707
708 if (TEST_OPTION(old_hashed_relocation, s))
709 old_hashed_relocation(hint);
710 if (TEST_OPTION(new_hashed_relocation, s))
711 new_hashed_relocation(hint);
712 return;
713 }
714
determine_prealloc_size(reiserfs_blocknr_hint_t * hint)715 static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
716 {
717 /* make minimum size a mount option and benchmark both ways */
718 /* we preallocate blocks only for regular files, specific size */
719 /* benchmark preallocating always and see what happens */
720
721 hint->prealloc_size = 0;
722
723 if (!hint->formatted_node && hint->preallocate) {
724 if (S_ISREG(hint->inode->i_mode)
725 && hint->inode->i_size >= hint->th->t_super->u.reiserfs_sb.s_alloc_options.preallocmin * hint->inode->i_sb->s_blocksize)
726 hint->prealloc_size = hint->th->t_super->u.reiserfs_sb.s_alloc_options.preallocsize - 1;
727 }
728 return CARRY_ON;
729 }
730
731 /* XXX I know it could be merged with upper-level function;
732 but may be result function would be too complex. */
allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,b_blocknr_t start,b_blocknr_t finish,int amount_needed,int prealloc_size)733 static inline int allocate_without_wrapping_disk (reiserfs_blocknr_hint_t * hint,
734 b_blocknr_t * new_blocknrs,
735 b_blocknr_t start, b_blocknr_t finish,
736 int amount_needed, int prealloc_size)
737 {
738 int rest = amount_needed;
739 int nr_allocated;
740
741 while (rest > 0) {
742 nr_allocated = scan_bitmap (hint->th, &start, finish, 1,
743 rest + prealloc_size, !hint->formatted_node,
744 hint->block);
745
746 if (nr_allocated == 0) /* no new blocks allocated, return */
747 break;
748
749 /* fill free_blocknrs array first */
750 while (rest > 0 && nr_allocated > 0) {
751 * new_blocknrs ++ = start ++;
752 rest --; nr_allocated --;
753 }
754
755 /* do we have something to fill prealloc. array also ? */
756 if (nr_allocated > 0) {
757 /* it means prealloc_size was greater that 0 and we do preallocation */
758 list_add(&INODE_INFO(hint->inode)->i_prealloc_list,
759 &SB_JOURNAL(hint->th->t_super)->j_prealloc_list);
760 INODE_INFO(hint->inode)->i_prealloc_block = start;
761 INODE_INFO(hint->inode)->i_prealloc_count = nr_allocated;
762 break;
763 }
764 }
765
766 return (amount_needed - rest);
767 }
768
blocknrs_and_prealloc_arrays_from_search_start(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,int amount_needed)769 static inline int blocknrs_and_prealloc_arrays_from_search_start
770 (reiserfs_blocknr_hint_t *hint, b_blocknr_t *new_blocknrs, int amount_needed)
771 {
772 struct super_block *s = hint->th->t_super;
773 b_blocknr_t start = hint->search_start;
774 b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
775 int second_pass = 0;
776 int nr_allocated = 0;
777
778 determine_prealloc_size(hint);
779 while((nr_allocated
780 += allocate_without_wrapping_disk(hint, new_blocknrs + nr_allocated, start, finish,
781 amount_needed - nr_allocated, hint->prealloc_size))
782 < amount_needed) {
783
784 /* not all blocks were successfully allocated yet*/
785 if (second_pass) { /* it was a second pass; we must free all blocks */
786 while (nr_allocated --)
787 reiserfs_free_block(hint->th, new_blocknrs[nr_allocated]);
788
789 return NO_DISK_SPACE;
790 } else { /* refine search parameters for next pass */
791 second_pass = 1;
792 finish = start;
793 start = 0;
794 continue;
795 }
796 }
797 return CARRY_ON;
798 }
799
800 /* grab new blocknrs from preallocated list */
801 /* return amount still needed after using them */
use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,int amount_needed)802 static int use_preallocated_list_if_available (reiserfs_blocknr_hint_t *hint,
803 b_blocknr_t *new_blocknrs, int amount_needed)
804 {
805 struct inode * inode = hint->inode;
806
807 if (INODE_INFO(inode)->i_prealloc_count > 0) {
808 while (amount_needed) {
809
810 *new_blocknrs ++ = INODE_INFO(inode)->i_prealloc_block ++;
811 INODE_INFO(inode)->i_prealloc_count --;
812
813 amount_needed --;
814
815 if (INODE_INFO(inode)->i_prealloc_count <= 0) {
816 list_del(&inode->u.reiserfs_i.i_prealloc_list);
817 break;
818 }
819 }
820 }
821 /* return amount still needed after using preallocated blocks */
822 return amount_needed;
823 }
824
reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint,b_blocknr_t * new_blocknrs,int amount_needed,int reserved_by_us)825 int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *hint,
826 b_blocknr_t * new_blocknrs, int amount_needed,
827 int reserved_by_us /* Amount of blocks we have
828 already reserved */)
829 {
830 int initial_amount_needed = amount_needed;
831 int ret;
832
833 /* Check if there is enough space, taking into account reserved space */
834 if ( SB_FREE_BLOCKS(hint->th->t_super) - hint->th->t_super->u.reiserfs_sb.reserved_blocks <
835 amount_needed - reserved_by_us)
836 return NO_DISK_SPACE;
837 /* should this be if !hint->inode && hint->preallocate? */
838 /* do you mean hint->formatted_node can be removed ? - Zam */
839 /* hint->formatted_node cannot be removed because we try to access
840 inode information here, and there is often no inode assotiated with
841 metadata allocations - green */
842
843 if (!hint->formatted_node && hint->preallocate) {
844 amount_needed = use_preallocated_list_if_available
845 (hint, new_blocknrs, amount_needed);
846 if (amount_needed == 0) /* all blocknrs we need we got from
847 prealloc. list */
848 return CARRY_ON;
849 new_blocknrs += (initial_amount_needed - amount_needed);
850 }
851
852 /* find search start and save it in hint structure */
853 determine_search_start(hint, amount_needed);
854
855 /* allocation itself; fill new_blocknrs and preallocation arrays */
856 ret = blocknrs_and_prealloc_arrays_from_search_start
857 (hint, new_blocknrs, amount_needed);
858
859 /* we used prealloc. list to fill (partially) new_blocknrs array. If final allocation fails we
860 * need to return blocks back to prealloc. list or just free them. -- Zam (I chose second
861 * variant) */
862
863 if (ret != CARRY_ON) {
864 while (amount_needed ++ < initial_amount_needed) {
865 reiserfs_free_block(hint->th, *(--new_blocknrs));
866 }
867 }
868 return ret;
869 }
870
871 /* These 2 functions are here to provide blocks reservation to the rest of kernel */
872 /* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure
873 there are actually this much blocks on the FS available */
reiserfs_claim_blocks_to_be_allocated(struct super_block * sb,int blocks)874 void reiserfs_claim_blocks_to_be_allocated(
875 struct super_block *sb, /* super block of
876 filesystem where
877 blocks should be
878 reserved */
879 int blocks /* How much to reserve */
880 )
881 {
882
883 /* Fast case, if reservation is zero - exit immediately. */
884 if ( !blocks )
885 return;
886
887 sb->u.reiserfs_sb.reserved_blocks += blocks;
888 }
889
890 /* Unreserve @blocks amount of blocks in fs pointed by @sb */
reiserfs_release_claimed_blocks(struct super_block * sb,int blocks)891 void reiserfs_release_claimed_blocks(
892 struct super_block *sb, /* super block of
893 filesystem where
894 blocks should be
895 reserved */
896 int blocks /* How much to unreserve */
897 )
898 {
899
900 /* Fast case, if unreservation is zero - exit immediately. */
901 if ( !blocks )
902 return;
903
904 sb->u.reiserfs_sb.reserved_blocks -= blocks;
905 RFALSE( sb->u.reiserfs_sb.reserved_blocks < 0, "amount of blocks reserved became zero?");
906 }
907