1 /*
2 * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
/* Now we have all buffers that must be used in balancing of the tree.  */
/* Further calculations can not cause schedule(), and thus the buffer   */
/* tree will be stable until the balancing is finished.                 */
/* Balance the tree according to the analysis made before, and using    */
/* the buffers obtained after all of the above.                         */
10
11
12 /**
13 ** balance_leaf_when_delete
14 ** balance_leaf
15 ** do_balance
16 **
17 **/
18
19 #include <linux/config.h>
20 #include <asm/uaccess.h>
21 #include <linux/sched.h>
22 #include <linux/reiserfs_fs.h>
23
#ifdef CONFIG_REISERFS_CHECK

/* Debug-only global: set to the tree_balance currently being applied.
   Detects whether more than one copy of tb exists as a means of
   checking whether schedule is interrupting do_balance. */
struct tree_balance * cur_tb = NULL;
#endif
31
32
do_balance_mark_leaf_dirty(struct tree_balance * tb,struct buffer_head * bh,int flag)33 inline void do_balance_mark_leaf_dirty (struct tree_balance * tb,
34 struct buffer_head * bh, int flag)
35 {
36 if (reiserfs_dont_log(tb->tb_sb)) {
37 if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
38 __mark_buffer_dirty(bh) ;
39 tb->need_balance_dirty = 1;
40 }
41 } else {
42 int windex = push_journal_writer("do_balance") ;
43 journal_mark_dirty(tb->transaction_handle, tb->transaction_handle->t_super, bh) ;
44 pop_journal_writer(windex) ;
45 }
46 }
47
/* Internal nodes and the super block are dirtied exactly the same way
   as leaves, so these are simple aliases for the routine above. */
#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
50
51
52 /* summary:
53 if deleting something ( tb->insert_size[0] < 0 )
54 return(balance_leaf_when_delete()); (flag d handled here)
55 else
56 if lnum is larger than 0 we put items into the left node
57 if rnum is larger than 0 we put items into the right node
58 if snum1 is larger than 0 we put items into the new node s1
59 if snum2 is larger than 0 we put items into the new node s2
60 Note that all *num* count new items being created.
61
62 It would be easier to read balance_leaf() if each of these summary
63 lines was a separate procedure rather than being inlined. I think
64 that there are many passages here and in balance_leaf_when_delete() in
65 which two calls to one procedure can replace two passages, and it
66 might save cache space and improve software maintenance costs to do so.
67
68 Vladimir made the perceptive comment that we should offload most of
69 the decision making in this function into fix_nodes/check_balance, and
70 then create some sort of structure in tb that says what actions should
71 be performed by do_balance.
72
73 -Hans */
74
75
76
77 /* Balance leaf node in case of delete or cut: insert_size[0] < 0
78 *
79 * lnum, rnum can have values >= -1
80 * -1 means that the neighbor must be joined with S
81 * 0 means that nothing should be done with the neighbor
82 * >0 means to shift entirely or partly the specified number of items to the neighbor
83 */
/* Returns 0 in every case except the one where the contents of S[0],
   L[0] and R[0] are all merged into R[0]; there -1 is returned. */
static int balance_leaf_when_delete (struct tree_balance * tb, int flag)
{
    struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
    int item_pos = PATH_LAST_POSITION (tb->tb_path);
    int pos_in_item = tb->tb_path->pos_in_item;
    struct buffer_info bi;
    int n;
    struct item_head * ih;

    RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
	    "vs- 12000: level: wrong FR %z\n", tb->FR[0]);
    RFALSE( tb->blknum[0] > 1,
	    "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
    RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0),
	    "PAP-12010: tree can not be empty");

    /* header of the item being deleted or truncated */
    ih = B_N_PITEM_HEAD (tbS0, item_pos);

    /* Delete or truncate the item */

    switch (flag) {
    case M_DELETE:   /* delete item in S[0] */

	RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
	        "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
	        -tb->insert_size [0], ih);

	bi.tb = tb;
	bi.bi_bh = tbS0;
	bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
	bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
	leaf_delete_items (&bi, 0, item_pos, 1, -1);

	/* deleting the 0-th item changes the left delimiting key of S[0] */
	if ( ! item_pos && tb->CFL[0] ) {
	    if ( B_NR_ITEMS(tbS0) ) {
		/* S[0] is not empty: its new first key becomes the
		   left delimiting key */
		replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
	    }
	    else {
		/* S[0] is now empty; only when S[0] is the 0-th child of
		   its parent does the delimiting key need replacing, and
		   then it is taken from the parent */
		if ( ! PATH_H_POSITION (tb->tb_path, 1) )
		    replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0);
	    }
	}

	RFALSE( ! item_pos && !tb->CFL[0],
	        "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]);

	break;

    case M_CUT: {    /* cut item in S[0] */
	bi.tb = tb;
	bi.bi_bh = tbS0;
	bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
	bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
	if (is_direntry_le_ih (ih)) {

	    /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
	    /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
	    tb->insert_size[0] = -1;
	    leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);

	    RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0],
	            "PAP-12030: can not change delimiting key. CFL[0]=%p",
	            tb->CFL[0]);

	    /* cutting the very first entry of the very first item changes
	       the left delimiting key of S[0] */
	    if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) {
		replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
	    }
	} else {
	    /* non-directory item: insert_size[0] is negative, so this
	       cuts -insert_size[0] bytes from the item's tail */
	    leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);

	    RFALSE( ! ih_item_len(ih),
	            "PAP-12035: cut must leave non-zero dynamic length of item");
	}
	break;
    }

    default:
	print_cur_tb ("12040");
	reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
			(flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag);
    }

    /* the rule is that no shifting occurs unless by shifting a node can be freed */
    n = B_NR_ITEMS(tbS0);
    if ( tb->lnum[0] )     /* L[0] takes part in balancing */
    {
	if ( tb->lnum[0] == -1 )    /* L[0] must be joined with S[0] */
	{
	    if ( tb->rnum[0] == -1 )    /* R[0] must be also joined with S[0] */
	    {
		if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) )
		{
		    /* all contents of all the 3 buffers will be in L[0] */
		    /* if R[0] is not the 0-th child of its father, shifting it
		       away changes the delimiting key between FR's children */
		    if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) )
			replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1);

		    leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, 0);
		    leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, 0);

		    reiserfs_invalidate_buffer (tb, tbS0);
		    reiserfs_invalidate_buffer (tb, tb->R[0]);

		    return 0;
		}
		/* all contents of all the 3 buffers will be in R[0] */
		leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, 0);
		leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, 0);

		/* right_delimiting_key is correct in R[0] */
		replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);

		reiserfs_invalidate_buffer (tb, tbS0);
		reiserfs_invalidate_buffer (tb, tb->L[0]);

		/* -1 tells the caller everything ended up in R[0] */
		return -1;
	    }

	    RFALSE( tb->rnum[0] != 0,
	            "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
	    /* all contents of L[0] and S[0] will be in L[0] */
	    leaf_shift_left(tb, n, -1);

	    reiserfs_invalidate_buffer (tb, tbS0);

	    return 0;
	}
	/* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */

	RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) ||
	        ( tb->lnum[0] + tb->rnum[0] > n+1 ),
	        "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
	        tb->rnum[0], tb->lnum[0], n);
	RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) &&
	        (tb->lbytes != -1 || tb->rbytes != -1),
	        "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
	        tb->rbytes, tb->lbytes);
	RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) &&
	        (tb->lbytes < 1 || tb->rbytes != -1),
	        "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
	        tb->rbytes, tb->lbytes);

	/* split S[0] between the two neighbors, then free it */
	leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
	leaf_shift_right(tb, tb->rnum[0], tb->rbytes);

	reiserfs_invalidate_buffer (tb, tbS0);

	return 0;
    }

    if ( tb->rnum[0] == -1 ) {
	/* all contents of R[0] and S[0] will be in R[0] */
	leaf_shift_right(tb, n, -1);
	reiserfs_invalidate_buffer (tb, tbS0);
	return 0;
    }

    /* neither neighbor participates; nothing further to do */
    RFALSE( tb->rnum[0],
	    "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
    return 0;
}
244
245
balance_leaf(struct tree_balance * tb,struct item_head * ih,const char * body,int flag,struct item_head * insert_key,struct buffer_head ** insert_ptr)246 static int balance_leaf (struct tree_balance * tb,
247 struct item_head * ih, /* item header of inserted item (this is on little endian) */
248 const char * body, /* body of inserted item or bytes to paste */
249 int flag, /* i - insert, d - delete, c - cut, p - paste
250 (see comment to do_balance) */
251 struct item_head * insert_key, /* in our processing of one level we sometimes determine what
252 must be inserted into the next higher level. This insertion
253 consists of a key or two keys and their corresponding
254 pointers */
255 struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */
256 )
257 {
258 struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
259 int item_pos = PATH_LAST_POSITION (tb->tb_path); /* index into the array of item headers in S[0]
260 of the affected item */
261 struct buffer_info bi;
262 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
263 int snum[2]; /* number of items that will be placed
264 into S_new (includes partially shifted
265 items) */
266 int sbytes[2]; /* if an item is partially shifted into S_new then
267 if it is a directory item
268 it is the number of entries from the item that are shifted into S_new
269 else
270 it is the number of bytes from the item that are shifted into S_new
271 */
272 int n, i;
273 int ret_val;
274 int pos_in_item;
275 int zeros_num;
276
277 PROC_INFO_INC( tb -> tb_sb, balance_at[ 0 ] );
278
279 /* Make balance in case insert_size[0] < 0 */
280 if ( tb->insert_size[0] < 0 )
281 return balance_leaf_when_delete (tb, flag);
282
283 zeros_num = 0;
284 if (flag == M_INSERT && body == 0)
285 zeros_num = ih_item_len( ih );
286
287 pos_in_item = tb->tb_path->pos_in_item;
288 /* for indirect item pos_in_item is measured in unformatted node
289 pointers. Recalculate to bytes */
290 if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos)))
291 pos_in_item *= UNFM_P_SIZE;
292
293 if ( tb->lnum[0] > 0 ) {
294 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
295 if ( item_pos < tb->lnum[0] ) {
296 /* new item or it part falls to L[0], shift it too */
297 n = B_NR_ITEMS(tb->L[0]);
298
299 switch (flag) {
300 case M_INSERT: /* insert item into L[0] */
301
302 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
303 /* part of new item falls into L[0] */
304 int new_item_len;
305 int version;
306
307 ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1);
308
309 /* Calculate item length to insert to S[0] */
310 new_item_len = ih_item_len(ih) - tb->lbytes;
311 /* Calculate and check item length to insert to L[0] */
312 put_ih_item_len(ih, ih_item_len(ih) - new_item_len );
313
314 RFALSE( ih_item_len(ih) <= 0,
315 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
316 ih_item_len(ih));
317
318 /* Insert new item into L[0] */
319 bi.tb = tb;
320 bi.bi_bh = tb->L[0];
321 bi.bi_parent = tb->FL[0];
322 bi.bi_position = get_left_neighbor_position (tb, 0);
323 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body,
324 zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
325
326 version = ih_version (ih);
327
328 /* Calculate key component, item length and body to insert into S[0] */
329 set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits-UNFM_P_SHIFT:0)) );
330
331 put_ih_item_len( ih, new_item_len );
332 if ( tb->lbytes > zeros_num ) {
333 body += (tb->lbytes - zeros_num);
334 zeros_num = 0;
335 }
336 else
337 zeros_num -= tb->lbytes;
338
339 RFALSE( ih_item_len(ih) <= 0,
340 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
341 ih_item_len(ih));
342 } else {
343 /* new item in whole falls into L[0] */
344 /* Shift lnum[0]-1 items to L[0] */
345 ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes);
346 /* Insert new item into L[0] */
347 bi.tb = tb;
348 bi.bi_bh = tb->L[0];
349 bi.bi_parent = tb->FL[0];
350 bi.bi_position = get_left_neighbor_position (tb, 0);
351 leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num);
352 tb->insert_size[0] = 0;
353 zeros_num = 0;
354 }
355 break;
356
357 case M_PASTE: /* append item in L[0] */
358
359 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
360 /* we must shift the part of the appended item */
361 if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) {
362
363 RFALSE( zeros_num,
364 "PAP-12090: illegal parameter in case of a directory");
365 /* directory item */
366 if ( tb->lbytes > pos_in_item ) {
367 /* new directory entry falls into L[0] */
368 struct item_head * pasted;
369 int l_pos_in_item = pos_in_item;
370
371 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
372 ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1);
373 if ( ret_val && ! item_pos ) {
374 pasted = B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1);
375 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1);
376 }
377
378 /* Append given directory entry to directory item */
379 bi.tb = tb;
380 bi.bi_bh = tb->L[0];
381 bi.bi_parent = tb->FL[0];
382 bi.bi_position = get_left_neighbor_position (tb, 0);
383 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item,
384 tb->insert_size[0], body, zeros_num);
385
386 /* previous string prepared space for pasting new entry, following string pastes this entry */
387
388 /* when we have merge directory item, pos_in_item has been changed too */
389
390 /* paste new directory entry. 1 is entry number */
391 leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1,
392 (struct reiserfs_de_head *)body,
393 body + DEH_SIZE, tb->insert_size[0]
394 );
395 tb->insert_size[0] = 0;
396 } else {
397 /* new directory item doesn't fall into L[0] */
398 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
399 leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
400 }
401 /* Calculate new position to append in item body */
402 pos_in_item -= tb->lbytes;
403 }
404 else {
405 /* regular object */
406 RFALSE( tb->lbytes <= 0,
407 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
408 tb->lbytes);
409 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
410 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
411 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);
412
413 if ( tb->lbytes >= pos_in_item ) {
414 /* appended item will be in L[0] in whole */
415 int l_n;
416
417 /* this bytes number must be appended to the last item of L[h] */
418 l_n = tb->lbytes - pos_in_item;
419
420 /* Calculate new insert_size[0] */
421 tb->insert_size[0] -= l_n;
422
423 RFALSE( tb->insert_size[0] <= 0,
424 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
425 tb->insert_size[0]);
426 ret_val = leaf_shift_left(tb,tb->lnum[0],
427 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)));
428 /* Append to body of item in L[0] */
429 bi.tb = tb;
430 bi.bi_bh = tb->L[0];
431 bi.bi_parent = tb->FL[0];
432 bi.bi_position = get_left_neighbor_position (tb, 0);
433 leaf_paste_in_buffer(
434 &bi,n + item_pos - ret_val,
435 ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
436 l_n,body, zeros_num > l_n ? l_n : zeros_num
437 );
438 /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
439 {
440 int version;
441 int temp_l = l_n;
442
443 RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
444 "PAP-12106: item length must be 0");
445 RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
446 B_N_PKEY (tb->L[0],
447 n + item_pos - ret_val)),
448 "PAP-12107: items must be of the same file");
449 if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0],
450 n + item_pos - ret_val))) {
451 temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
452 }
453 /* update key of first item in S0 */
454 version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
455 set_le_key_k_offset (version, B_N_PKEY (tbS0, 0),
456 le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l);
457 /* update left delimiting key */
458 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
459 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l);
460 }
461
462 /* Calculate new body, position in item and insert_size[0] */
463 if ( l_n > zeros_num ) {
464 body += (l_n - zeros_num);
465 zeros_num = 0;
466 }
467 else
468 zeros_num -= l_n;
469 pos_in_item = 0;
470
471 RFALSE( comp_short_le_keys
472 (B_N_PKEY(tbS0,0),
473 B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) ||
474
475 !op_is_left_mergeable
476 (B_N_PKEY (tbS0, 0), tbS0->b_size) ||
477 !op_is_left_mergeable
478 (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
479 tbS0->b_size),
480 "PAP-12120: item must be merge-able with left neighboring item");
481 }
482 else /* only part of the appended item will be in L[0] */
483 {
484 /* Calculate position in item for append in S[0] */
485 pos_in_item -= tb->lbytes;
486
487 RFALSE( pos_in_item <= 0,
488 "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
489
490 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
491 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
492 }
493 }
494 }
495 else /* appended item will be in L[0] in whole */
496 {
497 struct item_head * pasted;
498
499 if ( ! item_pos && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) )
500 { /* if we paste into first item of S[0] and it is left mergable */
501 /* then increment pos_in_item by the size of the last item in L[0] */
502 pasted = B_N_PITEM_HEAD(tb->L[0],n-1);
503 if ( is_direntry_le_ih (pasted) )
504 pos_in_item += ih_entry_count(pasted);
505 else
506 pos_in_item += ih_item_len(pasted);
507 }
508
509 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
510 ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
511 /* Append to body of item in L[0] */
512 bi.tb = tb;
513 bi.bi_bh = tb->L[0];
514 bi.bi_parent = tb->FL[0];
515 bi.bi_position = get_left_neighbor_position (tb, 0);
516 leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0],
517 body, zeros_num);
518
519 /* if appended item is directory, paste entry */
520 pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val);
521 if (is_direntry_le_ih (pasted))
522 leaf_paste_entries (
523 bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1,
524 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
525 );
526 /* if appended item is indirect item, put unformatted node into un list */
527 if (is_indirect_le_ih (pasted))
528 set_ih_free_space (pasted, 0);
529 tb->insert_size[0] = 0;
530 zeros_num = 0;
531 }
532 break;
533 default: /* cases d and t */
534 reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
535 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
536 }
537 } else {
538 /* new item doesn't fall into L[0] */
539 leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
540 }
541 } /* tb->lnum[0] > 0 */
542
543 /* Calculate new item position */
544 item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0));
545
546 if ( tb->rnum[0] > 0 ) {
547 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
548 n = B_NR_ITEMS(tbS0);
549 switch ( flag ) {
550
551 case M_INSERT: /* insert item */
552 if ( n - tb->rnum[0] < item_pos )
553 { /* new item or its part falls to R[0] */
554 if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 )
555 { /* part of new item falls into R[0] */
556 loff_t old_key_comp, old_len, r_zeros_number;
557 const char * r_body;
558 int version;
559 loff_t offset;
560
561 leaf_shift_right(tb,tb->rnum[0]-1,-1);
562
563 version = ih_version(ih);
564 /* Remember key component and item length */
565 old_key_comp = le_ih_k_offset( ih );
566 old_len = ih_item_len(ih);
567
568 /* Calculate key component and item length to insert into R[0] */
569 offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits-UNFM_P_SHIFT:0));
570 set_le_ih_k_offset( ih, offset );
571 put_ih_item_len( ih, tb->rbytes);
572 /* Insert part of the item into R[0] */
573 bi.tb = tb;
574 bi.bi_bh = tb->R[0];
575 bi.bi_parent = tb->FR[0];
576 bi.bi_position = get_right_neighbor_position (tb, 0);
577 if ( (old_len - tb->rbytes) > zeros_num ) {
578 r_zeros_number = 0;
579 r_body = body + (old_len - tb->rbytes) - zeros_num;
580 }
581 else {
582 r_body = body;
583 r_zeros_number = zeros_num - (old_len - tb->rbytes);
584 zeros_num -= r_zeros_number;
585 }
586
587 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
588
589 /* Replace right delimiting key by first key in R[0] */
590 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
591
592 /* Calculate key component and item length to insert into S[0] */
593 set_le_ih_k_offset( ih, old_key_comp );
594 put_ih_item_len( ih, old_len - tb->rbytes );
595
596 tb->insert_size[0] -= tb->rbytes;
597
598 }
599 else /* whole new item falls into R[0] */
600 {
601 /* Shift rnum[0]-1 items to R[0] */
602 ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes);
603 /* Insert new item into R[0] */
604 bi.tb = tb;
605 bi.bi_bh = tb->R[0];
606 bi.bi_parent = tb->FR[0];
607 bi.bi_position = get_right_neighbor_position (tb, 0);
608 leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num);
609
610 if ( item_pos - n + tb->rnum[0] - 1 == 0 ) {
611 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
612
613 }
614 zeros_num = tb->insert_size[0] = 0;
615 }
616 }
617 else /* new item or part of it doesn't fall into R[0] */
618 {
619 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
620 }
621 break;
622
623 case M_PASTE: /* append item */
624
625 if ( n - tb->rnum[0] <= item_pos ) /* pasted item or part of it falls to R[0] */
626 {
627 if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 )
628 { /* we must shift the part of the appended item */
629 if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos)))
630 { /* we append to directory item */
631 int entry_count;
632
633 RFALSE( zeros_num,
634 "PAP-12145: illegal parametr in case of a directory");
635 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
636 if ( entry_count - tb->rbytes < pos_in_item )
637 /* new directory entry falls into R[0] */
638 {
639 int paste_entry_position;
640
641 RFALSE( tb->rbytes - 1 >= entry_count ||
642 ! tb->insert_size[0],
643 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
644 tb->rbytes, entry_count);
645 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
646 leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1);
647 /* Paste given directory entry to directory item */
648 paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
649 bi.tb = tb;
650 bi.bi_bh = tb->R[0];
651 bi.bi_parent = tb->FR[0];
652 bi.bi_position = get_right_neighbor_position (tb, 0);
653 leaf_paste_in_buffer (&bi, 0, paste_entry_position,
654 tb->insert_size[0],body,zeros_num);
655 /* paste entry */
656 leaf_paste_entries (
657 bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body,
658 body + DEH_SIZE, tb->insert_size[0]
659 );
660
661 if ( paste_entry_position == 0 ) {
662 /* change delimiting keys */
663 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
664 }
665
666 tb->insert_size[0] = 0;
667 pos_in_item++;
668 }
669 else /* new directory entry doesn't fall into R[0] */
670 {
671 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
672 }
673 }
674 else /* regular object */
675 {
676 int n_shift, n_rem, r_zeros_number;
677 const char * r_body;
678
679 /* Calculate number of bytes which must be shifted from appended item */
680 if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 )
681 n_shift = 0;
682
683 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)),
684 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
685 pos_in_item, ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos)));
686
687 leaf_shift_right(tb,tb->rnum[0],n_shift);
688 /* Calculate number of bytes which must remain in body after appending to R[0] */
689 if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 )
690 n_rem = 0;
691
692 {
693 int version;
694 unsigned long temp_rem = n_rem;
695
696 version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
697 if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){
698 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
699 UNFM_P_SHIFT);
700 }
701 set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0),
702 le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
703 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]),
704 le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
705 }
706 /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
707 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
708 do_balance_mark_internal_dirty (tb, tb->CFR[0], 0);
709
710 /* Append part of body into R[0] */
711 bi.tb = tb;
712 bi.bi_bh = tb->R[0];
713 bi.bi_parent = tb->FR[0];
714 bi.bi_position = get_right_neighbor_position (tb, 0);
715 if ( n_rem > zeros_num ) {
716 r_zeros_number = 0;
717 r_body = body + n_rem - zeros_num;
718 }
719 else {
720 r_body = body;
721 r_zeros_number = zeros_num - n_rem;
722 zeros_num -= r_zeros_number;
723 }
724
725 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number);
726
727 if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) {
728 #if 0
729 RFALSE( n_rem,
730 "PAP-12160: paste more than one unformatted node pointer");
731 #endif
732 set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0);
733 }
734 tb->insert_size[0] = n_rem;
735 if ( ! n_rem )
736 pos_in_item ++;
737 }
738 }
739 else /* pasted item in whole falls into R[0] */
740 {
741 struct item_head * pasted;
742
743 ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
744 /* append item in R[0] */
745 if ( pos_in_item >= 0 ) {
746 bi.tb = tb;
747 bi.bi_bh = tb->R[0];
748 bi.bi_parent = tb->FR[0];
749 bi.bi_position = get_right_neighbor_position (tb, 0);
750 leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item,
751 tb->insert_size[0],body, zeros_num);
752 }
753
754 /* paste new entry, if item is directory item */
755 pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
756 if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) {
757 leaf_paste_entries (
758 bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1,
759 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
760 );
761 if ( ! pos_in_item ) {
762
763 RFALSE( item_pos - n + tb->rnum[0],
764 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
765
766 /* update delimiting keys */
767 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
768 }
769 }
770
771 if (is_indirect_le_ih (pasted))
772 set_ih_free_space (pasted, 0);
773 zeros_num = tb->insert_size[0] = 0;
774 }
775 }
776 else /* new item doesn't fall into R[0] */
777 {
778 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
779 }
780 break;
781 default: /* cases d and t */
782 reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
783 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
784 }
785
786 } /* tb->rnum[0] > 0 */
787
788
789 RFALSE( tb->blknum[0] > 3,
790 "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
791 RFALSE( tb->blknum[0] < 0,
792 "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
793
794 /* if while adding to a node we discover that it is possible to split
795 it in two, and merge the left part into the left neighbor and the
796 right part into the right neighbor, eliminating the node */
797 if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */
798
799 RFALSE( ! tb->lnum[0] || ! tb->rnum[0],
800 "PAP-12190: lnum and rnum must not be zero");
801 /* if insertion was done before 0-th position in R[0], right
802 delimiting key of the tb->L[0]'s and left delimiting key are
803 not set correctly */
804 if (tb->CFL[0]) {
805 if (!tb->CFR[0])
806 reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized");
807 copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]));
808 do_balance_mark_internal_dirty (tb, tb->CFL[0], 0);
809 }
810
811 reiserfs_invalidate_buffer(tb,tbS0);
812 return 0;
813 }
814
815
816 /* Fill new nodes that appear in place of S[0] */
817
818 /* I am told that this copying is because we need an array to enable
819 the looping code. -Hans */
820 snum[0] = tb->s1num,
821 snum[1] = tb->s2num;
822 sbytes[0] = tb->s1bytes;
823 sbytes[1] = tb->s2bytes;
824 for( i = tb->blknum[0] - 2; i >= 0; i-- ) {
825
826 RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]);
827
828 /* here we shift from S to S_new nodes */
829
830 S_new[i] = get_FEB(tb);
831
832 /* initialized block type and tree level */
833 set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL );
834
835
836 n = B_NR_ITEMS(tbS0);
837
838 switch (flag) {
839 case M_INSERT: /* insert item */
840
841 if ( n - snum[i] < item_pos )
842 { /* new item or it's part falls to first new node S_new[i]*/
843 if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 )
844 { /* part of new item falls into S_new[i] */
845 int old_key_comp, old_len, r_zeros_number;
846 const char * r_body;
847 int version;
848
849 /* Move snum[i]-1 items from S[0] to S_new[i] */
850 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
851 /* Remember key component and item length */
852 version = ih_version (ih);
853 old_key_comp = le_ih_k_offset( ih );
854 old_len = ih_item_len(ih);
855
856 /* Calculate key component and item length to insert into S_new[i] */
857 set_le_ih_k_offset( ih,
858 le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits-UNFM_P_SHIFT:0) ));
859
860 put_ih_item_len( ih, sbytes[i] );
861
862 /* Insert part of the item into S_new[i] before 0-th item */
863 bi.tb = tb;
864 bi.bi_bh = S_new[i];
865 bi.bi_parent = 0;
866 bi.bi_position = 0;
867
868 if ( (old_len - sbytes[i]) > zeros_num ) {
869 r_zeros_number = 0;
870 r_body = body + (old_len - sbytes[i]) - zeros_num;
871 }
872 else {
873 r_body = body;
874 r_zeros_number = zeros_num - (old_len - sbytes[i]);
875 zeros_num -= r_zeros_number;
876 }
877
878 leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
879
880 /* Calculate key component and item length to insert into S[i] */
881 set_le_ih_k_offset( ih, old_key_comp );
882 put_ih_item_len( ih, old_len - sbytes[i] );
883 tb->insert_size[0] -= sbytes[i];
884 }
885 else /* whole new item falls into S_new[i] */
886 {
887 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
888 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]);
889
890 /* Insert new item into S_new[i] */
891 bi.tb = tb;
892 bi.bi_bh = S_new[i];
893 bi.bi_parent = 0;
894 bi.bi_position = 0;
895 leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num);
896
897 zeros_num = tb->insert_size[0] = 0;
898 }
899 }
900
901 else /* new item or it part don't falls into S_new[i] */
902 {
903 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
904 }
905 break;
906
907 case M_PASTE: /* append item */
908
909 if ( n - snum[i] <= item_pos ) /* pasted item or part if it falls to S_new[i] */
910 {
911 if ( item_pos == n - snum[i] && sbytes[i] != -1 )
912 { /* we must shift part of the appended item */
913 struct item_head * aux_ih;
914
915 RFALSE( ih, "PAP-12210: ih must be 0");
916
917 if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) {
918 /* we append to directory item */
919
920 int entry_count;
921
922 entry_count = ih_entry_count(aux_ih);
923
924 if ( entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count ) {
925 /* new directory entry falls into S_new[i] */
926
927 RFALSE( ! tb->insert_size[0],
928 "PAP-12215: insert_size is already 0");
929 RFALSE( sbytes[i] - 1 >= entry_count,
930 "PAP-12220: there are no so much entries (%d), only %d",
931 sbytes[i] - 1, entry_count);
932
933 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
934 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]);
935 /* Paste given directory entry to directory item */
936 bi.tb = tb;
937 bi.bi_bh = S_new[i];
938 bi.bi_parent = 0;
939 bi.bi_position = 0;
940 leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
941 tb->insert_size[0], body,zeros_num);
942 /* paste new directory entry */
943 leaf_paste_entries (
944 bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1,
945 1, (struct reiserfs_de_head *)body, body + DEH_SIZE,
946 tb->insert_size[0]
947 );
948 tb->insert_size[0] = 0;
949 pos_in_item++;
950 } else { /* new directory entry doesn't fall into S_new[i] */
951 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
952 }
953 }
954 else /* regular object */
955 {
956 int n_shift, n_rem, r_zeros_number;
957 const char * r_body;
958
959 RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) ||
960 tb->insert_size[0] <= 0,
961 "PAP-12225: item too short or insert_size <= 0");
962
963 /* Calculate number of bytes which must be shifted from appended item */
964 n_shift = sbytes[i] - tb->insert_size[0];
965 if ( n_shift < 0 )
966 n_shift = 0;
967 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
968
969 /* Calculate number of bytes which must remain in body after append to S_new[i] */
970 n_rem = tb->insert_size[0] - sbytes[i];
971 if ( n_rem < 0 )
972 n_rem = 0;
973 /* Append part of body into S_new[0] */
974 bi.tb = tb;
975 bi.bi_bh = S_new[i];
976 bi.bi_parent = 0;
977 bi.bi_position = 0;
978
979 if ( n_rem > zeros_num ) {
980 r_zeros_number = 0;
981 r_body = body + n_rem - zeros_num;
982 }
983 else {
984 r_body = body;
985 r_zeros_number = zeros_num - n_rem;
986 zeros_num -= r_zeros_number;
987 }
988
989 leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number);
990 {
991 struct item_head * tmp;
992
993 tmp = B_N_PITEM_HEAD(S_new[i],0);
994 if (is_indirect_le_ih (tmp)) {
995 set_ih_free_space (tmp, 0);
996 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
997 (n_rem << (tb->tb_sb->s_blocksize_bits -
998 UNFM_P_SHIFT)));
999 } else {
1000 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) +
1001 n_rem );
1002 }
1003 }
1004
1005 tb->insert_size[0] = n_rem;
1006 if ( ! n_rem )
1007 pos_in_item++;
1008 }
1009 }
1010 else
1011 /* item falls wholly into S_new[i] */
1012 {
1013 int ret_val;
1014 struct item_head * pasted;
1015
1016 #ifdef CONFIG_REISERFS_CHECK
1017 struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos);
1018
1019 if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) ||
1020 tb->insert_size[0] <= 0) )
1021 reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1022 #endif /* CONFIG_REISERFS_CHECK */
1023
1024 ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1025
1026 RFALSE( ret_val,
1027 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
1028 ret_val);
1029
1030 /* paste into item */
1031 bi.tb = tb;
1032 bi.bi_bh = S_new[i];
1033 bi.bi_parent = 0;
1034 bi.bi_position = 0;
1035 leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num);
1036
1037 pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1038 if (is_direntry_le_ih (pasted))
1039 {
1040 leaf_paste_entries (
1041 bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1,
1042 (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
1043 );
1044 }
1045
1046 /* if we paste to indirect item update ih_free_space */
1047 if (is_indirect_le_ih (pasted))
1048 set_ih_free_space (pasted, 0);
1049 zeros_num = tb->insert_size[0] = 0;
1050 }
1051 }
1052
1053 else /* pasted item doesn't fall into S_new[i] */
1054 {
1055 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
1056 }
1057 break;
1058 default: /* cases d and t */
1059 reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1060 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1061 }
1062
1063 memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE);
1064 insert_ptr[i] = S_new[i];
1065
1066 RFALSE( (atomic_read (&(S_new[i]->b_count)) != 1) &&
1067 (atomic_read(&(S_new[i]->b_count)) != 2 ||
1068 !(buffer_journaled(S_new[i]) ||
1069 buffer_journal_dirty(S_new[i]))),
1070 "PAP-12247: S_new[%d] : (%b)\n", i, S_new[i]);
1071
1072
1073 }
1074
1075 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1076 affected item which remains in S */
1077 if ( 0 <= item_pos && item_pos < tb->s0num )
1078 { /* if we must insert or append into buffer S[0] */
1079
1080 switch (flag)
1081 {
1082 case M_INSERT: /* insert item into S[0] */
1083 bi.tb = tb;
1084 bi.bi_bh = tbS0;
1085 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1086 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1087 leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num);
1088
1089 /* If we insert the first key change the delimiting key */
1090 if( item_pos == 0 ) {
1091 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1092 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1093
1094 }
1095 break;
1096
1097 case M_PASTE: { /* append item in S[0] */
1098 struct item_head * pasted;
1099
1100 pasted = B_N_PITEM_HEAD (tbS0, item_pos);
1101 /* when directory, may be new entry already pasted */
1102 if (is_direntry_le_ih (pasted)) {
1103 if ( pos_in_item >= 0 &&
1104 pos_in_item <= ih_entry_count(pasted) ) {
1105
1106 RFALSE( ! tb->insert_size[0],
1107 "PAP-12260: insert_size is 0 already");
1108
1109 /* prepare space */
1110 bi.tb = tb;
1111 bi.bi_bh = tbS0;
1112 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1113 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1114 leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1115
1116 /* paste entry */
1117 leaf_paste_entries (
1118 bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body,
1119 body + DEH_SIZE, tb->insert_size[0]
1120 );
1121 if ( ! item_pos && ! pos_in_item ) {
1122 RFALSE( !tb->CFL[0] || !tb->L[0],
1123 "PAP-12270: CFL[0]/L[0] must be specified");
1124 if (tb->CFL[0]) {
1125 replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
1126
1127 }
1128 }
1129 tb->insert_size[0] = 0;
1130 }
1131 } else { /* regular object */
1132 if ( pos_in_item == ih_item_len(pasted) ) {
1133
1134 RFALSE( tb->insert_size[0] <= 0,
1135 "PAP-12275: insert size must not be %d",
1136 tb->insert_size[0]);
1137 bi.tb = tb;
1138 bi.bi_bh = tbS0;
1139 bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
1140 bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
1141 leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
1142
1143 if (is_indirect_le_ih (pasted)) {
1144 #if 0
1145 RFALSE( tb->insert_size[0] != UNFM_P_SIZE,
1146 "PAP-12280: insert_size for indirect item must be %d, not %d",
1147 UNFM_P_SIZE, tb->insert_size[0]);
1148 #endif
1149 set_ih_free_space (pasted, 0);
1150 }
1151 tb->insert_size[0] = 0;
1152 }
1153
1154 #ifdef CONFIG_REISERFS_CHECK
1155 else {
1156 if ( tb->insert_size[0] ) {
1157 print_cur_tb ("12285");
1158 reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]);
1159 }
1160 }
1161 #endif /* CONFIG_REISERFS_CHECK */
1162
1163 }
1164 } /* case M_PASTE: */
1165 }
1166 }
1167
1168 #ifdef CONFIG_REISERFS_CHECK
1169 if ( flag == M_PASTE && tb->insert_size[0] ) {
1170 print_cur_tb ("12290");
1171 reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]);
1172 }
1173 #endif /* CONFIG_REISERFS_CHECK */
1174
1175 return 0;
1176 } /* Leaf level of the tree is balanced (end of balance_leaf) */
1177
1178
1179
1180 /* Make empty node */
make_empty_node(struct buffer_info * bi)1181 void make_empty_node (struct buffer_info * bi)
1182 {
1183 struct block_head * blkh;
1184
1185 RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1186
1187 blkh = B_BLK_HEAD(bi->bi_bh);
1188 set_blkh_nr_item( blkh, 0 );
1189 set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) );
1190
1191 if (bi->bi_parent)
1192 B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1193 }
1194
1195
1196 /* Get first empty buffer */
get_FEB(struct tree_balance * tb)1197 struct buffer_head * get_FEB (struct tree_balance * tb)
1198 {
1199 int i;
1200 struct buffer_head * first_b;
1201 struct buffer_info bi;
1202
1203 for (i = 0; i < MAX_FEB_SIZE; i ++)
1204 if (tb->FEB[i] != 0)
1205 break;
1206
1207 if (i == MAX_FEB_SIZE)
1208 reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty");
1209
1210 bi.tb = tb;
1211 bi.bi_bh = first_b = tb->FEB[i];
1212 bi.bi_parent = 0;
1213 bi.bi_position = 0;
1214 make_empty_node (&bi);
1215 set_bit(BH_Uptodate, &first_b->b_state);
1216 tb->FEB[i] = 0;
1217 tb->used[i] = first_b;
1218
1219 return(first_b);
1220 }
1221
1222
1223 /* This is now used because reiserfs_free_block has to be able to
1224 ** schedule.
1225 */
store_thrown(struct tree_balance * tb,struct buffer_head * bh)1226 static void store_thrown (struct tree_balance * tb, struct buffer_head * bh)
1227 {
1228 int i;
1229
1230 if (buffer_dirty (bh))
1231 reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer\n");
1232 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++)
1233 if (!tb->thrown[i]) {
1234 tb->thrown[i] = bh;
1235 get_bh(bh) ; /* free_thrown puts this */
1236 return;
1237 }
1238 reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers\n");
1239 }
1240
free_thrown(struct tree_balance * tb)1241 static void free_thrown(struct tree_balance *tb) {
1242 int i ;
1243 unsigned long blocknr ;
1244 for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) {
1245 if (tb->thrown[i]) {
1246 blocknr = tb->thrown[i]->b_blocknr ;
1247 if (buffer_dirty (tb->thrown[i]))
1248 reiserfs_warning (tb->tb_sb, "free_thrown deals with dirty buffer %ld\n", blocknr);
1249 brelse(tb->thrown[i]) ; /* incremented in store_thrown */
1250 reiserfs_free_block (tb->transaction_handle, blocknr);
1251 }
1252 }
1253 }
1254
reiserfs_invalidate_buffer(struct tree_balance * tb,struct buffer_head * bh)1255 void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh)
1256 {
1257 struct block_head *blkh;
1258 blkh = B_BLK_HEAD(bh);
1259 set_blkh_level( blkh, FREE_LEVEL );
1260 set_blkh_nr_item( blkh, 0 );
1261
1262 mark_buffer_clean (bh);
1263 /* reiserfs_free_block is no longer schedule safe
1264 reiserfs_free_block (tb->transaction_handle, tb->tb_sb, bh->b_blocknr);
1265 */
1266
1267 store_thrown (tb, bh);
1268 }
1269
1270 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
replace_key(struct tree_balance * tb,struct buffer_head * dest,int n_dest,struct buffer_head * src,int n_src)1271 void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest,
1272 struct buffer_head * src, int n_src)
1273 {
1274
1275 RFALSE( dest == NULL || src == NULL,
1276 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1277 src, dest);
1278 RFALSE( ! B_IS_KEYS_LEVEL (dest),
1279 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1280 dest);
1281 RFALSE( n_dest < 0 || n_src < 0,
1282 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1283 RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1284 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1285 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1286
1287 if (B_IS_ITEMS_LEVEL (src))
1288 /* source buffer contains leaf node */
1289 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE);
1290 else
1291 memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE);
1292
1293 do_balance_mark_internal_dirty (tb, dest, 0);
1294 }
1295
1296
get_left_neighbor_position(struct tree_balance * tb,int h)1297 int get_left_neighbor_position (
1298 struct tree_balance * tb,
1299 int h
1300 )
1301 {
1302 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1303
1304 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0,
1305 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1306 h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h));
1307
1308 if (Sh_position == 0)
1309 return B_NR_ITEMS (tb->FL[h]);
1310 else
1311 return Sh_position - 1;
1312 }
1313
1314
get_right_neighbor_position(struct tree_balance * tb,int h)1315 int get_right_neighbor_position (struct tree_balance * tb, int h)
1316 {
1317 int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
1318
1319 RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0,
1320 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1321 h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]);
1322
1323 if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h)))
1324 return 0;
1325 else
1326 return Sh_position + 1;
1327 }
1328
1329
1330 #ifdef CONFIG_REISERFS_CHECK
1331
1332 int is_reusable (struct super_block * s, unsigned long block, int bit_value);
check_internal_node(struct super_block * s,struct buffer_head * bh,char * mes)1333 static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes)
1334 {
1335 struct disk_child * dc;
1336 int i;
1337
1338 RFALSE( !bh, "PAP-12336: bh == 0");
1339
1340 if (!bh || !B_IS_IN_TREE (bh))
1341 return;
1342
1343 RFALSE( !buffer_dirty (bh) &&
1344 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1345 "PAP-12337: buffer (%b) must be dirty", bh);
1346 dc = B_N_CHILD (bh, 0);
1347
1348 for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) {
1349 if (!is_reusable (s, dc_block_number(dc), 1) ) {
1350 print_cur_tb (mes);
1351 reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh);
1352 }
1353 }
1354 }
1355
1356
locked_or_not_in_tree(struct buffer_head * bh,char * which)1357 static int locked_or_not_in_tree (struct buffer_head * bh, char * which)
1358 {
1359 if ( buffer_locked (bh) || !B_IS_IN_TREE (bh) ) {
1360 reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)\n", which, bh);
1361 return 1;
1362 }
1363 return 0;
1364 }
1365
1366
check_before_balancing(struct tree_balance * tb)1367 static int check_before_balancing (struct tree_balance * tb)
1368 {
1369 int retval = 0;
1370
1371 if ( cur_tb ) {
1372 reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: "
1373 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1374 "do_balance cannot properly handle schedule occuring while it runs.");
1375 }
1376
1377 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1378 prepped all of these for us). */
1379 if ( tb->lnum[0] ) {
1380 retval |= locked_or_not_in_tree (tb->L[0], "L[0]");
1381 retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]");
1382 retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]");
1383 check_leaf (tb->L[0]);
1384 }
1385 if ( tb->rnum[0] ) {
1386 retval |= locked_or_not_in_tree (tb->R[0], "R[0]");
1387 retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]");
1388 retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]");
1389 check_leaf (tb->R[0]);
1390 }
1391 retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]");
1392 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1393
1394 return retval;
1395 }
1396
1397
/* Debug check run after leaf-level balancing: for each node that was
   modified (L[0], R[0], S[0]) verify that its free space agrees with
   the dc_size recorded in its parent's disk_child entry; panic on any
   mismatch since that means the shift accounting went wrong. */
void check_after_balance_leaf (struct tree_balance * tb)
{
    /* items were shifted left: L[0]'s free space must match its parent's record */
    if (tb->lnum[0]) {
	if (B_FREE_SPACE (tb->L[0]) !=
	    MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) {
	    print_cur_tb ("12221");
	    reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
	}
    }
    /* items were shifted right: same consistency check for R[0] */
    if (tb->rnum[0]) {
	if (B_FREE_SPACE (tb->R[0]) !=
	    MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) {
	    print_cur_tb ("12222");
	    reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
	}
    }
    /* S[0] itself: only checkable when it has a parent on the path */
    if (PATH_H_PBUFFER(tb->tb_path,1) &&
	(B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) !=
	 (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
	  dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
			     PATH_H_POSITION (tb->tb_path, 1)))) )) {
	/* dump both sides of the failed comparison before panicking */
	int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0));
	int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
		     dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
					PATH_H_POSITION (tb->tb_path, 1))));
	print_cur_tb ("12223");
	reiserfs_warning( tb->tb_sb,
			  "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
			  "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d\n",
			  left,
			  MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)),
			  PATH_H_PBUFFER(tb->tb_path,1),
			  PATH_H_POSITION (tb->tb_path, 1),
			  dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ),
			  right );
	reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect");
    }
}
1436
1437
check_leaf_level(struct tree_balance * tb)1438 void check_leaf_level (struct tree_balance * tb)
1439 {
1440 check_leaf (tb->L[0]);
1441 check_leaf (tb->R[0]);
1442 check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
1443 }
1444
check_internal_levels(struct tree_balance * tb)1445 void check_internal_levels (struct tree_balance * tb)
1446 {
1447 int h;
1448
1449 /* check all internal nodes */
1450 for (h = 1; tb->insert_size[h]; h ++) {
1451 check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH");
1452 if (tb->lnum[h])
1453 check_internal_node (tb->tb_sb, tb->L[h], "BAD L");
1454 if (tb->rnum[h])
1455 check_internal_node (tb->tb_sb, tb->R[h], "BAD R");
1456 }
1457
1458 }
1459
1460 #endif
1461
1462
1463
1464
1465
1466
1467 /* Now we have all of the buffers that must be used in balancing of
1468 the tree. We rely on the assumption that schedule() will not occur
1469 while do_balance works. ( Only interrupt handlers are acceptable.)
1470 We balance the tree according to the analysis made before this,
1471 using buffers already obtained. For SMP support it will someday be
1472 necessary to add ordered locking of tb. */
1473
1474 /* Some interesting rules of balancing:
1475
1476 we delete a maximum of two nodes per level per balancing: we never
1477 delete R, when we delete two of three nodes L, S, R then we move
1478 them into R.
1479
1480 we only delete L if we are deleting two nodes, if we delete only
1481 one node we delete S
1482
1483 if we shift leaves then we shift as much as we can: this is a
1484 deliberate policy of extremism in node packing which results in
1485 higher average utilization after repeated random balance operations
1486 at the cost of more memory copies and more balancing as a result of
1487 small insertions to full nodes.
1488
1489 if we shift internal nodes we try to evenly balance the node
1490 utilization, with consequent less balancing at the cost of lower
1491 utilization.
1492
1493 one could argue that the policy for directories in leaves should be
1494 that of internal nodes, but we will wait until another day to
1495 evaluate this.... It would be nice to someday measure and prove
1496 these assumptions as to what is optimal....
1497
1498 */
1499
/* Pre-balancing bookkeeping: assert (debug builds) that all buffers we
   are about to modify are unlocked and in the tree, then record this tb
   as the single active balance so an unexpected schedule() can be
   detected via cur_tb. */
static inline void do_balance_starts (struct tree_balance *tb)
{
    /* use print_cur_tb() to see initial state of struct
       tree_balance */

    /* store_print_tb (tb); */

    /* do not delete, just comment it out */
    /* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
       "check");*/
    RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB");
#ifdef CONFIG_REISERFS_CHECK
    cur_tb = tb;
#endif
}
1515
1516
/* Post-balancing bookkeeping: run the debug consistency checks, clear
   the active-balance marker, bump the per-sb balance counter, release
   all buffers held for balancing, and finally free the blocks that were
   queued on the thrown list (safe to schedule here). */
static inline void do_balance_completed (struct tree_balance * tb)
{

#ifdef CONFIG_REISERFS_CHECK
    check_leaf_level (tb);
    check_internal_levels (tb);
    cur_tb = NULL;
#endif

    /* reiserfs_free_block is no longer schedule safe.  So, we need to
    ** put the buffers we want freed on the thrown list during do_balance,
    ** and then free them now
    */

    tb->tb_sb->u.reiserfs_sb.s_do_balance ++;


    /* release all nodes held to perform the balancing */
    unfix_nodes(tb);

    /* must run after unfix_nodes: reiserfs_free_block may schedule */
    free_thrown(tb) ;
}
1539
1540
1541
1542
1543
/* Top-level entry point of tree balancing: applies the plan computed by
   fix_nodes() to the leaf level, then propagates key/pointer insertions
   up through the internal levels.  Must not be interrupted by
   schedule() once started (see cur_tb). */
void do_balance (struct tree_balance * tb, /* tree_balance structure */
		 struct item_head * ih,	   /* item header of inserted item */
		 const char * body,  /* body  of inserted item or bytes to paste */
		 int flag)  /* i - insert, d - delete
			       c - cut, p - paste

			       Cut means delete part of an item
			       (includes removing an entry from a
			       directory).

			       Delete means delete whole item.

			       Insert means add a new item into the
			       tree.

			       Paste means to append to the end of an
			       existing file or to insert a directory
			       entry.  */
{
    int child_pos, /* position of a child node in its parent */
	h;	   /* level of the tree being processed */
    struct item_head insert_key[2]; /* in our processing of one level
				       we sometimes determine what
				       must be inserted into the next
				       higher level.  This insertion
				       consists of a key or two keys
				       and their corresponding
				       pointers */
    struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
					  level */

    tb->tb_mode = flag;
    tb->need_balance_dirty = 0;

    /* the plan from fix_nodes is stale if the fs changed since it was made */
    if (FILESYSTEM_CHANGED_TB(tb)) {
	reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
    }
    /* if we have no real work to do  */
    if ( ! tb->insert_size[0] ) {
	reiserfs_warning (tb->tb_sb, "PAP-12350: do_balance: insert_size == 0, mode == %c\n ",
			  flag);
	unfix_nodes(tb);
	return;
    }

    /* bump the generation so concurrent fix_nodes plans become stale */
    atomic_inc (&(fs_generation (tb->tb_sb)));
    do_balance_starts (tb);

    /* balance leaf returns 0 except if combining L R and S into
       one node. see balance_internal() for explanation of this
       line of code.*/
    child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) +
      balance_leaf (tb, ih, body, flag, insert_key, insert_ptr);

#ifdef CONFIG_REISERFS_CHECK
    check_after_balance_leaf (tb);
#endif

    /* Balance internal level of the tree. */
    for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ )
	child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr);


    do_balance_completed (tb);

}
1610