1 /*
2 * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write the Free Software Foundation, Inc., 59
21 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31 */
32
33 /*
34 * This file contains common code for the space manager's btree implementations.
35 */
36
37 #include "xfs.h"
38
39 #include "xfs_macros.h"
40 #include "xfs_types.h"
41 #include "xfs_inum.h"
42 #include "xfs_log.h"
43 #include "xfs_trans.h"
44 #include "xfs_sb.h"
45 #include "xfs_ag.h"
46 #include "xfs_dir.h"
47 #include "xfs_dir2.h"
48 #include "xfs_dmapi.h"
49 #include "xfs_mount.h"
50 #include "xfs_alloc_btree.h"
51 #include "xfs_bmap_btree.h"
52 #include "xfs_ialloc_btree.h"
53 #include "xfs_btree.h"
54 #include "xfs_ialloc.h"
55 #include "xfs_attr_sf.h"
56 #include "xfs_dir_sf.h"
57 #include "xfs_dir2_sf.h"
58 #include "xfs_dinode.h"
59 #include "xfs_inode.h"
60 #include "xfs_bit.h"
61 #include "xfs_error.h"
62
63 /*
64 * Cursor allocation zone.
65 */
66 kmem_zone_t *xfs_btree_cur_zone;
67
68 /*
69 * Btree magic numbers.
70 */
71 const __uint32_t xfs_magics[XFS_BTNUM_MAX] =
72 {
73 XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
74 };
75
76 /*
77 * Prototypes for internal routines.
78 */
79
80 /*
81 * Checking routine: return maxrecs for the block.
82 */
83 STATIC int /* number of records fitting in block */
84 xfs_btree_maxrecs(
85 xfs_btree_cur_t *cur, /* btree cursor */
86 xfs_btree_block_t *block);/* generic btree block pointer */
87
88 /*
89 * Internal routines.
90 */
91
92 /*
93 * Checking routine: return maxrecs for the block.
94 */
95 STATIC int /* number of records fitting in block */
xfs_btree_maxrecs(xfs_btree_cur_t * cur,xfs_btree_block_t * block)96 xfs_btree_maxrecs(
97 xfs_btree_cur_t *cur, /* btree cursor */
98 xfs_btree_block_t *block) /* generic btree block pointer */
99 {
100 switch (cur->bc_btnum) {
101 case XFS_BTNUM_BNO:
102 case XFS_BTNUM_CNT:
103 return (int)XFS_ALLOC_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
104 case XFS_BTNUM_BMAP:
105 return (int)XFS_BMAP_BLOCK_IMAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
106 case XFS_BTNUM_INO:
107 return (int)XFS_INOBT_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
108 default:
109 ASSERT(0);
110 return 0;
111 }
112 }
113
114 /*
115 * External routines.
116 */
117
118 #ifdef DEBUG
119 /*
120 * Debug routine: check that block header is ok.
121 */
122 void
xfs_btree_check_block(xfs_btree_cur_t * cur,xfs_btree_block_t * block,int level,xfs_buf_t * bp)123 xfs_btree_check_block(
124 xfs_btree_cur_t *cur, /* btree cursor */
125 xfs_btree_block_t *block, /* generic btree block pointer */
126 int level, /* level of the btree block */
127 xfs_buf_t *bp) /* buffer containing block, if any */
128 {
129 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
130 xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
131 bp);
132 else
133 xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
134 bp);
135 }
136
137 /*
138 * Debug routine: check that keys are in the right order.
139 */
140 void
xfs_btree_check_key(xfs_btnum_t btnum,void * ak1,void * ak2)141 xfs_btree_check_key(
142 xfs_btnum_t btnum, /* btree identifier */
143 void *ak1, /* pointer to left (lower) key */
144 void *ak2) /* pointer to right (higher) key */
145 {
146 switch (btnum) {
147 case XFS_BTNUM_BNO: {
148 xfs_alloc_key_t *k1;
149 xfs_alloc_key_t *k2;
150
151 k1 = ak1;
152 k2 = ak2;
153 ASSERT(INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT));
154 break;
155 }
156 case XFS_BTNUM_CNT: {
157 xfs_alloc_key_t *k1;
158 xfs_alloc_key_t *k2;
159
160 k1 = ak1;
161 k2 = ak2;
162 ASSERT(INT_GET(k1->ar_blockcount, ARCH_CONVERT) < INT_GET(k2->ar_blockcount, ARCH_CONVERT) ||
163 (INT_GET(k1->ar_blockcount, ARCH_CONVERT) == INT_GET(k2->ar_blockcount, ARCH_CONVERT) &&
164 INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT)));
165 break;
166 }
167 case XFS_BTNUM_BMAP: {
168 xfs_bmbt_key_t *k1;
169 xfs_bmbt_key_t *k2;
170
171 k1 = ak1;
172 k2 = ak2;
173 ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) < INT_GET(k2->br_startoff, ARCH_CONVERT));
174 break;
175 }
176 case XFS_BTNUM_INO: {
177 xfs_inobt_key_t *k1;
178 xfs_inobt_key_t *k2;
179
180 k1 = ak1;
181 k2 = ak2;
182 ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) < INT_GET(k2->ir_startino, ARCH_CONVERT));
183 break;
184 }
185 default:
186 ASSERT(0);
187 }
188 }
189 #endif /* DEBUG */
190
191 /*
192 * Checking routine: check that long form block header is ok.
193 */
194 /* ARGSUSED */
195 int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(xfs_btree_cur_t * cur,xfs_btree_lblock_t * block,int level,xfs_buf_t * bp)196 xfs_btree_check_lblock(
197 xfs_btree_cur_t *cur, /* btree cursor */
198 xfs_btree_lblock_t *block, /* btree long form block pointer */
199 int level, /* level of the btree block */
200 xfs_buf_t *bp) /* buffer for block, if any */
201 {
202 int lblock_ok; /* block passes checks */
203 xfs_mount_t *mp; /* file system mount point */
204
205 mp = cur->bc_mp;
206 lblock_ok =
207 INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
208 INT_GET(block->bb_level, ARCH_CONVERT) == level &&
209 INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
210 xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
211 !INT_ISZERO(block->bb_leftsib, ARCH_CONVERT) &&
212 (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO ||
213 XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_leftsib, ARCH_CONVERT))) &&
214 !INT_ISZERO(block->bb_rightsib, ARCH_CONVERT) &&
215 (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO ||
216 XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_rightsib, ARCH_CONVERT)));
217 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,
218 XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
219 if (bp)
220 xfs_buftrace("LBTREE ERROR", bp);
221 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
222 mp);
223 return XFS_ERROR(EFSCORRUPTED);
224 }
225 return 0;
226 }
227
228 /*
229 * Checking routine: check that (long) pointer is ok.
230 */
231 int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(xfs_btree_cur_t * cur,xfs_dfsbno_t ptr,int level)232 xfs_btree_check_lptr(
233 xfs_btree_cur_t *cur, /* btree cursor */
234 xfs_dfsbno_t ptr, /* btree block disk address */
235 int level) /* btree block level */
236 {
237 xfs_mount_t *mp; /* file system mount point */
238
239 mp = cur->bc_mp;
240 XFS_WANT_CORRUPTED_RETURN(
241 level > 0 &&
242 ptr != NULLDFSBNO &&
243 XFS_FSB_SANITY_CHECK(mp, ptr));
244 return 0;
245 }
246
247 #ifdef DEBUG
248 /*
249 * Debug routine: check that records are in the right order.
250 */
251 void
xfs_btree_check_rec(xfs_btnum_t btnum,void * ar1,void * ar2)252 xfs_btree_check_rec(
253 xfs_btnum_t btnum, /* btree identifier */
254 void *ar1, /* pointer to left (lower) record */
255 void *ar2) /* pointer to right (higher) record */
256 {
257 switch (btnum) {
258 case XFS_BTNUM_BNO: {
259 xfs_alloc_rec_t *r1;
260 xfs_alloc_rec_t *r2;
261
262 r1 = ar1;
263 r2 = ar2;
264 ASSERT(INT_GET(r1->ar_startblock, ARCH_CONVERT) + INT_GET(r1->ar_blockcount, ARCH_CONVERT) <=
265 INT_GET(r2->ar_startblock, ARCH_CONVERT));
266 break;
267 }
268 case XFS_BTNUM_CNT: {
269 xfs_alloc_rec_t *r1;
270 xfs_alloc_rec_t *r2;
271
272 r1 = ar1;
273 r2 = ar2;
274 ASSERT(INT_GET(r1->ar_blockcount, ARCH_CONVERT) < INT_GET(r2->ar_blockcount, ARCH_CONVERT) ||
275 (INT_GET(r1->ar_blockcount, ARCH_CONVERT) == INT_GET(r2->ar_blockcount, ARCH_CONVERT) &&
276 INT_GET(r1->ar_startblock, ARCH_CONVERT) < INT_GET(r2->ar_startblock, ARCH_CONVERT)));
277 break;
278 }
279 case XFS_BTNUM_BMAP: {
280 xfs_bmbt_rec_t *r1;
281 xfs_bmbt_rec_t *r2;
282
283 r1 = ar1;
284 r2 = ar2;
285 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
286 xfs_bmbt_disk_get_blockcount(r1) <=
287 xfs_bmbt_disk_get_startoff(r2));
288 break;
289 }
290 case XFS_BTNUM_INO: {
291 xfs_inobt_rec_t *r1;
292 xfs_inobt_rec_t *r2;
293
294 r1 = ar1;
295 r2 = ar2;
296 ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) + XFS_INODES_PER_CHUNK <=
297 INT_GET(r2->ir_startino, ARCH_CONVERT));
298 break;
299 }
300 default:
301 ASSERT(0);
302 }
303 }
304 #endif /* DEBUG */
305
306 /*
307 * Checking routine: check that block header is ok.
308 */
309 /* ARGSUSED */
310 int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_sblock(xfs_btree_cur_t * cur,xfs_btree_sblock_t * block,int level,xfs_buf_t * bp)311 xfs_btree_check_sblock(
312 xfs_btree_cur_t *cur, /* btree cursor */
313 xfs_btree_sblock_t *block, /* btree short form block pointer */
314 int level, /* level of the btree block */
315 xfs_buf_t *bp) /* buffer containing block */
316 {
317 xfs_buf_t *agbp; /* buffer for ag. freespace struct */
318 xfs_agf_t *agf; /* ag. freespace structure */
319 xfs_agblock_t agflen; /* native ag. freespace length */
320 int sblock_ok; /* block passes checks */
321
322 agbp = cur->bc_private.a.agbp;
323 agf = XFS_BUF_TO_AGF(agbp);
324 agflen = INT_GET(agf->agf_length, ARCH_CONVERT);
325 sblock_ok =
326 INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
327 INT_GET(block->bb_level, ARCH_CONVERT) == level &&
328 INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
329 xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
330 (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK ||
331 INT_GET(block->bb_leftsib, ARCH_CONVERT) < agflen) &&
332 !INT_ISZERO(block->bb_leftsib, ARCH_CONVERT) &&
333 (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK ||
334 INT_GET(block->bb_rightsib, ARCH_CONVERT) < agflen) &&
335 !INT_ISZERO(block->bb_rightsib, ARCH_CONVERT);
336 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
337 XFS_ERRTAG_BTREE_CHECK_SBLOCK,
338 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
339 if (bp)
340 xfs_buftrace("SBTREE ERROR", bp);
341 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
342 cur->bc_mp);
343 return XFS_ERROR(EFSCORRUPTED);
344 }
345 return 0;
346 }
347
348 /*
349 * Checking routine: check that (short) pointer is ok.
350 */
351 int /* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(xfs_btree_cur_t * cur,xfs_agblock_t ptr,int level)352 xfs_btree_check_sptr(
353 xfs_btree_cur_t *cur, /* btree cursor */
354 xfs_agblock_t ptr, /* btree block disk address */
355 int level) /* btree block level */
356 {
357 xfs_buf_t *agbp; /* buffer for ag. freespace struct */
358 xfs_agf_t *agf; /* ag. freespace structure */
359
360 agbp = cur->bc_private.a.agbp;
361 agf = XFS_BUF_TO_AGF(agbp);
362 XFS_WANT_CORRUPTED_RETURN(
363 level > 0 &&
364 ptr != NULLAGBLOCK && ptr != 0 &&
365 ptr < INT_GET(agf->agf_length, ARCH_CONVERT));
366 return 0;
367 }
368
369 /*
370 * Delete the btree cursor.
371 */
372 void
xfs_btree_del_cursor(xfs_btree_cur_t * cur,int error)373 xfs_btree_del_cursor(
374 xfs_btree_cur_t *cur, /* btree cursor */
375 int error) /* del because of error */
376 {
377 int i; /* btree level */
378
379 /*
380 * Clear the buffer pointers, and release the buffers.
381 * If we're doing this in the face of an error, we
382 * need to make sure to inspect all of the entries
383 * in the bc_bufs array for buffers to be unlocked.
384 * This is because some of the btree code works from
385 * level n down to 0, and if we get an error along
386 * the way we won't have initialized all the entries
387 * down to 0.
388 */
389 for (i = 0; i < cur->bc_nlevels; i++) {
390 if (cur->bc_bufs[i])
391 xfs_btree_setbuf(cur, i, NULL);
392 else if (!error)
393 break;
394 }
395 /*
396 * Can't free a bmap cursor without having dealt with the
397 * allocated indirect blocks' accounting.
398 */
399 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
400 cur->bc_private.b.allocated == 0);
401 /*
402 * Free the cursor.
403 */
404 kmem_zone_free(xfs_btree_cur_zone, cur);
405 }
406
407 /*
408 * Duplicate the btree cursor.
409 * Allocate a new one, copy the record, re-get the buffers.
410 */
411 int /* error */
xfs_btree_dup_cursor(xfs_btree_cur_t * cur,xfs_btree_cur_t ** ncur)412 xfs_btree_dup_cursor(
413 xfs_btree_cur_t *cur, /* input cursor */
414 xfs_btree_cur_t **ncur) /* output cursor */
415 {
416 xfs_buf_t *bp; /* btree block's buffer pointer */
417 int error; /* error return value */
418 int i; /* level number of btree block */
419 xfs_mount_t *mp; /* mount structure for filesystem */
420 xfs_btree_cur_t *new; /* new cursor value */
421 xfs_trans_t *tp; /* transaction pointer, can be NULL */
422
423 tp = cur->bc_tp;
424 mp = cur->bc_mp;
425 /*
426 * Allocate a new cursor like the old one.
427 */
428 new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
429 cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
430 cur->bc_private.b.whichfork);
431 /*
432 * Copy the record currently in the cursor.
433 */
434 new->bc_rec = cur->bc_rec;
435 /*
436 * For each level current, re-get the buffer and copy the ptr value.
437 */
438 for (i = 0; i < new->bc_nlevels; i++) {
439 new->bc_ptrs[i] = cur->bc_ptrs[i];
440 new->bc_ra[i] = cur->bc_ra[i];
441 if ((bp = cur->bc_bufs[i])) {
442 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
443 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
444 xfs_btree_del_cursor(new, error);
445 *ncur = NULL;
446 return error;
447 }
448 new->bc_bufs[i] = bp;
449 ASSERT(bp);
450 ASSERT(!XFS_BUF_GETERROR(bp));
451 } else
452 new->bc_bufs[i] = NULL;
453 }
454 /*
455 * For bmap btrees, copy the firstblock, flist, and flags values,
456 * since init cursor doesn't get them.
457 */
458 if (new->bc_btnum == XFS_BTNUM_BMAP) {
459 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
460 new->bc_private.b.flist = cur->bc_private.b.flist;
461 new->bc_private.b.flags = cur->bc_private.b.flags;
462 }
463 *ncur = new;
464 return 0;
465 }
466
467 /*
468 * Change the cursor to point to the first record at the given level.
469 * Other levels are unaffected.
470 */
471 int /* success=1, failure=0 */
xfs_btree_firstrec(xfs_btree_cur_t * cur,int level)472 xfs_btree_firstrec(
473 xfs_btree_cur_t *cur, /* btree cursor */
474 int level) /* level to change */
475 {
476 xfs_btree_block_t *block; /* generic btree block pointer */
477 xfs_buf_t *bp; /* buffer containing block */
478
479 /*
480 * Get the block pointer for this level.
481 */
482 block = xfs_btree_get_block(cur, level, &bp);
483 xfs_btree_check_block(cur, block, level, bp);
484 /*
485 * It's empty, there is no such record.
486 */
487 if (INT_ISZERO(block->bb_h.bb_numrecs, ARCH_CONVERT))
488 return 0;
489 /*
490 * Set the ptr value to 1, that's the first record/key.
491 */
492 cur->bc_ptrs[level] = 1;
493 return 1;
494 }
495
496 /*
497 * Retrieve the block pointer from the cursor at the given level.
498 * This may be a bmap btree root or from a buffer.
499 */
500 xfs_btree_block_t * /* generic btree block pointer */
xfs_btree_get_block(xfs_btree_cur_t * cur,int level,xfs_buf_t ** bpp)501 xfs_btree_get_block(
502 xfs_btree_cur_t *cur, /* btree cursor */
503 int level, /* level in btree */
504 xfs_buf_t **bpp) /* buffer containing the block */
505 {
506 xfs_btree_block_t *block; /* return value */
507 xfs_buf_t *bp; /* return buffer */
508 xfs_ifork_t *ifp; /* inode fork pointer */
509 int whichfork; /* data or attr fork */
510
511 if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
512 whichfork = cur->bc_private.b.whichfork;
513 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
514 block = (xfs_btree_block_t *)ifp->if_broot;
515 bp = NULL;
516 } else {
517 bp = cur->bc_bufs[level];
518 block = XFS_BUF_TO_BLOCK(bp);
519 }
520 ASSERT(block != NULL);
521 *bpp = bp;
522 return block;
523 }
524
525 /*
526 * Get a buffer for the block, return it with no data read.
527 * Long-form addressing.
528 */
529 xfs_buf_t * /* buffer for fsbno */
xfs_btree_get_bufl(xfs_mount_t * mp,xfs_trans_t * tp,xfs_fsblock_t fsbno,uint lock)530 xfs_btree_get_bufl(
531 xfs_mount_t *mp, /* file system mount point */
532 xfs_trans_t *tp, /* transaction pointer */
533 xfs_fsblock_t fsbno, /* file system block number */
534 uint lock) /* lock flags for get_buf */
535 {
536 xfs_buf_t *bp; /* buffer pointer (return value) */
537 xfs_daddr_t d; /* real disk block address */
538
539 ASSERT(fsbno != NULLFSBLOCK);
540 d = XFS_FSB_TO_DADDR(mp, fsbno);
541 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
542 ASSERT(bp);
543 ASSERT(!XFS_BUF_GETERROR(bp));
544 return bp;
545 }
546
547 /*
548 * Get a buffer for the block, return it with no data read.
549 * Short-form addressing.
550 */
551 xfs_buf_t * /* buffer for agno/agbno */
xfs_btree_get_bufs(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,xfs_agblock_t agbno,uint lock)552 xfs_btree_get_bufs(
553 xfs_mount_t *mp, /* file system mount point */
554 xfs_trans_t *tp, /* transaction pointer */
555 xfs_agnumber_t agno, /* allocation group number */
556 xfs_agblock_t agbno, /* allocation group block number */
557 uint lock) /* lock flags for get_buf */
558 {
559 xfs_buf_t *bp; /* buffer pointer (return value) */
560 xfs_daddr_t d; /* real disk block address */
561
562 ASSERT(agno != NULLAGNUMBER);
563 ASSERT(agbno != NULLAGBLOCK);
564 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
565 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
566 ASSERT(bp);
567 ASSERT(!XFS_BUF_GETERROR(bp));
568 return bp;
569 }
570
571 /*
572 * Allocate a new btree cursor.
573 * The cursor is either for allocation (A) or bmap (B) or inodes (I).
574 */
575 xfs_btree_cur_t * /* new btree cursor */
xfs_btree_init_cursor(xfs_mount_t * mp,xfs_trans_t * tp,xfs_buf_t * agbp,xfs_agnumber_t agno,xfs_btnum_t btnum,xfs_inode_t * ip,int whichfork)576 xfs_btree_init_cursor(
577 xfs_mount_t *mp, /* file system mount point */
578 xfs_trans_t *tp, /* transaction pointer */
579 xfs_buf_t *agbp, /* (A only) buffer for agf structure */
580 /* (I only) buffer for agi structure */
581 xfs_agnumber_t agno, /* (AI only) allocation group number */
582 xfs_btnum_t btnum, /* btree identifier */
583 xfs_inode_t *ip, /* (B only) inode owning the btree */
584 int whichfork) /* (B only) data or attr fork */
585 {
586 xfs_agf_t *agf; /* (A) allocation group freespace */
587 xfs_agi_t *agi; /* (I) allocation group inodespace */
588 xfs_btree_cur_t *cur; /* return value */
589 xfs_ifork_t *ifp; /* (I) inode fork pointer */
590 int nlevels=0; /* number of levels in the btree */
591
592 ASSERT(xfs_btree_cur_zone != NULL);
593 /*
594 * Allocate a new cursor.
595 */
596 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
597 /*
598 * Deduce the number of btree levels from the arguments.
599 */
600 switch (btnum) {
601 case XFS_BTNUM_BNO:
602 case XFS_BTNUM_CNT:
603 agf = XFS_BUF_TO_AGF(agbp);
604 nlevels = INT_GET(agf->agf_levels[btnum], ARCH_CONVERT);
605 break;
606 case XFS_BTNUM_BMAP:
607 ifp = XFS_IFORK_PTR(ip, whichfork);
608 nlevels = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1;
609 break;
610 case XFS_BTNUM_INO:
611 agi = XFS_BUF_TO_AGI(agbp);
612 nlevels = INT_GET(agi->agi_level, ARCH_CONVERT);
613 break;
614 default:
615 ASSERT(0);
616 }
617 /*
618 * Fill in the common fields.
619 */
620 cur->bc_tp = tp;
621 cur->bc_mp = mp;
622 cur->bc_nlevels = nlevels;
623 cur->bc_btnum = btnum;
624 cur->bc_blocklog = mp->m_sb.sb_blocklog;
625 /*
626 * Fill in private fields.
627 */
628 switch (btnum) {
629 case XFS_BTNUM_BNO:
630 case XFS_BTNUM_CNT:
631 /*
632 * Allocation btree fields.
633 */
634 cur->bc_private.a.agbp = agbp;
635 cur->bc_private.a.agno = agno;
636 break;
637 case XFS_BTNUM_BMAP:
638 /*
639 * Bmap btree fields.
640 */
641 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
642 cur->bc_private.b.ip = ip;
643 cur->bc_private.b.firstblock = NULLFSBLOCK;
644 cur->bc_private.b.flist = NULL;
645 cur->bc_private.b.allocated = 0;
646 cur->bc_private.b.flags = 0;
647 cur->bc_private.b.whichfork = whichfork;
648 break;
649 case XFS_BTNUM_INO:
650 /*
651 * Inode allocation btree fields.
652 */
653 cur->bc_private.i.agbp = agbp;
654 cur->bc_private.i.agno = agno;
655 break;
656 default:
657 ASSERT(0);
658 }
659 return cur;
660 }
661
662 /*
663 * Check for the cursor referring to the last block at the given level.
664 */
665 int /* 1=is last block, 0=not last block */
xfs_btree_islastblock(xfs_btree_cur_t * cur,int level)666 xfs_btree_islastblock(
667 xfs_btree_cur_t *cur, /* btree cursor */
668 int level) /* level to check */
669 {
670 xfs_btree_block_t *block; /* generic btree block pointer */
671 xfs_buf_t *bp; /* buffer containing block */
672
673 block = xfs_btree_get_block(cur, level, &bp);
674 xfs_btree_check_block(cur, block, level, bp);
675 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
676 return INT_GET(block->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO;
677 else
678 return INT_GET(block->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK;
679 }
680
681 /*
682 * Change the cursor to point to the last record in the current block
683 * at the given level. Other levels are unaffected.
684 */
685 int /* success=1, failure=0 */
xfs_btree_lastrec(xfs_btree_cur_t * cur,int level)686 xfs_btree_lastrec(
687 xfs_btree_cur_t *cur, /* btree cursor */
688 int level) /* level to change */
689 {
690 xfs_btree_block_t *block; /* generic btree block pointer */
691 xfs_buf_t *bp; /* buffer containing block */
692
693 /*
694 * Get the block pointer for this level.
695 */
696 block = xfs_btree_get_block(cur, level, &bp);
697 xfs_btree_check_block(cur, block, level, bp);
698 /*
699 * It's empty, there is no such record.
700 */
701 if (INT_ISZERO(block->bb_h.bb_numrecs, ARCH_CONVERT))
702 return 0;
703 /*
704 * Set the ptr value to numrecs, that's the last record/key.
705 */
706 cur->bc_ptrs[level] = INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT);
707 return 1;
708 }
709
710 /*
711 * Compute first and last byte offsets for the fields given.
712 * Interprets the offsets table, which contains struct field offsets.
713 */
714 void
xfs_btree_offsets(__int64_t fields,const short * offsets,int nbits,int * first,int * last)715 xfs_btree_offsets(
716 __int64_t fields, /* bitmask of fields */
717 const short *offsets, /* table of field offsets */
718 int nbits, /* number of bits to inspect */
719 int *first, /* output: first byte offset */
720 int *last) /* output: last byte offset */
721 {
722 int i; /* current bit number */
723 __int64_t imask; /* mask for current bit number */
724
725 ASSERT(fields != 0);
726 /*
727 * Find the lowest bit, so the first byte offset.
728 */
729 for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
730 if (imask & fields) {
731 *first = offsets[i];
732 break;
733 }
734 }
735 /*
736 * Find the highest bit, so the last byte offset.
737 */
738 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
739 if (imask & fields) {
740 *last = offsets[i + 1] - 1;
741 break;
742 }
743 }
744 }
745
746 /*
747 * Get a buffer for the block, return it read in.
748 * Long-form addressing.
749 */
750 int /* error */
xfs_btree_read_bufl(xfs_mount_t * mp,xfs_trans_t * tp,xfs_fsblock_t fsbno,uint lock,xfs_buf_t ** bpp,int refval)751 xfs_btree_read_bufl(
752 xfs_mount_t *mp, /* file system mount point */
753 xfs_trans_t *tp, /* transaction pointer */
754 xfs_fsblock_t fsbno, /* file system block number */
755 uint lock, /* lock flags for read_buf */
756 xfs_buf_t **bpp, /* buffer for fsbno */
757 int refval) /* ref count value for buffer */
758 {
759 xfs_buf_t *bp; /* return value */
760 xfs_daddr_t d; /* real disk block address */
761 int error;
762
763 ASSERT(fsbno != NULLFSBLOCK);
764 d = XFS_FSB_TO_DADDR(mp, fsbno);
765 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
766 mp->m_bsize, lock, &bp))) {
767 return error;
768 }
769 ASSERT(!bp || !XFS_BUF_GETERROR(bp));
770 if (bp != NULL) {
771 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
772 }
773 *bpp = bp;
774 return 0;
775 }
776
777 /*
778 * Get a buffer for the block, return it read in.
779 * Short-form addressing.
780 */
781 int /* error */
xfs_btree_read_bufs(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,xfs_agblock_t agbno,uint lock,xfs_buf_t ** bpp,int refval)782 xfs_btree_read_bufs(
783 xfs_mount_t *mp, /* file system mount point */
784 xfs_trans_t *tp, /* transaction pointer */
785 xfs_agnumber_t agno, /* allocation group number */
786 xfs_agblock_t agbno, /* allocation group block number */
787 uint lock, /* lock flags for read_buf */
788 xfs_buf_t **bpp, /* buffer for agno/agbno */
789 int refval) /* ref count value for buffer */
790 {
791 xfs_buf_t *bp; /* return value */
792 xfs_daddr_t d; /* real disk block address */
793 int error;
794
795 ASSERT(agno != NULLAGNUMBER);
796 ASSERT(agbno != NULLAGBLOCK);
797 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
798 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
799 mp->m_bsize, lock, &bp))) {
800 return error;
801 }
802 ASSERT(!bp || !XFS_BUF_GETERROR(bp));
803 if (bp != NULL) {
804 switch (refval) {
805 case XFS_ALLOC_BTREE_REF:
806 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
807 break;
808 case XFS_INO_BTREE_REF:
809 XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
810 break;
811 }
812 }
813 *bpp = bp;
814 return 0;
815 }
816
817 /*
818 * Read-ahead the block, don't wait for it, don't return a buffer.
819 * Long-form addressing.
820 */
821 /* ARGSUSED */
822 void
xfs_btree_reada_bufl(xfs_mount_t * mp,xfs_fsblock_t fsbno,xfs_extlen_t count)823 xfs_btree_reada_bufl(
824 xfs_mount_t *mp, /* file system mount point */
825 xfs_fsblock_t fsbno, /* file system block number */
826 xfs_extlen_t count) /* count of filesystem blocks */
827 {
828 xfs_daddr_t d;
829
830 ASSERT(fsbno != NULLFSBLOCK);
831 d = XFS_FSB_TO_DADDR(mp, fsbno);
832 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
833 }
834
835 /*
836 * Read-ahead the block, don't wait for it, don't return a buffer.
837 * Short-form addressing.
838 */
839 /* ARGSUSED */
840 void
xfs_btree_reada_bufs(xfs_mount_t * mp,xfs_agnumber_t agno,xfs_agblock_t agbno,xfs_extlen_t count)841 xfs_btree_reada_bufs(
842 xfs_mount_t *mp, /* file system mount point */
843 xfs_agnumber_t agno, /* allocation group number */
844 xfs_agblock_t agbno, /* allocation group block number */
845 xfs_extlen_t count) /* count of filesystem blocks */
846 {
847 xfs_daddr_t d;
848
849 ASSERT(agno != NULLAGNUMBER);
850 ASSERT(agbno != NULLAGBLOCK);
851 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
852 xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
853 }
854
855 /*
856 * Read-ahead btree blocks, at the given level.
857 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
858 */
859 int
xfs_btree_readahead_core(xfs_btree_cur_t * cur,int lev,int lr)860 xfs_btree_readahead_core(
861 xfs_btree_cur_t *cur, /* btree cursor */
862 int lev, /* level in btree */
863 int lr) /* left/right bits */
864 {
865 xfs_alloc_block_t *a;
866 xfs_bmbt_block_t *b;
867 xfs_inobt_block_t *i;
868 int rval = 0;
869
870 ASSERT(cur->bc_bufs[lev] != NULL);
871 cur->bc_ra[lev] |= lr;
872 switch (cur->bc_btnum) {
873 case XFS_BTNUM_BNO:
874 case XFS_BTNUM_CNT:
875 a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
876 if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(a->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
877 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
878 INT_GET(a->bb_leftsib, ARCH_CONVERT), 1);
879 rval++;
880 }
881 if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(a->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
882 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
883 INT_GET(a->bb_rightsib, ARCH_CONVERT), 1);
884 rval++;
885 }
886 break;
887 case XFS_BTNUM_BMAP:
888 b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
889 if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(b->bb_leftsib, ARCH_CONVERT) != NULLDFSBNO) {
890 xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_leftsib, ARCH_CONVERT), 1);
891 rval++;
892 }
893 if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(b->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
894 xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_rightsib, ARCH_CONVERT), 1);
895 rval++;
896 }
897 break;
898 case XFS_BTNUM_INO:
899 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
900 if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(i->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
901 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
902 INT_GET(i->bb_leftsib, ARCH_CONVERT), 1);
903 rval++;
904 }
905 if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(i->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
906 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
907 INT_GET(i->bb_rightsib, ARCH_CONVERT), 1);
908 rval++;
909 }
910 break;
911 default:
912 ASSERT(0);
913 }
914 return rval;
915 }
916
917 /*
918 * Set the buffer for level "lev" in the cursor to bp, releasing
919 * any previous buffer.
920 */
921 void
xfs_btree_setbuf(xfs_btree_cur_t * cur,int lev,xfs_buf_t * bp)922 xfs_btree_setbuf(
923 xfs_btree_cur_t *cur, /* btree cursor */
924 int lev, /* level in btree */
925 xfs_buf_t *bp) /* new buffer to set */
926 {
927 xfs_btree_block_t *b; /* btree block */
928 xfs_buf_t *obp; /* old buffer pointer */
929
930 obp = cur->bc_bufs[lev];
931 if (obp)
932 xfs_trans_brelse(cur->bc_tp, obp);
933 cur->bc_bufs[lev] = bp;
934 cur->bc_ra[lev] = 0;
935 if (!bp)
936 return;
937 b = XFS_BUF_TO_BLOCK(bp);
938 if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
939 if (INT_GET(b->bb_u.l.bb_leftsib, ARCH_CONVERT) == NULLDFSBNO)
940 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
941 if (INT_GET(b->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO)
942 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
943 } else {
944 if (INT_GET(b->bb_u.s.bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK)
945 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
946 if (INT_GET(b->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK)
947 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
948 }
949 }
950