1 /*
2  * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32 
33 /*
34  * This file contains common code for the space manager's btree implementations.
35  */
36 
37 #include "xfs.h"
38 
39 #include "xfs_macros.h"
40 #include "xfs_types.h"
41 #include "xfs_inum.h"
42 #include "xfs_log.h"
43 #include "xfs_trans.h"
44 #include "xfs_sb.h"
45 #include "xfs_ag.h"
46 #include "xfs_dir.h"
47 #include "xfs_dir2.h"
48 #include "xfs_dmapi.h"
49 #include "xfs_mount.h"
50 #include "xfs_alloc_btree.h"
51 #include "xfs_bmap_btree.h"
52 #include "xfs_ialloc_btree.h"
53 #include "xfs_btree.h"
54 #include "xfs_ialloc.h"
55 #include "xfs_attr_sf.h"
56 #include "xfs_dir_sf.h"
57 #include "xfs_dir2_sf.h"
58 #include "xfs_dinode.h"
59 #include "xfs_inode.h"
60 #include "xfs_bit.h"
61 #include "xfs_error.h"
62 
63 /*
64  * Cursor allocation zone.
65  */
66 kmem_zone_t	*xfs_btree_cur_zone;
67 
68 /*
69  * Btree magic numbers.
70  */
71 const __uint32_t xfs_magics[XFS_BTNUM_MAX] =
72 {
73 	XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
74 };
75 
76 /*
77  * Prototypes for internal routines.
78  */
79 
80 /*
81  * Checking routine: return maxrecs for the block.
82  */
83 STATIC int				/* number of records fitting in block */
84 xfs_btree_maxrecs(
85 	xfs_btree_cur_t		*cur,	/* btree cursor */
86 	xfs_btree_block_t	*block);/* generic btree block pointer */
87 
88 /*
89  * Internal routines.
90  */
91 
92 /*
93  * Checking routine: return maxrecs for the block.
94  */
95 STATIC int				/* number of records fitting in block */
xfs_btree_maxrecs(xfs_btree_cur_t * cur,xfs_btree_block_t * block)96 xfs_btree_maxrecs(
97 	xfs_btree_cur_t		*cur,	/* btree cursor */
98 	xfs_btree_block_t	*block)	/* generic btree block pointer */
99 {
100 	switch (cur->bc_btnum) {
101 	case XFS_BTNUM_BNO:
102 	case XFS_BTNUM_CNT:
103 		return (int)XFS_ALLOC_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
104 	case XFS_BTNUM_BMAP:
105 		return (int)XFS_BMAP_BLOCK_IMAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
106 	case XFS_BTNUM_INO:
107 		return (int)XFS_INOBT_BLOCK_MAXRECS(INT_GET(block->bb_h.bb_level, ARCH_CONVERT), cur);
108 	default:
109 		ASSERT(0);
110 		return 0;
111 	}
112 }
113 
114 /*
115  * External routines.
116  */
117 
118 #ifdef DEBUG
119 /*
120  * Debug routine: check that block header is ok.
121  */
122 void
xfs_btree_check_block(xfs_btree_cur_t * cur,xfs_btree_block_t * block,int level,xfs_buf_t * bp)123 xfs_btree_check_block(
124 	xfs_btree_cur_t		*cur,	/* btree cursor */
125 	xfs_btree_block_t	*block,	/* generic btree block pointer */
126 	int			level,	/* level of the btree block */
127 	xfs_buf_t		*bp)	/* buffer containing block, if any */
128 {
129 	if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
130 		xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
131 			bp);
132 	else
133 		xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
134 			bp);
135 }
136 
137 /*
138  * Debug routine: check that keys are in the right order.
139  */
140 void
xfs_btree_check_key(xfs_btnum_t btnum,void * ak1,void * ak2)141 xfs_btree_check_key(
142 	xfs_btnum_t	btnum,		/* btree identifier */
143 	void		*ak1,		/* pointer to left (lower) key */
144 	void		*ak2)		/* pointer to right (higher) key */
145 {
146 	switch (btnum) {
147 	case XFS_BTNUM_BNO: {
148 		xfs_alloc_key_t	*k1;
149 		xfs_alloc_key_t	*k2;
150 
151 		k1 = ak1;
152 		k2 = ak2;
153 		ASSERT(INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT));
154 		break;
155 	    }
156 	case XFS_BTNUM_CNT: {
157 		xfs_alloc_key_t	*k1;
158 		xfs_alloc_key_t	*k2;
159 
160 		k1 = ak1;
161 		k2 = ak2;
162 		ASSERT(INT_GET(k1->ar_blockcount, ARCH_CONVERT) < INT_GET(k2->ar_blockcount, ARCH_CONVERT) ||
163 		       (INT_GET(k1->ar_blockcount, ARCH_CONVERT) == INT_GET(k2->ar_blockcount, ARCH_CONVERT) &&
164 			INT_GET(k1->ar_startblock, ARCH_CONVERT) < INT_GET(k2->ar_startblock, ARCH_CONVERT)));
165 		break;
166 	    }
167 	case XFS_BTNUM_BMAP: {
168 		xfs_bmbt_key_t	*k1;
169 		xfs_bmbt_key_t	*k2;
170 
171 		k1 = ak1;
172 		k2 = ak2;
173 		ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) < INT_GET(k2->br_startoff, ARCH_CONVERT));
174 		break;
175 	    }
176 	case XFS_BTNUM_INO: {
177 		xfs_inobt_key_t	*k1;
178 		xfs_inobt_key_t	*k2;
179 
180 		k1 = ak1;
181 		k2 = ak2;
182 		ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) < INT_GET(k2->ir_startino, ARCH_CONVERT));
183 		break;
184 	    }
185 	default:
186 		ASSERT(0);
187 	}
188 }
189 #endif	/* DEBUG */
190 
191 /*
192  * Checking routine: check that long form block header is ok.
193  */
194 /* ARGSUSED */
195 int					/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(xfs_btree_cur_t * cur,xfs_btree_lblock_t * block,int level,xfs_buf_t * bp)196 xfs_btree_check_lblock(
197 	xfs_btree_cur_t		*cur,	/* btree cursor */
198 	xfs_btree_lblock_t	*block,	/* btree long form block pointer */
199 	int			level,	/* level of the btree block */
200 	xfs_buf_t		*bp)	/* buffer for block, if any */
201 {
202 	int			lblock_ok; /* block passes checks */
203 	xfs_mount_t		*mp;	/* file system mount point */
204 
205 	mp = cur->bc_mp;
206 	lblock_ok =
207 		INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
208 		INT_GET(block->bb_level, ARCH_CONVERT) == level &&
209 		INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
210 			xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
211 		!INT_ISZERO(block->bb_leftsib, ARCH_CONVERT) &&
212 		(INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLDFSBNO ||
213 		 XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_leftsib, ARCH_CONVERT))) &&
214 		!INT_ISZERO(block->bb_rightsib, ARCH_CONVERT) &&
215 		(INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLDFSBNO ||
216 		 XFS_FSB_SANITY_CHECK(mp, INT_GET(block->bb_rightsib, ARCH_CONVERT)));
217 	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,
218 			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
219 		if (bp)
220 			xfs_buftrace("LBTREE ERROR", bp);
221 		XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
222 				 mp);
223 		return XFS_ERROR(EFSCORRUPTED);
224 	}
225 	return 0;
226 }
227 
228 /*
229  * Checking routine: check that (long) pointer is ok.
230  */
231 int					/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(xfs_btree_cur_t * cur,xfs_dfsbno_t ptr,int level)232 xfs_btree_check_lptr(
233 	xfs_btree_cur_t	*cur,		/* btree cursor */
234 	xfs_dfsbno_t	ptr,		/* btree block disk address */
235 	int		level)		/* btree block level */
236 {
237 	xfs_mount_t	*mp;		/* file system mount point */
238 
239 	mp = cur->bc_mp;
240 	XFS_WANT_CORRUPTED_RETURN(
241 		level > 0 &&
242 		ptr != NULLDFSBNO &&
243 		XFS_FSB_SANITY_CHECK(mp, ptr));
244 	return 0;
245 }
246 
247 #ifdef DEBUG
248 /*
249  * Debug routine: check that records are in the right order.
250  */
251 void
xfs_btree_check_rec(xfs_btnum_t btnum,void * ar1,void * ar2)252 xfs_btree_check_rec(
253 	xfs_btnum_t	btnum,		/* btree identifier */
254 	void		*ar1,		/* pointer to left (lower) record */
255 	void		*ar2)		/* pointer to right (higher) record */
256 {
257 	switch (btnum) {
258 	case XFS_BTNUM_BNO: {
259 		xfs_alloc_rec_t	*r1;
260 		xfs_alloc_rec_t	*r2;
261 
262 		r1 = ar1;
263 		r2 = ar2;
264 		ASSERT(INT_GET(r1->ar_startblock, ARCH_CONVERT) + INT_GET(r1->ar_blockcount, ARCH_CONVERT) <=
265 		       INT_GET(r2->ar_startblock, ARCH_CONVERT));
266 		break;
267 	    }
268 	case XFS_BTNUM_CNT: {
269 		xfs_alloc_rec_t	*r1;
270 		xfs_alloc_rec_t	*r2;
271 
272 		r1 = ar1;
273 		r2 = ar2;
274 		ASSERT(INT_GET(r1->ar_blockcount, ARCH_CONVERT) < INT_GET(r2->ar_blockcount, ARCH_CONVERT) ||
275 		       (INT_GET(r1->ar_blockcount, ARCH_CONVERT) == INT_GET(r2->ar_blockcount, ARCH_CONVERT) &&
276 			INT_GET(r1->ar_startblock, ARCH_CONVERT) < INT_GET(r2->ar_startblock, ARCH_CONVERT)));
277 		break;
278 	    }
279 	case XFS_BTNUM_BMAP: {
280 		xfs_bmbt_rec_t	*r1;
281 		xfs_bmbt_rec_t	*r2;
282 
283 		r1 = ar1;
284 		r2 = ar2;
285 		ASSERT(xfs_bmbt_disk_get_startoff(r1) +
286 		       xfs_bmbt_disk_get_blockcount(r1) <=
287 		       xfs_bmbt_disk_get_startoff(r2));
288 		break;
289 	    }
290 	case XFS_BTNUM_INO: {
291 		xfs_inobt_rec_t	*r1;
292 		xfs_inobt_rec_t	*r2;
293 
294 		r1 = ar1;
295 		r2 = ar2;
296 		ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) + XFS_INODES_PER_CHUNK <=
297 		       INT_GET(r2->ir_startino, ARCH_CONVERT));
298 		break;
299 	    }
300 	default:
301 		ASSERT(0);
302 	}
303 }
304 #endif	/* DEBUG */
305 
306 /*
307  * Checking routine: check that block header is ok.
308  */
309 /* ARGSUSED */
310 int					/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sblock(xfs_btree_cur_t * cur,xfs_btree_sblock_t * block,int level,xfs_buf_t * bp)311 xfs_btree_check_sblock(
312 	xfs_btree_cur_t		*cur,	/* btree cursor */
313 	xfs_btree_sblock_t	*block,	/* btree short form block pointer */
314 	int			level,	/* level of the btree block */
315 	xfs_buf_t		*bp)	/* buffer containing block */
316 {
317 	xfs_buf_t		*agbp;	/* buffer for ag. freespace struct */
318 	xfs_agf_t		*agf;	/* ag. freespace structure */
319 	xfs_agblock_t		agflen;	/* native ag. freespace length */
320 	int			sblock_ok; /* block passes checks */
321 
322 	agbp = cur->bc_private.a.agbp;
323 	agf = XFS_BUF_TO_AGF(agbp);
324 	agflen = INT_GET(agf->agf_length, ARCH_CONVERT);
325 	sblock_ok =
326 		INT_GET(block->bb_magic, ARCH_CONVERT) == xfs_magics[cur->bc_btnum] &&
327 		INT_GET(block->bb_level, ARCH_CONVERT) == level &&
328 		INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
329 			xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
330 		(INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK ||
331 		 INT_GET(block->bb_leftsib, ARCH_CONVERT) < agflen) &&
332 		!INT_ISZERO(block->bb_leftsib, ARCH_CONVERT) &&
333 		(INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK ||
334 		 INT_GET(block->bb_rightsib, ARCH_CONVERT) < agflen) &&
335 		!INT_ISZERO(block->bb_rightsib, ARCH_CONVERT);
336 	if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
337 			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
338 			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
339 		if (bp)
340 			xfs_buftrace("SBTREE ERROR", bp);
341 		XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
342 				 cur->bc_mp);
343 		return XFS_ERROR(EFSCORRUPTED);
344 	}
345 	return 0;
346 }
347 
348 /*
349  * Checking routine: check that (short) pointer is ok.
350  */
351 int					/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(xfs_btree_cur_t * cur,xfs_agblock_t ptr,int level)352 xfs_btree_check_sptr(
353 	xfs_btree_cur_t	*cur,		/* btree cursor */
354 	xfs_agblock_t	ptr,		/* btree block disk address */
355 	int		level)		/* btree block level */
356 {
357 	xfs_buf_t	*agbp;		/* buffer for ag. freespace struct */
358 	xfs_agf_t	*agf;		/* ag. freespace structure */
359 
360 	agbp = cur->bc_private.a.agbp;
361 	agf = XFS_BUF_TO_AGF(agbp);
362 	XFS_WANT_CORRUPTED_RETURN(
363 		level > 0 &&
364 		ptr != NULLAGBLOCK && ptr != 0 &&
365 		ptr < INT_GET(agf->agf_length, ARCH_CONVERT));
366 	return 0;
367 }
368 
369 /*
370  * Delete the btree cursor.
371  */
372 void
xfs_btree_del_cursor(xfs_btree_cur_t * cur,int error)373 xfs_btree_del_cursor(
374 	xfs_btree_cur_t	*cur,		/* btree cursor */
375 	int		error)		/* del because of error */
376 {
377 	int		i;		/* btree level */
378 
379 	/*
380 	 * Clear the buffer pointers, and release the buffers.
381 	 * If we're doing this in the face of an error, we
382 	 * need to make sure to inspect all of the entries
383 	 * in the bc_bufs array for buffers to be unlocked.
384 	 * This is because some of the btree code works from
385 	 * level n down to 0, and if we get an error along
386 	 * the way we won't have initialized all the entries
387 	 * down to 0.
388 	 */
389 	for (i = 0; i < cur->bc_nlevels; i++) {
390 		if (cur->bc_bufs[i])
391 			xfs_btree_setbuf(cur, i, NULL);
392 		else if (!error)
393 			break;
394 	}
395 	/*
396 	 * Can't free a bmap cursor without having dealt with the
397 	 * allocated indirect blocks' accounting.
398 	 */
399 	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
400 	       cur->bc_private.b.allocated == 0);
401 	/*
402 	 * Free the cursor.
403 	 */
404 	kmem_zone_free(xfs_btree_cur_zone, cur);
405 }
406 
407 /*
408  * Duplicate the btree cursor.
409  * Allocate a new one, copy the record, re-get the buffers.
410  */
411 int					/* error */
xfs_btree_dup_cursor(xfs_btree_cur_t * cur,xfs_btree_cur_t ** ncur)412 xfs_btree_dup_cursor(
413 	xfs_btree_cur_t	*cur,		/* input cursor */
414 	xfs_btree_cur_t	**ncur)		/* output cursor */
415 {
416 	xfs_buf_t	*bp;		/* btree block's buffer pointer */
417 	int		error;		/* error return value */
418 	int		i;		/* level number of btree block */
419 	xfs_mount_t	*mp;		/* mount structure for filesystem */
420 	xfs_btree_cur_t	*new;		/* new cursor value */
421 	xfs_trans_t	*tp;		/* transaction pointer, can be NULL */
422 
423 	tp = cur->bc_tp;
424 	mp = cur->bc_mp;
425 	/*
426 	 * Allocate a new cursor like the old one.
427 	 */
428 	new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
429 		cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
430 		cur->bc_private.b.whichfork);
431 	/*
432 	 * Copy the record currently in the cursor.
433 	 */
434 	new->bc_rec = cur->bc_rec;
435 	/*
436 	 * For each level current, re-get the buffer and copy the ptr value.
437 	 */
438 	for (i = 0; i < new->bc_nlevels; i++) {
439 		new->bc_ptrs[i] = cur->bc_ptrs[i];
440 		new->bc_ra[i] = cur->bc_ra[i];
441 		if ((bp = cur->bc_bufs[i])) {
442 			if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
443 				XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
444 				xfs_btree_del_cursor(new, error);
445 				*ncur = NULL;
446 				return error;
447 			}
448 			new->bc_bufs[i] = bp;
449 			ASSERT(bp);
450 			ASSERT(!XFS_BUF_GETERROR(bp));
451 		} else
452 			new->bc_bufs[i] = NULL;
453 	}
454 	/*
455 	 * For bmap btrees, copy the firstblock, flist, and flags values,
456 	 * since init cursor doesn't get them.
457 	 */
458 	if (new->bc_btnum == XFS_BTNUM_BMAP) {
459 		new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
460 		new->bc_private.b.flist = cur->bc_private.b.flist;
461 		new->bc_private.b.flags = cur->bc_private.b.flags;
462 	}
463 	*ncur = new;
464 	return 0;
465 }
466 
467 /*
468  * Change the cursor to point to the first record at the given level.
469  * Other levels are unaffected.
470  */
471 int					/* success=1, failure=0 */
xfs_btree_firstrec(xfs_btree_cur_t * cur,int level)472 xfs_btree_firstrec(
473 	xfs_btree_cur_t		*cur,	/* btree cursor */
474 	int			level)	/* level to change */
475 {
476 	xfs_btree_block_t	*block;	/* generic btree block pointer */
477 	xfs_buf_t		*bp;	/* buffer containing block */
478 
479 	/*
480 	 * Get the block pointer for this level.
481 	 */
482 	block = xfs_btree_get_block(cur, level, &bp);
483 	xfs_btree_check_block(cur, block, level, bp);
484 	/*
485 	 * It's empty, there is no such record.
486 	 */
487 	if (INT_ISZERO(block->bb_h.bb_numrecs, ARCH_CONVERT))
488 		return 0;
489 	/*
490 	 * Set the ptr value to 1, that's the first record/key.
491 	 */
492 	cur->bc_ptrs[level] = 1;
493 	return 1;
494 }
495 
496 /*
497  * Retrieve the block pointer from the cursor at the given level.
498  * This may be a bmap btree root or from a buffer.
499  */
500 xfs_btree_block_t *			/* generic btree block pointer */
xfs_btree_get_block(xfs_btree_cur_t * cur,int level,xfs_buf_t ** bpp)501 xfs_btree_get_block(
502 	xfs_btree_cur_t		*cur,	/* btree cursor */
503 	int			level,	/* level in btree */
504 	xfs_buf_t		**bpp)	/* buffer containing the block */
505 {
506 	xfs_btree_block_t	*block;	/* return value */
507 	xfs_buf_t		*bp;	/* return buffer */
508 	xfs_ifork_t		*ifp;	/* inode fork pointer */
509 	int			whichfork; /* data or attr fork */
510 
511 	if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
512 		whichfork = cur->bc_private.b.whichfork;
513 		ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
514 		block = (xfs_btree_block_t *)ifp->if_broot;
515 		bp = NULL;
516 	} else {
517 		bp = cur->bc_bufs[level];
518 		block = XFS_BUF_TO_BLOCK(bp);
519 	}
520 	ASSERT(block != NULL);
521 	*bpp = bp;
522 	return block;
523 }
524 
525 /*
526  * Get a buffer for the block, return it with no data read.
527  * Long-form addressing.
528  */
529 xfs_buf_t *				/* buffer for fsbno */
xfs_btree_get_bufl(xfs_mount_t * mp,xfs_trans_t * tp,xfs_fsblock_t fsbno,uint lock)530 xfs_btree_get_bufl(
531 	xfs_mount_t	*mp,		/* file system mount point */
532 	xfs_trans_t	*tp,		/* transaction pointer */
533 	xfs_fsblock_t	fsbno,		/* file system block number */
534 	uint		lock)		/* lock flags for get_buf */
535 {
536 	xfs_buf_t	*bp;		/* buffer pointer (return value) */
537 	xfs_daddr_t		d;		/* real disk block address */
538 
539 	ASSERT(fsbno != NULLFSBLOCK);
540 	d = XFS_FSB_TO_DADDR(mp, fsbno);
541 	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
542 	ASSERT(bp);
543 	ASSERT(!XFS_BUF_GETERROR(bp));
544 	return bp;
545 }
546 
547 /*
548  * Get a buffer for the block, return it with no data read.
549  * Short-form addressing.
550  */
551 xfs_buf_t *				/* buffer for agno/agbno */
xfs_btree_get_bufs(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,xfs_agblock_t agbno,uint lock)552 xfs_btree_get_bufs(
553 	xfs_mount_t	*mp,		/* file system mount point */
554 	xfs_trans_t	*tp,		/* transaction pointer */
555 	xfs_agnumber_t	agno,		/* allocation group number */
556 	xfs_agblock_t	agbno,		/* allocation group block number */
557 	uint		lock)		/* lock flags for get_buf */
558 {
559 	xfs_buf_t	*bp;		/* buffer pointer (return value) */
560 	xfs_daddr_t		d;		/* real disk block address */
561 
562 	ASSERT(agno != NULLAGNUMBER);
563 	ASSERT(agbno != NULLAGBLOCK);
564 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
565 	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
566 	ASSERT(bp);
567 	ASSERT(!XFS_BUF_GETERROR(bp));
568 	return bp;
569 }
570 
571 /*
572  * Allocate a new btree cursor.
573  * The cursor is either for allocation (A) or bmap (B) or inodes (I).
574  */
575 xfs_btree_cur_t *			/* new btree cursor */
xfs_btree_init_cursor(xfs_mount_t * mp,xfs_trans_t * tp,xfs_buf_t * agbp,xfs_agnumber_t agno,xfs_btnum_t btnum,xfs_inode_t * ip,int whichfork)576 xfs_btree_init_cursor(
577 	xfs_mount_t	*mp,		/* file system mount point */
578 	xfs_trans_t	*tp,		/* transaction pointer */
579 	xfs_buf_t	*agbp,		/* (A only) buffer for agf structure */
580 					/* (I only) buffer for agi structure */
581 	xfs_agnumber_t	agno,		/* (AI only) allocation group number */
582 	xfs_btnum_t	btnum,		/* btree identifier */
583 	xfs_inode_t	*ip,		/* (B only) inode owning the btree */
584 	int		whichfork)	/* (B only) data or attr fork */
585 {
586 	xfs_agf_t	*agf;		/* (A) allocation group freespace */
587 	xfs_agi_t	*agi;		/* (I) allocation group inodespace */
588 	xfs_btree_cur_t	*cur;		/* return value */
589 	xfs_ifork_t	*ifp;		/* (I) inode fork pointer */
590 	int		nlevels=0;	/* number of levels in the btree */
591 
592 	ASSERT(xfs_btree_cur_zone != NULL);
593 	/*
594 	 * Allocate a new cursor.
595 	 */
596 	cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
597 	/*
598 	 * Deduce the number of btree levels from the arguments.
599 	 */
600 	switch (btnum) {
601 	case XFS_BTNUM_BNO:
602 	case XFS_BTNUM_CNT:
603 		agf = XFS_BUF_TO_AGF(agbp);
604 		nlevels = INT_GET(agf->agf_levels[btnum], ARCH_CONVERT);
605 		break;
606 	case XFS_BTNUM_BMAP:
607 		ifp = XFS_IFORK_PTR(ip, whichfork);
608 		nlevels = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1;
609 		break;
610 	case XFS_BTNUM_INO:
611 		agi = XFS_BUF_TO_AGI(agbp);
612 		nlevels = INT_GET(agi->agi_level, ARCH_CONVERT);
613 		break;
614 	default:
615 		ASSERT(0);
616 	}
617 	/*
618 	 * Fill in the common fields.
619 	 */
620 	cur->bc_tp = tp;
621 	cur->bc_mp = mp;
622 	cur->bc_nlevels = nlevels;
623 	cur->bc_btnum = btnum;
624 	cur->bc_blocklog = mp->m_sb.sb_blocklog;
625 	/*
626 	 * Fill in private fields.
627 	 */
628 	switch (btnum) {
629 	case XFS_BTNUM_BNO:
630 	case XFS_BTNUM_CNT:
631 		/*
632 		 * Allocation btree fields.
633 		 */
634 		cur->bc_private.a.agbp = agbp;
635 		cur->bc_private.a.agno = agno;
636 		break;
637 	case XFS_BTNUM_BMAP:
638 		/*
639 		 * Bmap btree fields.
640 		 */
641 		cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
642 		cur->bc_private.b.ip = ip;
643 		cur->bc_private.b.firstblock = NULLFSBLOCK;
644 		cur->bc_private.b.flist = NULL;
645 		cur->bc_private.b.allocated = 0;
646 		cur->bc_private.b.flags = 0;
647 		cur->bc_private.b.whichfork = whichfork;
648 		break;
649 	case XFS_BTNUM_INO:
650 		/*
651 		 * Inode allocation btree fields.
652 		 */
653 		cur->bc_private.i.agbp = agbp;
654 		cur->bc_private.i.agno = agno;
655 		break;
656 	default:
657 		ASSERT(0);
658 	}
659 	return cur;
660 }
661 
662 /*
663  * Check for the cursor referring to the last block at the given level.
664  */
665 int					/* 1=is last block, 0=not last block */
xfs_btree_islastblock(xfs_btree_cur_t * cur,int level)666 xfs_btree_islastblock(
667 	xfs_btree_cur_t		*cur,	/* btree cursor */
668 	int			level)	/* level to check */
669 {
670 	xfs_btree_block_t	*block;	/* generic btree block pointer */
671 	xfs_buf_t		*bp;	/* buffer containing block */
672 
673 	block = xfs_btree_get_block(cur, level, &bp);
674 	xfs_btree_check_block(cur, block, level, bp);
675 	if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
676 		return INT_GET(block->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO;
677 	else
678 		return INT_GET(block->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK;
679 }
680 
681 /*
682  * Change the cursor to point to the last record in the current block
683  * at the given level.  Other levels are unaffected.
684  */
685 int					/* success=1, failure=0 */
xfs_btree_lastrec(xfs_btree_cur_t * cur,int level)686 xfs_btree_lastrec(
687 	xfs_btree_cur_t		*cur,	/* btree cursor */
688 	int			level)	/* level to change */
689 {
690 	xfs_btree_block_t	*block;	/* generic btree block pointer */
691 	xfs_buf_t		*bp;	/* buffer containing block */
692 
693 	/*
694 	 * Get the block pointer for this level.
695 	 */
696 	block = xfs_btree_get_block(cur, level, &bp);
697 	xfs_btree_check_block(cur, block, level, bp);
698 	/*
699 	 * It's empty, there is no such record.
700 	 */
701 	if (INT_ISZERO(block->bb_h.bb_numrecs, ARCH_CONVERT))
702 		return 0;
703 	/*
704 	 * Set the ptr value to numrecs, that's the last record/key.
705 	 */
706 	cur->bc_ptrs[level] = INT_GET(block->bb_h.bb_numrecs, ARCH_CONVERT);
707 	return 1;
708 }
709 
710 /*
711  * Compute first and last byte offsets for the fields given.
712  * Interprets the offsets table, which contains struct field offsets.
713  */
714 void
xfs_btree_offsets(__int64_t fields,const short * offsets,int nbits,int * first,int * last)715 xfs_btree_offsets(
716 	__int64_t	fields,		/* bitmask of fields */
717 	const short	*offsets,	/* table of field offsets */
718 	int		nbits,		/* number of bits to inspect */
719 	int		*first,		/* output: first byte offset */
720 	int		*last)		/* output: last byte offset */
721 {
722 	int		i;		/* current bit number */
723 	__int64_t	imask;		/* mask for current bit number */
724 
725 	ASSERT(fields != 0);
726 	/*
727 	 * Find the lowest bit, so the first byte offset.
728 	 */
729 	for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
730 		if (imask & fields) {
731 			*first = offsets[i];
732 			break;
733 		}
734 	}
735 	/*
736 	 * Find the highest bit, so the last byte offset.
737 	 */
738 	for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
739 		if (imask & fields) {
740 			*last = offsets[i + 1] - 1;
741 			break;
742 		}
743 	}
744 }
745 
746 /*
747  * Get a buffer for the block, return it read in.
748  * Long-form addressing.
749  */
750 int					/* error */
xfs_btree_read_bufl(xfs_mount_t * mp,xfs_trans_t * tp,xfs_fsblock_t fsbno,uint lock,xfs_buf_t ** bpp,int refval)751 xfs_btree_read_bufl(
752 	xfs_mount_t	*mp,		/* file system mount point */
753 	xfs_trans_t	*tp,		/* transaction pointer */
754 	xfs_fsblock_t	fsbno,		/* file system block number */
755 	uint		lock,		/* lock flags for read_buf */
756 	xfs_buf_t	**bpp,		/* buffer for fsbno */
757 	int		refval)		/* ref count value for buffer */
758 {
759 	xfs_buf_t	*bp;		/* return value */
760 	xfs_daddr_t		d;		/* real disk block address */
761 	int		error;
762 
763 	ASSERT(fsbno != NULLFSBLOCK);
764 	d = XFS_FSB_TO_DADDR(mp, fsbno);
765 	if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
766 			mp->m_bsize, lock, &bp))) {
767 		return error;
768 	}
769 	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
770 	if (bp != NULL) {
771 		XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
772 	}
773 	*bpp = bp;
774 	return 0;
775 }
776 
777 /*
778  * Get a buffer for the block, return it read in.
779  * Short-form addressing.
780  */
781 int					/* error */
xfs_btree_read_bufs(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,xfs_agblock_t agbno,uint lock,xfs_buf_t ** bpp,int refval)782 xfs_btree_read_bufs(
783 	xfs_mount_t	*mp,		/* file system mount point */
784 	xfs_trans_t	*tp,		/* transaction pointer */
785 	xfs_agnumber_t	agno,		/* allocation group number */
786 	xfs_agblock_t	agbno,		/* allocation group block number */
787 	uint		lock,		/* lock flags for read_buf */
788 	xfs_buf_t	**bpp,		/* buffer for agno/agbno */
789 	int		refval)		/* ref count value for buffer */
790 {
791 	xfs_buf_t	*bp;		/* return value */
792 	xfs_daddr_t	d;		/* real disk block address */
793 	int		error;
794 
795 	ASSERT(agno != NULLAGNUMBER);
796 	ASSERT(agbno != NULLAGBLOCK);
797 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
798 	if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
799 					mp->m_bsize, lock, &bp))) {
800 		return error;
801 	}
802 	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
803 	if (bp != NULL) {
804 		switch (refval) {
805 		case XFS_ALLOC_BTREE_REF:
806 			XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
807 			break;
808 		case XFS_INO_BTREE_REF:
809 			XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
810 			break;
811 		}
812 	}
813 	*bpp = bp;
814 	return 0;
815 }
816 
817 /*
818  * Read-ahead the block, don't wait for it, don't return a buffer.
819  * Long-form addressing.
820  */
821 /* ARGSUSED */
822 void
xfs_btree_reada_bufl(xfs_mount_t * mp,xfs_fsblock_t fsbno,xfs_extlen_t count)823 xfs_btree_reada_bufl(
824 	xfs_mount_t	*mp,		/* file system mount point */
825 	xfs_fsblock_t	fsbno,		/* file system block number */
826 	xfs_extlen_t	count)		/* count of filesystem blocks */
827 {
828 	xfs_daddr_t		d;
829 
830 	ASSERT(fsbno != NULLFSBLOCK);
831 	d = XFS_FSB_TO_DADDR(mp, fsbno);
832 	xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
833 }
834 
835 /*
836  * Read-ahead the block, don't wait for it, don't return a buffer.
837  * Short-form addressing.
838  */
839 /* ARGSUSED */
840 void
xfs_btree_reada_bufs(xfs_mount_t * mp,xfs_agnumber_t agno,xfs_agblock_t agbno,xfs_extlen_t count)841 xfs_btree_reada_bufs(
842 	xfs_mount_t	*mp,		/* file system mount point */
843 	xfs_agnumber_t	agno,		/* allocation group number */
844 	xfs_agblock_t	agbno,		/* allocation group block number */
845 	xfs_extlen_t	count)		/* count of filesystem blocks */
846 {
847 	xfs_daddr_t		d;
848 
849 	ASSERT(agno != NULLAGNUMBER);
850 	ASSERT(agbno != NULLAGBLOCK);
851 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
852 	xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
853 }
854 
855 /*
856  * Read-ahead btree blocks, at the given level.
857  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
858  */
859 int
xfs_btree_readahead_core(xfs_btree_cur_t * cur,int lev,int lr)860 xfs_btree_readahead_core(
861 	xfs_btree_cur_t		*cur,		/* btree cursor */
862 	int			lev,		/* level in btree */
863 	int			lr)		/* left/right bits */
864 {
865 	xfs_alloc_block_t	*a;
866 	xfs_bmbt_block_t	*b;
867 	xfs_inobt_block_t	*i;
868 	int			rval = 0;
869 
870 	ASSERT(cur->bc_bufs[lev] != NULL);
871 	cur->bc_ra[lev] |= lr;
872 	switch (cur->bc_btnum) {
873 	case XFS_BTNUM_BNO:
874 	case XFS_BTNUM_CNT:
875 		a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
876 		if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(a->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
877 			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
878 				INT_GET(a->bb_leftsib, ARCH_CONVERT), 1);
879 			rval++;
880 		}
881 		if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(a->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
882 			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
883 				INT_GET(a->bb_rightsib, ARCH_CONVERT), 1);
884 			rval++;
885 		}
886 		break;
887 	case XFS_BTNUM_BMAP:
888 		b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
889 		if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(b->bb_leftsib, ARCH_CONVERT) != NULLDFSBNO) {
890 			xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_leftsib, ARCH_CONVERT), 1);
891 			rval++;
892 		}
893 		if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(b->bb_rightsib, ARCH_CONVERT) != NULLDFSBNO) {
894 			xfs_btree_reada_bufl(cur->bc_mp, INT_GET(b->bb_rightsib, ARCH_CONVERT), 1);
895 			rval++;
896 		}
897 		break;
898 	case XFS_BTNUM_INO:
899 		i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
900 		if ((lr & XFS_BTCUR_LEFTRA) && INT_GET(i->bb_leftsib, ARCH_CONVERT) != NULLAGBLOCK) {
901 			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
902 				INT_GET(i->bb_leftsib, ARCH_CONVERT), 1);
903 			rval++;
904 		}
905 		if ((lr & XFS_BTCUR_RIGHTRA) && INT_GET(i->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
906 			xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
907 				INT_GET(i->bb_rightsib, ARCH_CONVERT), 1);
908 			rval++;
909 		}
910 		break;
911 	default:
912 		ASSERT(0);
913 	}
914 	return rval;
915 }
916 
917 /*
918  * Set the buffer for level "lev" in the cursor to bp, releasing
919  * any previous buffer.
920  */
921 void
xfs_btree_setbuf(xfs_btree_cur_t * cur,int lev,xfs_buf_t * bp)922 xfs_btree_setbuf(
923 	xfs_btree_cur_t		*cur,	/* btree cursor */
924 	int			lev,	/* level in btree */
925 	xfs_buf_t		*bp)	/* new buffer to set */
926 {
927 	xfs_btree_block_t	*b;	/* btree block */
928 	xfs_buf_t		*obp;	/* old buffer pointer */
929 
930 	obp = cur->bc_bufs[lev];
931 	if (obp)
932 		xfs_trans_brelse(cur->bc_tp, obp);
933 	cur->bc_bufs[lev] = bp;
934 	cur->bc_ra[lev] = 0;
935 	if (!bp)
936 		return;
937 	b = XFS_BUF_TO_BLOCK(bp);
938 	if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
939 		if (INT_GET(b->bb_u.l.bb_leftsib, ARCH_CONVERT) == NULLDFSBNO)
940 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
941 		if (INT_GET(b->bb_u.l.bb_rightsib, ARCH_CONVERT) == NULLDFSBNO)
942 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
943 	} else {
944 		if (INT_GET(b->bb_u.s.bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK)
945 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
946 		if (INT_GET(b->bb_u.s.bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK)
947 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
948 	}
949 }
950