1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_mount.h"
29 #include "xfs_da_btree.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_dir2_sf.h"
32 #include "xfs_dinode.h"
33 #include "xfs_inode.h"
34 #include "xfs_inode_item.h"
35 #include "xfs_alloc.h"
36 #include "xfs_bmap.h"
37 #include "xfs_attr.h"
38 #include "xfs_attr_leaf.h"
39 #include "xfs_dir2_data.h"
40 #include "xfs_dir2_leaf.h"
41 #include "xfs_dir2_block.h"
42 #include "xfs_dir2_node.h"
43 #include "xfs_error.h"
44 #include "xfs_trace.h"
45 
46 /*
47  * xfs_da_btree.c
48  *
49  * Routines to implement directories as Btrees of hashed names.
50  */
51 
52 /*========================================================================
53  * Function prototypes for the kernel.
54  *========================================================================*/
55 
56 /*
57  * Routines used for growing the Btree.
58  */
59 STATIC int xfs_da_root_split(xfs_da_state_t *state,
60 					    xfs_da_state_blk_t *existing_root,
61 					    xfs_da_state_blk_t *new_child);
62 STATIC int xfs_da_node_split(xfs_da_state_t *state,
63 					    xfs_da_state_blk_t *existing_blk,
64 					    xfs_da_state_blk_t *split_blk,
65 					    xfs_da_state_blk_t *blk_to_add,
66 					    int treelevel,
67 					    int *result);
68 STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
69 					 xfs_da_state_blk_t *node_blk_1,
70 					 xfs_da_state_blk_t *node_blk_2);
71 STATIC void xfs_da_node_add(xfs_da_state_t *state,
72 				   xfs_da_state_blk_t *old_node_blk,
73 				   xfs_da_state_blk_t *new_node_blk);
74 
75 /*
76  * Routines used for shrinking the Btree.
77  */
78 STATIC int xfs_da_root_join(xfs_da_state_t *state,
79 					   xfs_da_state_blk_t *root_blk);
80 STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
81 STATIC void xfs_da_node_remove(xfs_da_state_t *state,
82 					      xfs_da_state_blk_t *drop_blk);
83 STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
84 					 xfs_da_state_blk_t *src_node_blk,
85 					 xfs_da_state_blk_t *dst_node_blk);
86 
87 /*
88  * Utility routines.
89  */
90 STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
91 STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
92 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
93 STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
94 				  xfs_da_state_blk_t *drop_blk,
95 				  xfs_da_state_blk_t *save_blk);
96 STATIC void	xfs_da_state_kill_altpath(xfs_da_state_t *state);
97 
98 /*========================================================================
99  * Routines used for growing the Btree.
100  *========================================================================*/
101 
102 /*
103  * Create the initial contents of an intermediate node.
104  */
105 int
xfs_da_node_create(xfs_da_args_t * args,xfs_dablk_t blkno,int level,xfs_dabuf_t ** bpp,int whichfork)106 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
107 				 xfs_dabuf_t **bpp, int whichfork)
108 {
109 	xfs_da_intnode_t *node;
110 	xfs_dabuf_t *bp;
111 	int error;
112 	xfs_trans_t *tp;
113 
114 	tp = args->trans;
115 	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
116 	if (error)
117 		return(error);
118 	ASSERT(bp != NULL);
119 	node = bp->data;
120 	node->hdr.info.forw = 0;
121 	node->hdr.info.back = 0;
122 	node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
123 	node->hdr.info.pad = 0;
124 	node->hdr.count = 0;
125 	node->hdr.level = cpu_to_be16(level);
126 
127 	xfs_da_log_buf(tp, bp,
128 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
129 
130 	*bpp = bp;
131 	return(0);
132 }
133 
134 /*
135  * Split a leaf node, rebalance, then possibly split
136  * intermediate nodes, rebalance, etc.
137  */
138 int							/* error */
xfs_da_split(xfs_da_state_t * state)139 xfs_da_split(xfs_da_state_t *state)
140 {
141 	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
142 	xfs_da_intnode_t *node;
143 	xfs_dabuf_t *bp;
144 	int max, action, error, i;
145 
146 	/*
147 	 * Walk back up the tree splitting/inserting/adjusting as necessary.
148 	 * If we need to insert and there isn't room, split the node, then
149 	 * decide which fragment to insert the new block from below into.
150 	 * Note that we may split the root this way, but we need more fixup.
151 	 */
152 	max = state->path.active - 1;
153 	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
154 	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
155 	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
156 
157 	addblk = &state->path.blk[max];		/* initial dummy value */
158 	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
159 		oldblk = &state->path.blk[i];
160 		newblk = &state->altpath.blk[i];
161 
162 		/*
163 		 * If a leaf node then
164 		 *     Allocate a new leaf node, then rebalance across them.
165 		 * else if an intermediate node then
166 		 *     We split on the last layer, must we split the node?
167 		 */
168 		switch (oldblk->magic) {
169 		case XFS_ATTR_LEAF_MAGIC:
170 			error = xfs_attr_leaf_split(state, oldblk, newblk);
171 			if ((error != 0) && (error != ENOSPC)) {
172 				return(error);	/* GROT: attr is inconsistent */
173 			}
174 			if (!error) {
175 				addblk = newblk;
176 				break;
177 			}
178 			/*
179 			 * Entry wouldn't fit, split the leaf again.
180 			 */
181 			state->extravalid = 1;
182 			if (state->inleaf) {
183 				state->extraafter = 0;	/* before newblk */
184 				error = xfs_attr_leaf_split(state, oldblk,
185 							    &state->extrablk);
186 			} else {
187 				state->extraafter = 1;	/* after newblk */
188 				error = xfs_attr_leaf_split(state, newblk,
189 							    &state->extrablk);
190 			}
191 			if (error)
192 				return(error);	/* GROT: attr inconsistent */
193 			addblk = newblk;
194 			break;
195 		case XFS_DIR2_LEAFN_MAGIC:
196 			error = xfs_dir2_leafn_split(state, oldblk, newblk);
197 			if (error)
198 				return error;
199 			addblk = newblk;
200 			break;
201 		case XFS_DA_NODE_MAGIC:
202 			error = xfs_da_node_split(state, oldblk, newblk, addblk,
203 							 max - i, &action);
204 			xfs_da_buf_done(addblk->bp);
205 			addblk->bp = NULL;
206 			if (error)
207 				return(error);	/* GROT: dir is inconsistent */
208 			/*
209 			 * Record the newly split block for the next time thru?
210 			 */
211 			if (action)
212 				addblk = newblk;
213 			else
214 				addblk = NULL;
215 			break;
216 		}
217 
218 		/*
219 		 * Update the btree to show the new hashval for this child.
220 		 */
221 		xfs_da_fixhashpath(state, &state->path);
222 		/*
223 		 * If we won't need this block again, it's getting dropped
224 		 * from the active path by the loop control, so we need
225 		 * to mark it done now.
226 		 */
227 		if (i > 0 || !addblk)
228 			xfs_da_buf_done(oldblk->bp);
229 	}
230 	if (!addblk)
231 		return(0);
232 
233 	/*
234 	 * Split the root node.
235 	 */
236 	ASSERT(state->path.active == 0);
237 	oldblk = &state->path.blk[0];
238 	error = xfs_da_root_split(state, oldblk, addblk);
239 	if (error) {
240 		xfs_da_buf_done(oldblk->bp);
241 		xfs_da_buf_done(addblk->bp);
242 		addblk->bp = NULL;
243 		return(error);	/* GROT: dir is inconsistent */
244 	}
245 
246 	/*
247 	 * Update pointers to the node which used to be block 0 and
248 	 * just got bumped because of the addition of a new root node.
249 	 * There might be three blocks involved if a double split occurred,
250 	 * and the original block 0 could be at any position in the list.
251 	 */
252 
253 	node = oldblk->bp->data;
254 	if (node->hdr.info.forw) {
255 		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
256 			bp = addblk->bp;
257 		} else {
258 			ASSERT(state->extravalid);
259 			bp = state->extrablk.bp;
260 		}
261 		node = bp->data;
262 		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
263 		xfs_da_log_buf(state->args->trans, bp,
264 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
265 		    sizeof(node->hdr.info)));
266 	}
267 	node = oldblk->bp->data;
268 	if (node->hdr.info.back) {
269 		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
270 			bp = addblk->bp;
271 		} else {
272 			ASSERT(state->extravalid);
273 			bp = state->extrablk.bp;
274 		}
275 		node = bp->data;
276 		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
277 		xfs_da_log_buf(state->args->trans, bp,
278 		    XFS_DA_LOGRANGE(node, &node->hdr.info,
279 		    sizeof(node->hdr.info)));
280 	}
281 	xfs_da_buf_done(oldblk->bp);
282 	xfs_da_buf_done(addblk->bp);
283 	addblk->bp = NULL;
284 	return(0);
285 }
286 
287 /*
288  * Split the root.  We have to create a new root and point to the two
289  * parts (the split old root) that we just created.  Copy block zero to
290  * the EOF, extending the inode in process.
291  */
292 STATIC int						/* error */
xfs_da_root_split(xfs_da_state_t * state,xfs_da_state_blk_t * blk1,xfs_da_state_blk_t * blk2)293 xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
294 				 xfs_da_state_blk_t *blk2)
295 {
296 	xfs_da_intnode_t *node, *oldroot;
297 	xfs_da_args_t *args;
298 	xfs_dablk_t blkno;
299 	xfs_dabuf_t *bp;
300 	int error, size;
301 	xfs_inode_t *dp;
302 	xfs_trans_t *tp;
303 	xfs_mount_t *mp;
304 	xfs_dir2_leaf_t *leaf;
305 
306 	/*
307 	 * Copy the existing (incorrect) block from the root node position
308 	 * to a free space somewhere.
309 	 */
310 	args = state->args;
311 	ASSERT(args != NULL);
312 	error = xfs_da_grow_inode(args, &blkno);
313 	if (error)
314 		return(error);
315 	dp = args->dp;
316 	tp = args->trans;
317 	mp = state->mp;
318 	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
319 	if (error)
320 		return(error);
321 	ASSERT(bp != NULL);
322 	node = bp->data;
323 	oldroot = blk1->bp->data;
324 	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
325 		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
326 			     (char *)oldroot);
327 	} else {
328 		ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
329 		leaf = (xfs_dir2_leaf_t *)oldroot;
330 		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
331 			     (char *)leaf);
332 	}
333 	memcpy(node, oldroot, size);
334 	xfs_da_log_buf(tp, bp, 0, size - 1);
335 	xfs_da_buf_done(blk1->bp);
336 	blk1->bp = bp;
337 	blk1->blkno = blkno;
338 
339 	/*
340 	 * Set up the new root node.
341 	 */
342 	error = xfs_da_node_create(args,
343 		(args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
344 		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
345 	if (error)
346 		return(error);
347 	node = bp->data;
348 	node->btree[0].hashval = cpu_to_be32(blk1->hashval);
349 	node->btree[0].before = cpu_to_be32(blk1->blkno);
350 	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
351 	node->btree[1].before = cpu_to_be32(blk2->blkno);
352 	node->hdr.count = cpu_to_be16(2);
353 
354 #ifdef DEBUG
355 	if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
356 		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
357 		       blk1->blkno < mp->m_dirfreeblk);
358 		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
359 		       blk2->blkno < mp->m_dirfreeblk);
360 	}
361 #endif
362 
363 	/* Header is already logged by xfs_da_node_create */
364 	xfs_da_log_buf(tp, bp,
365 		XFS_DA_LOGRANGE(node, node->btree,
366 			sizeof(xfs_da_node_entry_t) * 2));
367 	xfs_da_buf_done(bp);
368 
369 	return(0);
370 }
371 
372 /*
373  * Split the node, rebalance, then add the new entry.
374  */
375 STATIC int						/* error */
xfs_da_node_split(xfs_da_state_t * state,xfs_da_state_blk_t * oldblk,xfs_da_state_blk_t * newblk,xfs_da_state_blk_t * addblk,int treelevel,int * result)376 xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
377 				 xfs_da_state_blk_t *newblk,
378 				 xfs_da_state_blk_t *addblk,
379 				 int treelevel, int *result)
380 {
381 	xfs_da_intnode_t *node;
382 	xfs_dablk_t blkno;
383 	int newcount, error;
384 	int useextra;
385 
386 	node = oldblk->bp->data;
387 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
388 
389 	/*
390 	 * With V2 dirs the extra block is data or freespace.
391 	 */
392 	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
393 	newcount = 1 + useextra;
394 	/*
395 	 * Do we have to split the node?
396 	 */
397 	if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) {
398 		/*
399 		 * Allocate a new node, add to the doubly linked chain of
400 		 * nodes, then move some of our excess entries into it.
401 		 */
402 		error = xfs_da_grow_inode(state->args, &blkno);
403 		if (error)
404 			return(error);	/* GROT: dir is inconsistent */
405 
406 		error = xfs_da_node_create(state->args, blkno, treelevel,
407 					   &newblk->bp, state->args->whichfork);
408 		if (error)
409 			return(error);	/* GROT: dir is inconsistent */
410 		newblk->blkno = blkno;
411 		newblk->magic = XFS_DA_NODE_MAGIC;
412 		xfs_da_node_rebalance(state, oldblk, newblk);
413 		error = xfs_da_blk_link(state, oldblk, newblk);
414 		if (error)
415 			return(error);
416 		*result = 1;
417 	} else {
418 		*result = 0;
419 	}
420 
421 	/*
422 	 * Insert the new entry(s) into the correct block
423 	 * (updating last hashval in the process).
424 	 *
425 	 * xfs_da_node_add() inserts BEFORE the given index,
426 	 * and as a result of using node_lookup_int() we always
427 	 * point to a valid entry (not after one), but a split
428 	 * operation always results in a new block whose hashvals
429 	 * FOLLOW the current block.
430 	 *
431 	 * If we had double-split op below us, then add the extra block too.
432 	 */
433 	node = oldblk->bp->data;
434 	if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
435 		oldblk->index++;
436 		xfs_da_node_add(state, oldblk, addblk);
437 		if (useextra) {
438 			if (state->extraafter)
439 				oldblk->index++;
440 			xfs_da_node_add(state, oldblk, &state->extrablk);
441 			state->extravalid = 0;
442 		}
443 	} else {
444 		newblk->index++;
445 		xfs_da_node_add(state, newblk, addblk);
446 		if (useextra) {
447 			if (state->extraafter)
448 				newblk->index++;
449 			xfs_da_node_add(state, newblk, &state->extrablk);
450 			state->extravalid = 0;
451 		}
452 	}
453 
454 	return(0);
455 }
456 
457 /*
458  * Balance the btree elements between two intermediate nodes,
459  * usually one full and one empty.
460  *
461  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
462  */
463 STATIC void
xfs_da_node_rebalance(xfs_da_state_t * state,xfs_da_state_blk_t * blk1,xfs_da_state_blk_t * blk2)464 xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
465 				     xfs_da_state_blk_t *blk2)
466 {
467 	xfs_da_intnode_t *node1, *node2, *tmpnode;
468 	xfs_da_node_entry_t *btree_s, *btree_d;
469 	int count, tmp;
470 	xfs_trans_t *tp;
471 
472 	node1 = blk1->bp->data;
473 	node2 = blk2->bp->data;
474 	/*
475 	 * Figure out how many entries need to move, and in which direction.
476 	 * Swap the nodes around if that makes it simpler.
477 	 */
478 	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
479 	    ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) ||
480 	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
481 	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
482 		tmpnode = node1;
483 		node1 = node2;
484 		node2 = tmpnode;
485 	}
486 	ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
487 	ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
488 	count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
489 	if (count == 0)
490 		return;
491 	tp = state->args->trans;
492 	/*
493 	 * Two cases: high-to-low and low-to-high.
494 	 */
495 	if (count > 0) {
496 		/*
497 		 * Move elements in node2 up to make a hole.
498 		 */
499 		if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) {
500 			tmp *= (uint)sizeof(xfs_da_node_entry_t);
501 			btree_s = &node2->btree[0];
502 			btree_d = &node2->btree[count];
503 			memmove(btree_d, btree_s, tmp);
504 		}
505 
506 		/*
507 		 * Move the req'd B-tree elements from high in node1 to
508 		 * low in node2.
509 		 */
510 		be16_add_cpu(&node2->hdr.count, count);
511 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
512 		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
513 		btree_d = &node2->btree[0];
514 		memcpy(btree_d, btree_s, tmp);
515 		be16_add_cpu(&node1->hdr.count, -count);
516 	} else {
517 		/*
518 		 * Move the req'd B-tree elements from low in node2 to
519 		 * high in node1.
520 		 */
521 		count = -count;
522 		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
523 		btree_s = &node2->btree[0];
524 		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
525 		memcpy(btree_d, btree_s, tmp);
526 		be16_add_cpu(&node1->hdr.count, count);
527 		xfs_da_log_buf(tp, blk1->bp,
528 			XFS_DA_LOGRANGE(node1, btree_d, tmp));
529 
530 		/*
531 		 * Move elements in node2 down to fill the hole.
532 		 */
533 		tmp  = be16_to_cpu(node2->hdr.count) - count;
534 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
535 		btree_s = &node2->btree[count];
536 		btree_d = &node2->btree[0];
537 		memmove(btree_d, btree_s, tmp);
538 		be16_add_cpu(&node2->hdr.count, -count);
539 	}
540 
541 	/*
542 	 * Log header of node 1 and all current bits of node 2.
543 	 */
544 	xfs_da_log_buf(tp, blk1->bp,
545 		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
546 	xfs_da_log_buf(tp, blk2->bp,
547 		XFS_DA_LOGRANGE(node2, &node2->hdr,
548 			sizeof(node2->hdr) +
549 			sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));
550 
551 	/*
552 	 * Record the last hashval from each block for upward propagation.
553 	 * (note: don't use the swapped node pointers)
554 	 */
555 	node1 = blk1->bp->data;
556 	node2 = blk2->bp->data;
557 	blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
558 	blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);
559 
560 	/*
561 	 * Adjust the expected index for insertion.
562 	 */
563 	if (blk1->index >= be16_to_cpu(node1->hdr.count)) {
564 		blk2->index = blk1->index - be16_to_cpu(node1->hdr.count);
565 		blk1->index = be16_to_cpu(node1->hdr.count) + 1;	/* make it invalid */
566 	}
567 }
568 
569 /*
570  * Add a new entry to an intermediate node.
571  */
572 STATIC void
xfs_da_node_add(xfs_da_state_t * state,xfs_da_state_blk_t * oldblk,xfs_da_state_blk_t * newblk)573 xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
574 			       xfs_da_state_blk_t *newblk)
575 {
576 	xfs_da_intnode_t *node;
577 	xfs_da_node_entry_t *btree;
578 	int tmp;
579 
580 	node = oldblk->bp->data;
581 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
582 	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
583 	ASSERT(newblk->blkno != 0);
584 	if (state->args->whichfork == XFS_DATA_FORK)
585 		ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
586 		       newblk->blkno < state->mp->m_dirfreeblk);
587 
588 	/*
589 	 * We may need to make some room before we insert the new node.
590 	 */
591 	tmp = 0;
592 	btree = &node->btree[ oldblk->index ];
593 	if (oldblk->index < be16_to_cpu(node->hdr.count)) {
594 		tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree);
595 		memmove(btree + 1, btree, tmp);
596 	}
597 	btree->hashval = cpu_to_be32(newblk->hashval);
598 	btree->before = cpu_to_be32(newblk->blkno);
599 	xfs_da_log_buf(state->args->trans, oldblk->bp,
600 		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
601 	be16_add_cpu(&node->hdr.count, 1);
602 	xfs_da_log_buf(state->args->trans, oldblk->bp,
603 		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
604 
605 	/*
606 	 * Copy the last hash value from the oldblk to propagate upwards.
607 	 */
608 	oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval);
609 }
610 
611 /*========================================================================
612  * Routines used for shrinking the Btree.
613  *========================================================================*/
614 
615 /*
616  * Deallocate an empty leaf node, remove it from its parent,
617  * possibly deallocating that block, etc...
618  */
619 int
xfs_da_join(xfs_da_state_t * state)620 xfs_da_join(xfs_da_state_t *state)
621 {
622 	xfs_da_state_blk_t *drop_blk, *save_blk;
623 	int action, error;
624 
625 	action = 0;
626 	drop_blk = &state->path.blk[ state->path.active-1 ];
627 	save_blk = &state->altpath.blk[ state->path.active-1 ];
628 	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
629 	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
630 	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
631 
632 	/*
633 	 * Walk back up the tree joining/deallocating as necessary.
634 	 * When we stop dropping blocks, break out.
635 	 */
636 	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
637 		 state->path.active--) {
638 		/*
639 		 * See if we can combine the block with a neighbor.
640 		 *   (action == 0) => no options, just leave
641 		 *   (action == 1) => coalesce, then unlink
642 		 *   (action == 2) => block empty, unlink it
643 		 */
644 		switch (drop_blk->magic) {
645 		case XFS_ATTR_LEAF_MAGIC:
646 			error = xfs_attr_leaf_toosmall(state, &action);
647 			if (error)
648 				return(error);
649 			if (action == 0)
650 				return(0);
651 			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
652 			break;
653 		case XFS_DIR2_LEAFN_MAGIC:
654 			error = xfs_dir2_leafn_toosmall(state, &action);
655 			if (error)
656 				return error;
657 			if (action == 0)
658 				return 0;
659 			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
660 			break;
661 		case XFS_DA_NODE_MAGIC:
662 			/*
663 			 * Remove the offending node, fixup hashvals,
664 			 * check for a toosmall neighbor.
665 			 */
666 			xfs_da_node_remove(state, drop_blk);
667 			xfs_da_fixhashpath(state, &state->path);
668 			error = xfs_da_node_toosmall(state, &action);
669 			if (error)
670 				return(error);
671 			if (action == 0)
672 				return 0;
673 			xfs_da_node_unbalance(state, drop_blk, save_blk);
674 			break;
675 		}
676 		xfs_da_fixhashpath(state, &state->altpath);
677 		error = xfs_da_blk_unlink(state, drop_blk, save_blk);
678 		xfs_da_state_kill_altpath(state);
679 		if (error)
680 			return(error);
681 		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
682 							 drop_blk->bp);
683 		drop_blk->bp = NULL;
684 		if (error)
685 			return(error);
686 	}
687 	/*
688 	 * We joined all the way to the top.  If it turns out that
689 	 * we only have one entry in the root, make the child block
690 	 * the new root.
691 	 */
692 	xfs_da_node_remove(state, drop_blk);
693 	xfs_da_fixhashpath(state, &state->path);
694 	error = xfs_da_root_join(state, &state->path.blk[0]);
695 	return(error);
696 }
697 
698 /*
699  * We have only one entry in the root.  Copy the only remaining child of
700  * the old root to block 0 as the new root node.
701  */
702 STATIC int
xfs_da_root_join(xfs_da_state_t * state,xfs_da_state_blk_t * root_blk)703 xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
704 {
705 	xfs_da_intnode_t *oldroot;
706 	/* REFERENCED */
707 	xfs_da_blkinfo_t *blkinfo;
708 	xfs_da_args_t *args;
709 	xfs_dablk_t child;
710 	xfs_dabuf_t *bp;
711 	int error;
712 
713 	args = state->args;
714 	ASSERT(args != NULL);
715 	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
716 	oldroot = root_blk->bp->data;
717 	ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC);
718 	ASSERT(!oldroot->hdr.info.forw);
719 	ASSERT(!oldroot->hdr.info.back);
720 
721 	/*
722 	 * If the root has more than one child, then don't do anything.
723 	 */
724 	if (be16_to_cpu(oldroot->hdr.count) > 1)
725 		return(0);
726 
727 	/*
728 	 * Read in the (only) child block, then copy those bytes into
729 	 * the root block's buffer and free the original child block.
730 	 */
731 	child = be32_to_cpu(oldroot->btree[0].before);
732 	ASSERT(child != 0);
733 	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
734 					     args->whichfork);
735 	if (error)
736 		return(error);
737 	ASSERT(bp != NULL);
738 	blkinfo = bp->data;
739 	if (be16_to_cpu(oldroot->hdr.level) == 1) {
740 		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC ||
741 		       be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
742 	} else {
743 		ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
744 	}
745 	ASSERT(!blkinfo->forw);
746 	ASSERT(!blkinfo->back);
747 	memcpy(root_blk->bp->data, bp->data, state->blocksize);
748 	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
749 	error = xfs_da_shrink_inode(args, child, bp);
750 	return(error);
751 }
752 
753 /*
754  * Check a node block and its neighbors to see if the block should be
755  * collapsed into one or the other neighbor.  Always keep the block
756  * with the smaller block number.
757  * If the current block is over 50% full, don't try to join it, return 0.
758  * If the block is empty, fill in the state structure and return 2.
759  * If it can be collapsed, fill in the state structure and return 1.
760  * If nothing can be done, return 0.
761  */
762 STATIC int
xfs_da_node_toosmall(xfs_da_state_t * state,int * action)763 xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
764 {
765 	xfs_da_intnode_t *node;
766 	xfs_da_state_blk_t *blk;
767 	xfs_da_blkinfo_t *info;
768 	int count, forward, error, retval, i;
769 	xfs_dablk_t blkno;
770 	xfs_dabuf_t *bp;
771 
772 	/*
773 	 * Check for the degenerate case of the block being over 50% full.
774 	 * If so, it's not worth even looking to see if we might be able
775 	 * to coalesce with a sibling.
776 	 */
777 	blk = &state->path.blk[ state->path.active-1 ];
778 	info = blk->bp->data;
779 	ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
780 	node = (xfs_da_intnode_t *)info;
781 	count = be16_to_cpu(node->hdr.count);
782 	if (count > (state->node_ents >> 1)) {
783 		*action = 0;	/* blk over 50%, don't try to join */
784 		return(0);	/* blk over 50%, don't try to join */
785 	}
786 
787 	/*
788 	 * Check for the degenerate case of the block being empty.
789 	 * If the block is empty, we'll simply delete it, no need to
790 	 * coalesce it with a sibling block.  We choose (arbitrarily)
791 	 * to merge with the forward block unless it is NULL.
792 	 */
793 	if (count == 0) {
794 		/*
795 		 * Make altpath point to the block we want to keep and
796 		 * path point to the block we want to drop (this one).
797 		 */
798 		forward = (info->forw != 0);
799 		memcpy(&state->altpath, &state->path, sizeof(state->path));
800 		error = xfs_da_path_shift(state, &state->altpath, forward,
801 						 0, &retval);
802 		if (error)
803 			return(error);
804 		if (retval) {
805 			*action = 0;
806 		} else {
807 			*action = 2;
808 		}
809 		return(0);
810 	}
811 
812 	/*
813 	 * Examine each sibling block to see if we can coalesce with
814 	 * at least 25% free space to spare.  We need to figure out
815 	 * whether to merge with the forward or the backward block.
816 	 * We prefer coalescing with the lower numbered sibling so as
817 	 * to shrink a directory over time.
818 	 */
819 	/* start with smaller blk num */
820 	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
821 	for (i = 0; i < 2; forward = !forward, i++) {
822 		if (forward)
823 			blkno = be32_to_cpu(info->forw);
824 		else
825 			blkno = be32_to_cpu(info->back);
826 		if (blkno == 0)
827 			continue;
828 		error = xfs_da_read_buf(state->args->trans, state->args->dp,
829 					blkno, -1, &bp, state->args->whichfork);
830 		if (error)
831 			return(error);
832 		ASSERT(bp != NULL);
833 
834 		node = (xfs_da_intnode_t *)info;
835 		count  = state->node_ents;
836 		count -= state->node_ents >> 2;
837 		count -= be16_to_cpu(node->hdr.count);
838 		node = bp->data;
839 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
840 		count -= be16_to_cpu(node->hdr.count);
841 		xfs_da_brelse(state->args->trans, bp);
842 		if (count >= 0)
843 			break;	/* fits with at least 25% to spare */
844 	}
845 	if (i >= 2) {
846 		*action = 0;
847 		return(0);
848 	}
849 
850 	/*
851 	 * Make altpath point to the block we want to keep (the lower
852 	 * numbered block) and path point to the block we want to drop.
853 	 */
854 	memcpy(&state->altpath, &state->path, sizeof(state->path));
855 	if (blkno < blk->blkno) {
856 		error = xfs_da_path_shift(state, &state->altpath, forward,
857 						 0, &retval);
858 		if (error) {
859 			return(error);
860 		}
861 		if (retval) {
862 			*action = 0;
863 			return(0);
864 		}
865 	} else {
866 		error = xfs_da_path_shift(state, &state->path, forward,
867 						 0, &retval);
868 		if (error) {
869 			return(error);
870 		}
871 		if (retval) {
872 			*action = 0;
873 			return(0);
874 		}
875 	}
876 	*action = 1;
877 	return(0);
878 }
879 
880 /*
881  * Walk back up the tree adjusting hash values as necessary,
882  * when we stop making changes, return.
883  */
884 void
xfs_da_fixhashpath(xfs_da_state_t * state,xfs_da_state_path_t * path)885 xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
886 {
887 	xfs_da_state_blk_t *blk;
888 	xfs_da_intnode_t *node;
889 	xfs_da_node_entry_t *btree;
890 	xfs_dahash_t lasthash=0;
891 	int level, count;
892 
893 	level = path->active-1;
894 	blk = &path->blk[ level ];
895 	switch (blk->magic) {
896 	case XFS_ATTR_LEAF_MAGIC:
897 		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
898 		if (count == 0)
899 			return;
900 		break;
901 	case XFS_DIR2_LEAFN_MAGIC:
902 		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
903 		if (count == 0)
904 			return;
905 		break;
906 	case XFS_DA_NODE_MAGIC:
907 		lasthash = xfs_da_node_lasthash(blk->bp, &count);
908 		if (count == 0)
909 			return;
910 		break;
911 	}
912 	for (blk--, level--; level >= 0; blk--, level--) {
913 		node = blk->bp->data;
914 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
915 		btree = &node->btree[ blk->index ];
916 		if (be32_to_cpu(btree->hashval) == lasthash)
917 			break;
918 		blk->hashval = lasthash;
919 		btree->hashval = cpu_to_be32(lasthash);
920 		xfs_da_log_buf(state->args->trans, blk->bp,
921 				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
922 
923 		lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
924 	}
925 }
926 
927 /*
928  * Remove an entry from an intermediate node.
929  */
930 STATIC void
xfs_da_node_remove(xfs_da_state_t * state,xfs_da_state_blk_t * drop_blk)931 xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
932 {
933 	xfs_da_intnode_t *node;
934 	xfs_da_node_entry_t *btree;
935 	int tmp;
936 
937 	node = drop_blk->bp->data;
938 	ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
939 	ASSERT(drop_blk->index >= 0);
940 
941 	/*
942 	 * Copy over the offending entry, or just zero it out.
943 	 */
944 	btree = &node->btree[drop_blk->index];
945 	if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) {
946 		tmp  = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
947 		tmp *= (uint)sizeof(xfs_da_node_entry_t);
948 		memmove(btree, btree + 1, tmp);
949 		xfs_da_log_buf(state->args->trans, drop_blk->bp,
950 		    XFS_DA_LOGRANGE(node, btree, tmp));
951 		btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
952 	}
953 	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
954 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
955 	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
956 	be16_add_cpu(&node->hdr.count, -1);
957 	xfs_da_log_buf(state->args->trans, drop_blk->bp,
958 	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
959 
960 	/*
961 	 * Copy the last hash value from the block to propagate upwards.
962 	 */
963 	btree--;
964 	drop_blk->hashval = be32_to_cpu(btree->hashval);
965 }
966 
967 /*
968  * Unbalance the btree elements between two intermediate nodes,
969  * move all Btree elements from one node into another.
970  */
971 STATIC void
xfs_da_node_unbalance(xfs_da_state_t * state,xfs_da_state_blk_t * drop_blk,xfs_da_state_blk_t * save_blk)972 xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
973 				     xfs_da_state_blk_t *save_blk)
974 {
975 	xfs_da_intnode_t *drop_node, *save_node;
976 	xfs_da_node_entry_t *btree;
977 	int tmp;
978 	xfs_trans_t *tp;
979 
980 	drop_node = drop_blk->bp->data;
981 	save_node = save_blk->bp->data;
982 	ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
983 	ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
984 	tp = state->args->trans;
985 
986 	/*
987 	 * If the dying block has lower hashvals, then move all the
988 	 * elements in the remaining block up to make a hole.
989 	 */
990 	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
991 	    (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
992 	     be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
993 	{
994 		btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
995 		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
996 		memmove(btree, &save_node->btree[0], tmp);
997 		btree = &save_node->btree[0];
998 		xfs_da_log_buf(tp, save_blk->bp,
999 			XFS_DA_LOGRANGE(save_node, btree,
1000 				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
1001 				sizeof(xfs_da_node_entry_t)));
1002 	} else {
1003 		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
1004 		xfs_da_log_buf(tp, save_blk->bp,
1005 			XFS_DA_LOGRANGE(save_node, btree,
1006 				be16_to_cpu(drop_node->hdr.count) *
1007 				sizeof(xfs_da_node_entry_t)));
1008 	}
1009 
1010 	/*
1011 	 * Move all the B-tree elements from drop_blk to save_blk.
1012 	 */
1013 	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
1014 	memcpy(btree, &drop_node->btree[0], tmp);
1015 	be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
1016 
1017 	xfs_da_log_buf(tp, save_blk->bp,
1018 		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1019 			sizeof(save_node->hdr)));
1020 
1021 	/*
1022 	 * Save the last hashval in the remaining block for upward propagation.
1023 	 */
1024 	save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval);
1025 }
1026 
1027 /*========================================================================
1028  * Routines used for finding things in the Btree.
1029  *========================================================================*/
1030 
1031 /*
1032  * Walk down the Btree looking for a particular filename, filling
1033  * in the state structure as we go.
1034  *
1035  * We will set the state structure to point to each of the elements
1036  * in each of the nodes where either the hashval is or should be.
1037  *
1038  * We support duplicate hashval's so for each entry in the current
1039  * node that could contain the desired hashval, descend.  This is a
1040  * pruned depth-first tree search.
1041  */
1042 int							/* error */
xfs_da_node_lookup_int(xfs_da_state_t * state,int * result)1043 xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1044 {
1045 	xfs_da_state_blk_t *blk;
1046 	xfs_da_blkinfo_t *curr;
1047 	xfs_da_intnode_t *node;
1048 	xfs_da_node_entry_t *btree;
1049 	xfs_dablk_t blkno;
1050 	int probe, span, max, error, retval;
1051 	xfs_dahash_t hashval, btreehashval;
1052 	xfs_da_args_t *args;
1053 
1054 	args = state->args;
1055 
1056 	/*
1057 	 * Descend thru the B-tree searching each level for the right
1058 	 * node to use, until the right hashval is found.
1059 	 */
1060 	blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0;
1061 	for (blk = &state->path.blk[0], state->path.active = 1;
1062 			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
1063 			 blk++, state->path.active++) {
1064 		/*
1065 		 * Read the next node down in the tree.
1066 		 */
1067 		blk->blkno = blkno;
1068 		error = xfs_da_read_buf(args->trans, args->dp, blkno,
1069 					-1, &blk->bp, args->whichfork);
1070 		if (error) {
1071 			blk->blkno = 0;
1072 			state->path.active--;
1073 			return(error);
1074 		}
1075 		curr = blk->bp->data;
1076 		blk->magic = be16_to_cpu(curr->magic);
1077 		ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
1078 		       blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1079 		       blk->magic == XFS_ATTR_LEAF_MAGIC);
1080 
1081 		/*
1082 		 * Search an intermediate node for a match.
1083 		 */
1084 		if (blk->magic == XFS_DA_NODE_MAGIC) {
1085 			node = blk->bp->data;
1086 			max = be16_to_cpu(node->hdr.count);
1087 			blk->hashval = be32_to_cpu(node->btree[max-1].hashval);
1088 
1089 			/*
1090 			 * Binary search.  (note: small blocks will skip loop)
1091 			 */
1092 			probe = span = max / 2;
1093 			hashval = args->hashval;
1094 			for (btree = &node->btree[probe]; span > 4;
1095 				   btree = &node->btree[probe]) {
1096 				span /= 2;
1097 				btreehashval = be32_to_cpu(btree->hashval);
1098 				if (btreehashval < hashval)
1099 					probe += span;
1100 				else if (btreehashval > hashval)
1101 					probe -= span;
1102 				else
1103 					break;
1104 			}
1105 			ASSERT((probe >= 0) && (probe < max));
1106 			ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval));
1107 
1108 			/*
1109 			 * Since we may have duplicate hashval's, find the first
1110 			 * matching hashval in the node.
1111 			 */
1112 			while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) {
1113 				btree--;
1114 				probe--;
1115 			}
1116 			while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) {
1117 				btree++;
1118 				probe++;
1119 			}
1120 
1121 			/*
1122 			 * Pick the right block to descend on.
1123 			 */
1124 			if (probe == max) {
1125 				blk->index = max-1;
1126 				blkno = be32_to_cpu(node->btree[max-1].before);
1127 			} else {
1128 				blk->index = probe;
1129 				blkno = be32_to_cpu(btree->before);
1130 			}
1131 		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1132 			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1133 			break;
1134 		} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1135 			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1136 			break;
1137 		}
1138 	}
1139 
1140 	/*
1141 	 * A leaf block that ends in the hashval that we are interested in
1142 	 * (final hashval == search hashval) means that the next block may
1143 	 * contain more entries with the same hashval, shift upward to the
1144 	 * next leaf and keep searching.
1145 	 */
1146 	for (;;) {
1147 		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1148 			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1149 							&blk->index, state);
1150 		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1151 			retval = xfs_attr_leaf_lookup_int(blk->bp, args);
1152 			blk->index = args->index;
1153 			args->blkno = blk->blkno;
1154 		} else {
1155 			ASSERT(0);
1156 			return XFS_ERROR(EFSCORRUPTED);
1157 		}
1158 		if (((retval == ENOENT) || (retval == ENOATTR)) &&
1159 		    (blk->hashval == args->hashval)) {
1160 			error = xfs_da_path_shift(state, &state->path, 1, 1,
1161 							 &retval);
1162 			if (error)
1163 				return(error);
1164 			if (retval == 0) {
1165 				continue;
1166 			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1167 				/* path_shift() gives ENOENT */
1168 				retval = XFS_ERROR(ENOATTR);
1169 			}
1170 		}
1171 		break;
1172 	}
1173 	*result = retval;
1174 	return(0);
1175 }
1176 
1177 /*========================================================================
1178  * Utility routines.
1179  *========================================================================*/
1180 
1181 /*
1182  * Link a new block into a doubly linked list of blocks (of whatever type).
1183  */
1184 int							/* error */
xfs_da_blk_link(xfs_da_state_t * state,xfs_da_state_blk_t * old_blk,xfs_da_state_blk_t * new_blk)1185 xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1186 			       xfs_da_state_blk_t *new_blk)
1187 {
1188 	xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
1189 	xfs_da_args_t *args;
1190 	int before=0, error;
1191 	xfs_dabuf_t *bp;
1192 
1193 	/*
1194 	 * Set up environment.
1195 	 */
1196 	args = state->args;
1197 	ASSERT(args != NULL);
1198 	old_info = old_blk->bp->data;
1199 	new_info = new_blk->bp->data;
1200 	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1201 	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1202 	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1203 	ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
1204 	ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
1205 	ASSERT(old_blk->magic == new_blk->magic);
1206 
1207 	switch (old_blk->magic) {
1208 	case XFS_ATTR_LEAF_MAGIC:
1209 		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1210 		break;
1211 	case XFS_DIR2_LEAFN_MAGIC:
1212 		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1213 		break;
1214 	case XFS_DA_NODE_MAGIC:
1215 		before = xfs_da_node_order(old_blk->bp, new_blk->bp);
1216 		break;
1217 	}
1218 
1219 	/*
1220 	 * Link blocks in appropriate order.
1221 	 */
1222 	if (before) {
1223 		/*
1224 		 * Link new block in before existing block.
1225 		 */
1226 		new_info->forw = cpu_to_be32(old_blk->blkno);
1227 		new_info->back = old_info->back;
1228 		if (old_info->back) {
1229 			error = xfs_da_read_buf(args->trans, args->dp,
1230 						be32_to_cpu(old_info->back),
1231 						-1, &bp, args->whichfork);
1232 			if (error)
1233 				return(error);
1234 			ASSERT(bp != NULL);
1235 			tmp_info = bp->data;
1236 			ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
1237 			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
1238 			tmp_info->forw = cpu_to_be32(new_blk->blkno);
1239 			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1240 			xfs_da_buf_done(bp);
1241 		}
1242 		old_info->back = cpu_to_be32(new_blk->blkno);
1243 	} else {
1244 		/*
1245 		 * Link new block in after existing block.
1246 		 */
1247 		new_info->forw = old_info->forw;
1248 		new_info->back = cpu_to_be32(old_blk->blkno);
1249 		if (old_info->forw) {
1250 			error = xfs_da_read_buf(args->trans, args->dp,
1251 						be32_to_cpu(old_info->forw),
1252 						-1, &bp, args->whichfork);
1253 			if (error)
1254 				return(error);
1255 			ASSERT(bp != NULL);
1256 			tmp_info = bp->data;
1257 			ASSERT(tmp_info->magic == old_info->magic);
1258 			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
1259 			tmp_info->back = cpu_to_be32(new_blk->blkno);
1260 			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1261 			xfs_da_buf_done(bp);
1262 		}
1263 		old_info->forw = cpu_to_be32(new_blk->blkno);
1264 	}
1265 
1266 	xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1267 	xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1268 	return(0);
1269 }
1270 
1271 /*
1272  * Compare two intermediate nodes for "order".
1273  */
1274 STATIC int
xfs_da_node_order(xfs_dabuf_t * node1_bp,xfs_dabuf_t * node2_bp)1275 xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1276 {
1277 	xfs_da_intnode_t *node1, *node2;
1278 
1279 	node1 = node1_bp->data;
1280 	node2 = node2_bp->data;
1281 	ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
1282 	       (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
1283 	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
1284 	    ((be32_to_cpu(node2->btree[0].hashval) <
1285 	      be32_to_cpu(node1->btree[0].hashval)) ||
1286 	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
1287 	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
1288 		return(1);
1289 	}
1290 	return(0);
1291 }
1292 
1293 /*
1294  * Pick up the last hashvalue from an intermediate node.
1295  */
1296 STATIC uint
xfs_da_node_lasthash(xfs_dabuf_t * bp,int * count)1297 xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1298 {
1299 	xfs_da_intnode_t *node;
1300 
1301 	node = bp->data;
1302 	ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
1303 	if (count)
1304 		*count = be16_to_cpu(node->hdr.count);
1305 	if (!node->hdr.count)
1306 		return(0);
1307 	return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1308 }
1309 
1310 /*
1311  * Unlink a block from a doubly linked list of blocks.
1312  */
1313 STATIC int						/* error */
xfs_da_blk_unlink(xfs_da_state_t * state,xfs_da_state_blk_t * drop_blk,xfs_da_state_blk_t * save_blk)1314 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1315 				 xfs_da_state_blk_t *save_blk)
1316 {
1317 	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
1318 	xfs_da_args_t *args;
1319 	xfs_dabuf_t *bp;
1320 	int error;
1321 
1322 	/*
1323 	 * Set up environment.
1324 	 */
1325 	args = state->args;
1326 	ASSERT(args != NULL);
1327 	save_info = save_blk->bp->data;
1328 	drop_info = drop_blk->bp->data;
1329 	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1330 	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
1331 	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1332 	ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
1333 	ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
1334 	ASSERT(save_blk->magic == drop_blk->magic);
1335 	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
1336 	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
1337 	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
1338 	       (be32_to_cpu(drop_info->back) == save_blk->blkno));
1339 
1340 	/*
1341 	 * Unlink the leaf block from the doubly linked chain of leaves.
1342 	 */
1343 	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
1344 		save_info->back = drop_info->back;
1345 		if (drop_info->back) {
1346 			error = xfs_da_read_buf(args->trans, args->dp,
1347 						be32_to_cpu(drop_info->back),
1348 						-1, &bp, args->whichfork);
1349 			if (error)
1350 				return(error);
1351 			ASSERT(bp != NULL);
1352 			tmp_info = bp->data;
1353 			ASSERT(tmp_info->magic == save_info->magic);
1354 			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
1355 			tmp_info->forw = cpu_to_be32(save_blk->blkno);
1356 			xfs_da_log_buf(args->trans, bp, 0,
1357 						    sizeof(*tmp_info) - 1);
1358 			xfs_da_buf_done(bp);
1359 		}
1360 	} else {
1361 		save_info->forw = drop_info->forw;
1362 		if (drop_info->forw) {
1363 			error = xfs_da_read_buf(args->trans, args->dp,
1364 						be32_to_cpu(drop_info->forw),
1365 						-1, &bp, args->whichfork);
1366 			if (error)
1367 				return(error);
1368 			ASSERT(bp != NULL);
1369 			tmp_info = bp->data;
1370 			ASSERT(tmp_info->magic == save_info->magic);
1371 			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
1372 			tmp_info->back = cpu_to_be32(save_blk->blkno);
1373 			xfs_da_log_buf(args->trans, bp, 0,
1374 						    sizeof(*tmp_info) - 1);
1375 			xfs_da_buf_done(bp);
1376 		}
1377 	}
1378 
1379 	xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1380 	return(0);
1381 }
1382 
1383 /*
1384  * Move a path "forward" or "!forward" one block at the current level.
1385  *
1386  * This routine will adjust a "path" to point to the next block
1387  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1388  * Btree, including updating pointers to the intermediate nodes between
1389  * the new bottom and the root.
1390  */
1391 int							/* error */
xfs_da_path_shift(xfs_da_state_t * state,xfs_da_state_path_t * path,int forward,int release,int * result)1392 xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1393 				 int forward, int release, int *result)
1394 {
1395 	xfs_da_state_blk_t *blk;
1396 	xfs_da_blkinfo_t *info;
1397 	xfs_da_intnode_t *node;
1398 	xfs_da_args_t *args;
1399 	xfs_dablk_t blkno=0;
1400 	int level, error;
1401 
1402 	/*
1403 	 * Roll up the Btree looking for the first block where our
1404 	 * current index is not at the edge of the block.  Note that
1405 	 * we skip the bottom layer because we want the sibling block.
1406 	 */
1407 	args = state->args;
1408 	ASSERT(args != NULL);
1409 	ASSERT(path != NULL);
1410 	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1411 	level = (path->active-1) - 1;	/* skip bottom layer in path */
1412 	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1413 		ASSERT(blk->bp != NULL);
1414 		node = blk->bp->data;
1415 		ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
1416 		if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
1417 			blk->index++;
1418 			blkno = be32_to_cpu(node->btree[blk->index].before);
1419 			break;
1420 		} else if (!forward && (blk->index > 0)) {
1421 			blk->index--;
1422 			blkno = be32_to_cpu(node->btree[blk->index].before);
1423 			break;
1424 		}
1425 	}
1426 	if (level < 0) {
1427 		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
1428 		ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1429 		return(0);
1430 	}
1431 
1432 	/*
1433 	 * Roll down the edge of the subtree until we reach the
1434 	 * same depth we were at originally.
1435 	 */
1436 	for (blk++, level++; level < path->active; blk++, level++) {
1437 		/*
1438 		 * Release the old block.
1439 		 * (if it's dirty, trans won't actually let go)
1440 		 */
1441 		if (release)
1442 			xfs_da_brelse(args->trans, blk->bp);
1443 
1444 		/*
1445 		 * Read the next child block.
1446 		 */
1447 		blk->blkno = blkno;
1448 		error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
1449 						     &blk->bp, args->whichfork);
1450 		if (error)
1451 			return(error);
1452 		ASSERT(blk->bp != NULL);
1453 		info = blk->bp->data;
1454 		ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
1455 		       be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC ||
1456 		       be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
1457 		blk->magic = be16_to_cpu(info->magic);
1458 		if (blk->magic == XFS_DA_NODE_MAGIC) {
1459 			node = (xfs_da_intnode_t *)info;
1460 			blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
1461 			if (forward)
1462 				blk->index = 0;
1463 			else
1464 				blk->index = be16_to_cpu(node->hdr.count)-1;
1465 			blkno = be32_to_cpu(node->btree[blk->index].before);
1466 		} else {
1467 			ASSERT(level == path->active-1);
1468 			blk->index = 0;
1469 			switch(blk->magic) {
1470 			case XFS_ATTR_LEAF_MAGIC:
1471 				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1472 								      NULL);
1473 				break;
1474 			case XFS_DIR2_LEAFN_MAGIC:
1475 				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1476 								       NULL);
1477 				break;
1478 			default:
1479 				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
1480 				       blk->magic == XFS_DIR2_LEAFN_MAGIC);
1481 				break;
1482 			}
1483 		}
1484 	}
1485 	*result = 0;
1486 	return(0);
1487 }
1488 
1489 
1490 /*========================================================================
1491  * Utility routines.
1492  *========================================================================*/
1493 
1494 /*
1495  * Implement a simple hash on a character string.
1496  * Rotate the hash value by 7 bits, then XOR each character in.
1497  * This is implemented with some source-level loop unrolling.
1498  */
1499 xfs_dahash_t
xfs_da_hashname(const __uint8_t * name,int namelen)1500 xfs_da_hashname(const __uint8_t *name, int namelen)
1501 {
1502 	xfs_dahash_t hash;
1503 
1504 	/*
1505 	 * Do four characters at a time as long as we can.
1506 	 */
1507 	for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
1508 		hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
1509 		       (name[3] << 0) ^ rol32(hash, 7 * 4);
1510 
1511 	/*
1512 	 * Now do the rest of the characters.
1513 	 */
1514 	switch (namelen) {
1515 	case 3:
1516 		return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
1517 		       rol32(hash, 7 * 3);
1518 	case 2:
1519 		return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
1520 	case 1:
1521 		return (name[0] << 0) ^ rol32(hash, 7 * 1);
1522 	default: /* case 0: */
1523 		return hash;
1524 	}
1525 }
1526 
1527 enum xfs_dacmp
xfs_da_compname(struct xfs_da_args * args,const unsigned char * name,int len)1528 xfs_da_compname(
1529 	struct xfs_da_args *args,
1530 	const unsigned char *name,
1531 	int		len)
1532 {
1533 	return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
1534 					XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
1535 }
1536 
1537 static xfs_dahash_t
xfs_default_hashname(struct xfs_name * name)1538 xfs_default_hashname(
1539 	struct xfs_name	*name)
1540 {
1541 	return xfs_da_hashname(name->name, name->len);
1542 }
1543 
1544 const struct xfs_nameops xfs_default_nameops = {
1545 	.hashname	= xfs_default_hashname,
1546 	.compname	= xfs_da_compname
1547 };
1548 
1549 /*
1550  * Add a block to the btree ahead of the file.
1551  * Return the new block number to the caller.
1552  */
1553 int
xfs_da_grow_inode(xfs_da_args_t * args,xfs_dablk_t * new_blkno)1554 xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1555 {
1556 	xfs_fileoff_t bno, b;
1557 	xfs_bmbt_irec_t map;
1558 	xfs_bmbt_irec_t	*mapp;
1559 	xfs_inode_t *dp;
1560 	int nmap, error, w, count, c, got, i, mapi;
1561 	xfs_trans_t *tp;
1562 	xfs_mount_t *mp;
1563 	xfs_drfsbno_t	nblks;
1564 
1565 	dp = args->dp;
1566 	mp = dp->i_mount;
1567 	w = args->whichfork;
1568 	tp = args->trans;
1569 	nblks = dp->i_d.di_nblocks;
1570 
1571 	/*
1572 	 * For new directories adjust the file offset and block count.
1573 	 */
1574 	if (w == XFS_DATA_FORK) {
1575 		bno = mp->m_dirleafblk;
1576 		count = mp->m_dirblkfsbs;
1577 	} else {
1578 		bno = 0;
1579 		count = 1;
1580 	}
1581 	/*
1582 	 * Find a spot in the file space to put the new block.
1583 	 */
1584 	if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w)))
1585 		return error;
1586 	if (w == XFS_DATA_FORK)
1587 		ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
1588 	/*
1589 	 * Try mapping it in one filesystem block.
1590 	 */
1591 	nmap = 1;
1592 	ASSERT(args->firstblock != NULL);
1593 	if ((error = xfs_bmapi(tp, dp, bno, count,
1594 			xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1595 			XFS_BMAPI_CONTIG,
1596 			args->firstblock, args->total, &map, &nmap,
1597 			args->flist))) {
1598 		return error;
1599 	}
1600 	ASSERT(nmap <= 1);
1601 	if (nmap == 1) {
1602 		mapp = &map;
1603 		mapi = 1;
1604 	}
1605 	/*
1606 	 * If we didn't get it and the block might work if fragmented,
1607 	 * try without the CONTIG flag.  Loop until we get it all.
1608 	 */
1609 	else if (nmap == 0 && count > 1) {
1610 		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
1611 		for (b = bno, mapi = 0; b < bno + count; ) {
1612 			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
1613 			c = (int)(bno + count - b);
1614 			if ((error = xfs_bmapi(tp, dp, b, c,
1615 					xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
1616 					XFS_BMAPI_METADATA,
1617 					args->firstblock, args->total,
1618 					&mapp[mapi], &nmap, args->flist))) {
1619 				kmem_free(mapp);
1620 				return error;
1621 			}
1622 			if (nmap < 1)
1623 				break;
1624 			mapi += nmap;
1625 			b = mapp[mapi - 1].br_startoff +
1626 			    mapp[mapi - 1].br_blockcount;
1627 		}
1628 	} else {
1629 		mapi = 0;
1630 		mapp = NULL;
1631 	}
1632 	/*
1633 	 * Count the blocks we got, make sure it matches the total.
1634 	 */
1635 	for (i = 0, got = 0; i < mapi; i++)
1636 		got += mapp[i].br_blockcount;
1637 	if (got != count || mapp[0].br_startoff != bno ||
1638 	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1639 	    bno + count) {
1640 		if (mapp != &map)
1641 			kmem_free(mapp);
1642 		return XFS_ERROR(ENOSPC);
1643 	}
1644 	if (mapp != &map)
1645 		kmem_free(mapp);
1646 	/* account for newly allocated blocks in reserved blocks total */
1647 	args->total -= dp->i_d.di_nblocks - nblks;
1648 	*new_blkno = (xfs_dablk_t)bno;
1649 	return 0;
1650 }
1651 
1652 /*
1653  * Ick.  We need to always be able to remove a btree block, even
1654  * if there's no space reservation because the filesystem is full.
1655  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
1656  * It swaps the target block with the last block in the file.  The
1657  * last block in the file can always be removed since it can't cause
1658  * a bmap btree split to do that.
1659  */
1660 STATIC int
xfs_da_swap_lastblock(xfs_da_args_t * args,xfs_dablk_t * dead_blknop,xfs_dabuf_t ** dead_bufp)1661 xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1662 		      xfs_dabuf_t **dead_bufp)
1663 {
1664 	xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
1665 	xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
1666 	xfs_fileoff_t lastoff;
1667 	xfs_inode_t *ip;
1668 	xfs_trans_t *tp;
1669 	xfs_mount_t *mp;
1670 	int error, w, entno, level, dead_level;
1671 	xfs_da_blkinfo_t *dead_info, *sib_info;
1672 	xfs_da_intnode_t *par_node, *dead_node;
1673 	xfs_dir2_leaf_t *dead_leaf2;
1674 	xfs_dahash_t dead_hash;
1675 
1676 	dead_buf = *dead_bufp;
1677 	dead_blkno = *dead_blknop;
1678 	tp = args->trans;
1679 	ip = args->dp;
1680 	w = args->whichfork;
1681 	ASSERT(w == XFS_DATA_FORK);
1682 	mp = ip->i_mount;
1683 	lastoff = mp->m_dirfreeblk;
1684 	error = xfs_bmap_last_before(tp, ip, &lastoff, w);
1685 	if (error)
1686 		return error;
1687 	if (unlikely(lastoff == 0)) {
1688 		XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
1689 				 mp);
1690 		return XFS_ERROR(EFSCORRUPTED);
1691 	}
1692 	/*
1693 	 * Read the last block in the btree space.
1694 	 */
1695 	last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
1696 	if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
1697 		return error;
1698 	/*
1699 	 * Copy the last block into the dead buffer and log it.
1700 	 */
1701 	memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
1702 	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
1703 	dead_info = dead_buf->data;
1704 	/*
1705 	 * Get values from the moved block.
1706 	 */
1707 	if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
1708 		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1709 		dead_level = 0;
1710 		dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
1711 	} else {
1712 		ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
1713 		dead_node = (xfs_da_intnode_t *)dead_info;
1714 		dead_level = be16_to_cpu(dead_node->hdr.level);
1715 		dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
1716 	}
1717 	sib_buf = par_buf = NULL;
1718 	/*
1719 	 * If the moved block has a left sibling, fix up the pointers.
1720 	 */
1721 	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
1722 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1723 			goto done;
1724 		sib_info = sib_buf->data;
1725 		if (unlikely(
1726 		    be32_to_cpu(sib_info->forw) != last_blkno ||
1727 		    sib_info->magic != dead_info->magic)) {
1728 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
1729 					 XFS_ERRLEVEL_LOW, mp);
1730 			error = XFS_ERROR(EFSCORRUPTED);
1731 			goto done;
1732 		}
1733 		sib_info->forw = cpu_to_be32(dead_blkno);
1734 		xfs_da_log_buf(tp, sib_buf,
1735 			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
1736 					sizeof(sib_info->forw)));
1737 		xfs_da_buf_done(sib_buf);
1738 		sib_buf = NULL;
1739 	}
1740 	/*
1741 	 * If the moved block has a right sibling, fix up the pointers.
1742 	 */
1743 	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
1744 		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1745 			goto done;
1746 		sib_info = sib_buf->data;
1747 		if (unlikely(
1748 		       be32_to_cpu(sib_info->back) != last_blkno ||
1749 		       sib_info->magic != dead_info->magic)) {
1750 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
1751 					 XFS_ERRLEVEL_LOW, mp);
1752 			error = XFS_ERROR(EFSCORRUPTED);
1753 			goto done;
1754 		}
1755 		sib_info->back = cpu_to_be32(dead_blkno);
1756 		xfs_da_log_buf(tp, sib_buf,
1757 			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
1758 					sizeof(sib_info->back)));
1759 		xfs_da_buf_done(sib_buf);
1760 		sib_buf = NULL;
1761 	}
1762 	par_blkno = mp->m_dirleafblk;
1763 	level = -1;
1764 	/*
1765 	 * Walk down the tree looking for the parent of the moved block.
1766 	 */
1767 	for (;;) {
1768 		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1769 			goto done;
1770 		par_node = par_buf->data;
1771 		if (unlikely(
1772 		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC ||
1773 		    (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
1774 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1775 					 XFS_ERRLEVEL_LOW, mp);
1776 			error = XFS_ERROR(EFSCORRUPTED);
1777 			goto done;
1778 		}
1779 		level = be16_to_cpu(par_node->hdr.level);
1780 		for (entno = 0;
1781 		     entno < be16_to_cpu(par_node->hdr.count) &&
1782 		     be32_to_cpu(par_node->btree[entno].hashval) < dead_hash;
1783 		     entno++)
1784 			continue;
1785 		if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) {
1786 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
1787 					 XFS_ERRLEVEL_LOW, mp);
1788 			error = XFS_ERROR(EFSCORRUPTED);
1789 			goto done;
1790 		}
1791 		par_blkno = be32_to_cpu(par_node->btree[entno].before);
1792 		if (level == dead_level + 1)
1793 			break;
1794 		xfs_da_brelse(tp, par_buf);
1795 		par_buf = NULL;
1796 	}
1797 	/*
1798 	 * We're in the right parent block.
1799 	 * Look for the right entry.
1800 	 */
1801 	for (;;) {
1802 		for (;
1803 		     entno < be16_to_cpu(par_node->hdr.count) &&
1804 		     be32_to_cpu(par_node->btree[entno].before) != last_blkno;
1805 		     entno++)
1806 			continue;
1807 		if (entno < be16_to_cpu(par_node->hdr.count))
1808 			break;
1809 		par_blkno = be32_to_cpu(par_node->hdr.info.forw);
1810 		xfs_da_brelse(tp, par_buf);
1811 		par_buf = NULL;
1812 		if (unlikely(par_blkno == 0)) {
1813 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
1814 					 XFS_ERRLEVEL_LOW, mp);
1815 			error = XFS_ERROR(EFSCORRUPTED);
1816 			goto done;
1817 		}
1818 		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1819 			goto done;
1820 		par_node = par_buf->data;
1821 		if (unlikely(
1822 		    be16_to_cpu(par_node->hdr.level) != level ||
1823 		    be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) {
1824 			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1825 					 XFS_ERRLEVEL_LOW, mp);
1826 			error = XFS_ERROR(EFSCORRUPTED);
1827 			goto done;
1828 		}
1829 		entno = 0;
1830 	}
1831 	/*
1832 	 * Update the parent entry pointing to the moved block.
1833 	 */
1834 	par_node->btree[entno].before = cpu_to_be32(dead_blkno);
1835 	xfs_da_log_buf(tp, par_buf,
1836 		XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
1837 				sizeof(par_node->btree[entno].before)));
1838 	xfs_da_buf_done(par_buf);
1839 	xfs_da_buf_done(dead_buf);
1840 	*dead_blknop = last_blkno;
1841 	*dead_bufp = last_buf;
1842 	return 0;
1843 done:
1844 	if (par_buf)
1845 		xfs_da_brelse(tp, par_buf);
1846 	if (sib_buf)
1847 		xfs_da_brelse(tp, sib_buf);
1848 	xfs_da_brelse(tp, last_buf);
1849 	return error;
1850 }
1851 
1852 /*
1853  * Remove a btree block from a directory or attribute.
1854  */
1855 int
xfs_da_shrink_inode(xfs_da_args_t * args,xfs_dablk_t dead_blkno,xfs_dabuf_t * dead_buf)1856 xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1857 		    xfs_dabuf_t *dead_buf)
1858 {
1859 	xfs_inode_t *dp;
1860 	int done, error, w, count;
1861 	xfs_trans_t *tp;
1862 	xfs_mount_t *mp;
1863 
1864 	dp = args->dp;
1865 	w = args->whichfork;
1866 	tp = args->trans;
1867 	mp = dp->i_mount;
1868 	if (w == XFS_DATA_FORK)
1869 		count = mp->m_dirblkfsbs;
1870 	else
1871 		count = 1;
1872 	for (;;) {
1873 		/*
1874 		 * Remove extents.  If we get ENOSPC for a dir we have to move
1875 		 * the last block to the place we want to kill.
1876 		 */
1877 		if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1878 				xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
1879 				0, args->firstblock, args->flist,
1880 				&done)) == ENOSPC) {
1881 			if (w != XFS_DATA_FORK)
1882 				break;
1883 			if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
1884 					&dead_buf)))
1885 				break;
1886 		} else {
1887 			break;
1888 		}
1889 	}
1890 	xfs_da_binval(tp, dead_buf);
1891 	return error;
1892 }
1893 
1894 /*
1895  * See if the mapping(s) for this btree block are valid, i.e.
1896  * don't contain holes, are logically contiguous, and cover the whole range.
1897  */
1898 STATIC int
xfs_da_map_covers_blocks(int nmap,xfs_bmbt_irec_t * mapp,xfs_dablk_t bno,int count)1899 xfs_da_map_covers_blocks(
1900 	int		nmap,
1901 	xfs_bmbt_irec_t	*mapp,
1902 	xfs_dablk_t	bno,
1903 	int		count)
1904 {
1905 	int		i;
1906 	xfs_fileoff_t	off;
1907 
1908 	for (i = 0, off = bno; i < nmap; i++) {
1909 		if (mapp[i].br_startblock == HOLESTARTBLOCK ||
1910 		    mapp[i].br_startblock == DELAYSTARTBLOCK) {
1911 			return 0;
1912 		}
1913 		if (off != mapp[i].br_startoff) {
1914 			return 0;
1915 		}
1916 		off += mapp[i].br_blockcount;
1917 	}
1918 	return off == bno + count;
1919 }
1920 
1921 /*
1922  * Make a dabuf.
1923  * Used for get_buf, read_buf, read_bufr, and reada_buf.
1924  */
1925 STATIC int
xfs_da_do_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,xfs_daddr_t * mappedbnop,xfs_dabuf_t ** bpp,int whichfork,int caller,inst_t * ra)1926 xfs_da_do_buf(
1927 	xfs_trans_t	*trans,
1928 	xfs_inode_t	*dp,
1929 	xfs_dablk_t	bno,
1930 	xfs_daddr_t	*mappedbnop,
1931 	xfs_dabuf_t	**bpp,
1932 	int		whichfork,
1933 	int		caller,
1934 	inst_t		*ra)
1935 {
1936 	xfs_buf_t	*bp = NULL;
1937 	xfs_buf_t	**bplist;
1938 	int		error=0;
1939 	int		i;
1940 	xfs_bmbt_irec_t	map;
1941 	xfs_bmbt_irec_t	*mapp;
1942 	xfs_daddr_t	mappedbno;
1943 	xfs_mount_t	*mp;
1944 	int		nbplist=0;
1945 	int		nfsb;
1946 	int		nmap;
1947 	xfs_dabuf_t	*rbp;
1948 
1949 	mp = dp->i_mount;
1950 	nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
1951 	mappedbno = *mappedbnop;
1952 	/*
1953 	 * Caller doesn't have a mapping.  -2 means don't complain
1954 	 * if we land in a hole.
1955 	 */
1956 	if (mappedbno == -1 || mappedbno == -2) {
1957 		/*
1958 		 * Optimize the one-block case.
1959 		 */
1960 		if (nfsb == 1) {
1961 			xfs_fsblock_t	fsb;
1962 
1963 			if ((error =
1964 			    xfs_bmapi_single(trans, dp, whichfork, &fsb,
1965 				    (xfs_fileoff_t)bno))) {
1966 				return error;
1967 			}
1968 			mapp = &map;
1969 			if (fsb == NULLFSBLOCK) {
1970 				nmap = 0;
1971 			} else {
1972 				map.br_startblock = fsb;
1973 				map.br_startoff = (xfs_fileoff_t)bno;
1974 				map.br_blockcount = 1;
1975 				nmap = 1;
1976 			}
1977 		} else {
1978 			mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
1979 			nmap = nfsb;
1980 			if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
1981 					nfsb,
1982 					XFS_BMAPI_METADATA |
1983 						xfs_bmapi_aflag(whichfork),
1984 					NULL, 0, mapp, &nmap, NULL)))
1985 				goto exit0;
1986 		}
1987 	} else {
1988 		map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
1989 		map.br_startoff = (xfs_fileoff_t)bno;
1990 		map.br_blockcount = nfsb;
1991 		mapp = &map;
1992 		nmap = 1;
1993 	}
1994 	if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
1995 		error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
1996 		if (unlikely(error == EFSCORRUPTED)) {
1997 			if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
1998 				xfs_alert(mp, "%s: bno %lld dir: inode %lld",
1999 					__func__, (long long)bno,
2000 					(long long)dp->i_ino);
2001 				for (i = 0; i < nmap; i++) {
2002 					xfs_alert(mp,
2003 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2004 						i,
2005 						(long long)mapp[i].br_startoff,
2006 						(long long)mapp[i].br_startblock,
2007 						(long long)mapp[i].br_blockcount,
2008 						mapp[i].br_state);
2009 				}
2010 			}
2011 			XFS_ERROR_REPORT("xfs_da_do_buf(1)",
2012 					 XFS_ERRLEVEL_LOW, mp);
2013 		}
2014 		goto exit0;
2015 	}
2016 	if (caller != 3 && nmap > 1) {
2017 		bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
2018 		nbplist = 0;
2019 	} else
2020 		bplist = NULL;
2021 	/*
2022 	 * Turn the mapping(s) into buffer(s).
2023 	 */
2024 	for (i = 0; i < nmap; i++) {
2025 		int	nmapped;
2026 
2027 		mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
2028 		if (i == 0)
2029 			*mappedbnop = mappedbno;
2030 		nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
2031 		switch (caller) {
2032 		case 0:
2033 			bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
2034 				mappedbno, nmapped, 0);
2035 			error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
2036 			break;
2037 		case 1:
2038 		case 2:
2039 			bp = NULL;
2040 			error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
2041 				mappedbno, nmapped, 0, &bp);
2042 			break;
2043 		case 3:
2044 			xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
2045 			error = 0;
2046 			bp = NULL;
2047 			break;
2048 		}
2049 		if (error) {
2050 			if (bp)
2051 				xfs_trans_brelse(trans, bp);
2052 			goto exit1;
2053 		}
2054 		if (!bp)
2055 			continue;
2056 		if (caller == 1) {
2057 			if (whichfork == XFS_ATTR_FORK) {
2058 				XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
2059 						XFS_ATTR_BTREE_REF);
2060 			} else {
2061 				XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
2062 						XFS_DIR_BTREE_REF);
2063 			}
2064 		}
2065 		if (bplist) {
2066 			bplist[nbplist++] = bp;
2067 		}
2068 	}
2069 	/*
2070 	 * Build a dabuf structure.
2071 	 */
2072 	if (bplist) {
2073 		rbp = xfs_da_buf_make(nbplist, bplist, ra);
2074 	} else if (bp)
2075 		rbp = xfs_da_buf_make(1, &bp, ra);
2076 	else
2077 		rbp = NULL;
2078 	/*
2079 	 * For read_buf, check the magic number.
2080 	 */
2081 	if (caller == 1) {
2082 		xfs_dir2_data_t		*data;
2083 		xfs_dir2_free_t		*free;
2084 		xfs_da_blkinfo_t	*info;
2085 		uint			magic, magic1;
2086 
2087 		info = rbp->data;
2088 		data = rbp->data;
2089 		free = rbp->data;
2090 		magic = be16_to_cpu(info->magic);
2091 		magic1 = be32_to_cpu(data->hdr.magic);
2092 		if (unlikely(
2093 		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2094 				   (magic != XFS_ATTR_LEAF_MAGIC) &&
2095 				   (magic != XFS_DIR2_LEAF1_MAGIC) &&
2096 				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
2097 				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2098 				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
2099 				   (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
2100 				mp, XFS_ERRTAG_DA_READ_BUF,
2101 				XFS_RANDOM_DA_READ_BUF))) {
2102 			trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
2103 			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2104 					     XFS_ERRLEVEL_LOW, mp, info);
2105 			error = XFS_ERROR(EFSCORRUPTED);
2106 			xfs_da_brelse(trans, rbp);
2107 			nbplist = 0;
2108 			goto exit1;
2109 		}
2110 	}
2111 	if (bplist) {
2112 		kmem_free(bplist);
2113 	}
2114 	if (mapp != &map) {
2115 		kmem_free(mapp);
2116 	}
2117 	if (bpp)
2118 		*bpp = rbp;
2119 	return 0;
2120 exit1:
2121 	if (bplist) {
2122 		for (i = 0; i < nbplist; i++)
2123 			xfs_trans_brelse(trans, bplist[i]);
2124 		kmem_free(bplist);
2125 	}
2126 exit0:
2127 	if (mapp != &map)
2128 		kmem_free(mapp);
2129 	if (bpp)
2130 		*bpp = NULL;
2131 	return error;
2132 }
2133 
2134 /*
2135  * Get a buffer for the dir/attr block.
2136  */
2137 int
xfs_da_get_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,xfs_daddr_t mappedbno,xfs_dabuf_t ** bpp,int whichfork)2138 xfs_da_get_buf(
2139 	xfs_trans_t	*trans,
2140 	xfs_inode_t	*dp,
2141 	xfs_dablk_t	bno,
2142 	xfs_daddr_t		mappedbno,
2143 	xfs_dabuf_t	**bpp,
2144 	int		whichfork)
2145 {
2146 	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
2147 						 (inst_t *)__return_address);
2148 }
2149 
2150 /*
2151  * Get a buffer for the dir/attr block, fill in the contents.
2152  */
2153 int
xfs_da_read_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,xfs_daddr_t mappedbno,xfs_dabuf_t ** bpp,int whichfork)2154 xfs_da_read_buf(
2155 	xfs_trans_t	*trans,
2156 	xfs_inode_t	*dp,
2157 	xfs_dablk_t	bno,
2158 	xfs_daddr_t		mappedbno,
2159 	xfs_dabuf_t	**bpp,
2160 	int		whichfork)
2161 {
2162 	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
2163 		(inst_t *)__return_address);
2164 }
2165 
2166 /*
2167  * Readahead the dir/attr block.
2168  */
2169 xfs_daddr_t
xfs_da_reada_buf(xfs_trans_t * trans,xfs_inode_t * dp,xfs_dablk_t bno,int whichfork)2170 xfs_da_reada_buf(
2171 	xfs_trans_t	*trans,
2172 	xfs_inode_t	*dp,
2173 	xfs_dablk_t	bno,
2174 	int		whichfork)
2175 {
2176 	xfs_daddr_t		rval;
2177 
2178 	rval = -1;
2179 	if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
2180 			(inst_t *)__return_address))
2181 		return -1;
2182 	else
2183 		return rval;
2184 }
2185 
2186 kmem_zone_t *xfs_da_state_zone;	/* anchor for state struct zone */
2187 kmem_zone_t *xfs_dabuf_zone;		/* dabuf zone */
2188 
2189 /*
2190  * Allocate a dir-state structure.
2191  * We don't put them on the stack since they're large.
2192  */
2193 xfs_da_state_t *
xfs_da_state_alloc(void)2194 xfs_da_state_alloc(void)
2195 {
2196 	return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2197 }
2198 
2199 /*
2200  * Kill the altpath contents of a da-state structure.
2201  */
2202 STATIC void
xfs_da_state_kill_altpath(xfs_da_state_t * state)2203 xfs_da_state_kill_altpath(xfs_da_state_t *state)
2204 {
2205 	int	i;
2206 
2207 	for (i = 0; i < state->altpath.active; i++) {
2208 		if (state->altpath.blk[i].bp) {
2209 			if (state->altpath.blk[i].bp != state->path.blk[i].bp)
2210 				xfs_da_buf_done(state->altpath.blk[i].bp);
2211 			state->altpath.blk[i].bp = NULL;
2212 		}
2213 	}
2214 	state->altpath.active = 0;
2215 }
2216 
2217 /*
2218  * Free a da-state structure.
2219  */
2220 void
xfs_da_state_free(xfs_da_state_t * state)2221 xfs_da_state_free(xfs_da_state_t *state)
2222 {
2223 	int	i;
2224 
2225 	xfs_da_state_kill_altpath(state);
2226 	for (i = 0; i < state->path.active; i++) {
2227 		if (state->path.blk[i].bp)
2228 			xfs_da_buf_done(state->path.blk[i].bp);
2229 	}
2230 	if (state->extravalid && state->extrablk.bp)
2231 		xfs_da_buf_done(state->extrablk.bp);
2232 #ifdef DEBUG
2233 	memset((char *)state, 0, sizeof(*state));
2234 #endif /* DEBUG */
2235 	kmem_zone_free(xfs_da_state_zone, state);
2236 }
2237 
2238 #ifdef XFS_DABUF_DEBUG
2239 xfs_dabuf_t	*xfs_dabuf_global_list;
2240 static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
2241 #endif
2242 
2243 /*
2244  * Create a dabuf.
2245  */
2246 /* ARGSUSED */
2247 STATIC xfs_dabuf_t *
xfs_da_buf_make(int nbuf,xfs_buf_t ** bps,inst_t * ra)2248 xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2249 {
2250 	xfs_buf_t	*bp;
2251 	xfs_dabuf_t	*dabuf;
2252 	int		i;
2253 	int		off;
2254 
2255 	if (nbuf == 1)
2256 		dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
2257 	else
2258 		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2259 	dabuf->dirty = 0;
2260 #ifdef XFS_DABUF_DEBUG
2261 	dabuf->ra = ra;
2262 	dabuf->target = XFS_BUF_TARGET(bps[0]);
2263 	dabuf->blkno = XFS_BUF_ADDR(bps[0]);
2264 #endif
2265 	if (nbuf == 1) {
2266 		dabuf->nbuf = 1;
2267 		bp = bps[0];
2268 		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
2269 		dabuf->data = XFS_BUF_PTR(bp);
2270 		dabuf->bps[0] = bp;
2271 	} else {
2272 		dabuf->nbuf = nbuf;
2273 		for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
2274 			dabuf->bps[i] = bp = bps[i];
2275 			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
2276 		}
2277 		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
2278 		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2279 			bp = bps[i];
2280 			memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
2281 				XFS_BUF_COUNT(bp));
2282 		}
2283 	}
2284 #ifdef XFS_DABUF_DEBUG
2285 	{
2286 		xfs_dabuf_t	*p;
2287 
2288 		spin_lock(&xfs_dabuf_global_lock);
2289 		for (p = xfs_dabuf_global_list; p; p = p->next) {
2290 			ASSERT(p->blkno != dabuf->blkno ||
2291 			       p->target != dabuf->target);
2292 		}
2293 		dabuf->prev = NULL;
2294 		if (xfs_dabuf_global_list)
2295 			xfs_dabuf_global_list->prev = dabuf;
2296 		dabuf->next = xfs_dabuf_global_list;
2297 		xfs_dabuf_global_list = dabuf;
2298 		spin_unlock(&xfs_dabuf_global_lock);
2299 	}
2300 #endif
2301 	return dabuf;
2302 }
2303 
2304 /*
2305  * Un-dirty a dabuf.
2306  */
2307 STATIC void
xfs_da_buf_clean(xfs_dabuf_t * dabuf)2308 xfs_da_buf_clean(xfs_dabuf_t *dabuf)
2309 {
2310 	xfs_buf_t	*bp;
2311 	int		i;
2312 	int		off;
2313 
2314 	if (dabuf->dirty) {
2315 		ASSERT(dabuf->nbuf > 1);
2316 		dabuf->dirty = 0;
2317 		for (i = off = 0; i < dabuf->nbuf;
2318 				i++, off += XFS_BUF_COUNT(bp)) {
2319 			bp = dabuf->bps[i];
2320 			memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
2321 				XFS_BUF_COUNT(bp));
2322 		}
2323 	}
2324 }
2325 
2326 /*
2327  * Release a dabuf.
2328  */
2329 void
xfs_da_buf_done(xfs_dabuf_t * dabuf)2330 xfs_da_buf_done(xfs_dabuf_t *dabuf)
2331 {
2332 	ASSERT(dabuf);
2333 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2334 	if (dabuf->dirty)
2335 		xfs_da_buf_clean(dabuf);
2336 	if (dabuf->nbuf > 1)
2337 		kmem_free(dabuf->data);
2338 #ifdef XFS_DABUF_DEBUG
2339 	{
2340 		spin_lock(&xfs_dabuf_global_lock);
2341 		if (dabuf->prev)
2342 			dabuf->prev->next = dabuf->next;
2343 		else
2344 			xfs_dabuf_global_list = dabuf->next;
2345 		if (dabuf->next)
2346 			dabuf->next->prev = dabuf->prev;
2347 		spin_unlock(&xfs_dabuf_global_lock);
2348 	}
2349 	memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
2350 #endif
2351 	if (dabuf->nbuf == 1)
2352 		kmem_zone_free(xfs_dabuf_zone, dabuf);
2353 	else
2354 		kmem_free(dabuf);
2355 }
2356 
2357 /*
2358  * Log transaction from a dabuf.
2359  */
2360 void
xfs_da_log_buf(xfs_trans_t * tp,xfs_dabuf_t * dabuf,uint first,uint last)2361 xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
2362 {
2363 	xfs_buf_t	*bp;
2364 	uint		f;
2365 	int		i;
2366 	uint		l;
2367 	int		off;
2368 
2369 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2370 	if (dabuf->nbuf == 1) {
2371 		ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
2372 		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
2373 		return;
2374 	}
2375 	dabuf->dirty = 1;
2376 	ASSERT(first <= last);
2377 	for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2378 		bp = dabuf->bps[i];
2379 		f = off;
2380 		l = f + XFS_BUF_COUNT(bp) - 1;
2381 		if (f < first)
2382 			f = first;
2383 		if (l > last)
2384 			l = last;
2385 		if (f <= l)
2386 			xfs_trans_log_buf(tp, bp, f - off, l - off);
2387 		/*
2388 		 * B_DONE is set by xfs_trans_log buf.
2389 		 * If we don't set it on a new buffer (get not read)
2390 		 * then if we don't put anything in the buffer it won't
2391 		 * be set, and at commit it it released into the cache,
2392 		 * and then a read will fail.
2393 		 */
2394 		else if (!(XFS_BUF_ISDONE(bp)))
2395 		  XFS_BUF_DONE(bp);
2396 	}
2397 	ASSERT(last < off);
2398 }
2399 
2400 /*
2401  * Release dabuf from a transaction.
2402  * Have to free up the dabuf before the buffers are released,
2403  * since the synchronization on the dabuf is really the lock on the buffer.
2404  */
2405 void
xfs_da_brelse(xfs_trans_t * tp,xfs_dabuf_t * dabuf)2406 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2407 {
2408 	xfs_buf_t	*bp;
2409 	xfs_buf_t	**bplist;
2410 	int		i;
2411 	int		nbuf;
2412 
2413 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2414 	if ((nbuf = dabuf->nbuf) == 1) {
2415 		bplist = &bp;
2416 		bp = dabuf->bps[0];
2417 	} else {
2418 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2419 		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2420 	}
2421 	xfs_da_buf_done(dabuf);
2422 	for (i = 0; i < nbuf; i++)
2423 		xfs_trans_brelse(tp, bplist[i]);
2424 	if (bplist != &bp)
2425 		kmem_free(bplist);
2426 }
2427 
2428 /*
2429  * Invalidate dabuf from a transaction.
2430  */
2431 void
xfs_da_binval(xfs_trans_t * tp,xfs_dabuf_t * dabuf)2432 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2433 {
2434 	xfs_buf_t	*bp;
2435 	xfs_buf_t	**bplist;
2436 	int		i;
2437 	int		nbuf;
2438 
2439 	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2440 	if ((nbuf = dabuf->nbuf) == 1) {
2441 		bplist = &bp;
2442 		bp = dabuf->bps[0];
2443 	} else {
2444 		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2445 		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2446 	}
2447 	xfs_da_buf_done(dabuf);
2448 	for (i = 0; i < nbuf; i++)
2449 		xfs_trans_binval(tp, bplist[i]);
2450 	if (bplist != &bp)
2451 		kmem_free(bplist);
2452 }
2453 
2454 /*
2455  * Get the first daddr from a dabuf.
2456  */
2457 xfs_daddr_t
xfs_da_blkno(xfs_dabuf_t * dabuf)2458 xfs_da_blkno(xfs_dabuf_t *dabuf)
2459 {
2460 	ASSERT(dabuf->nbuf);
2461 	ASSERT(dabuf->data);
2462 	return XFS_BUF_ADDR(dabuf->bps[0]);
2463 }
2464