/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_format.h"
#include "xfs_dir2_priv.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"

/*
 * xfs_da_btree.c
 *
 * Routines to implement directories as Btrees of hashed names.
 */
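
/*
 * Overview (an illustrative sketch, not an on-disk dump): the "da"
 * btree indexes names by a 32-bit hash of the name.  A two-level tree
 * looks roughly like:
 *
 *	           [ node: (hashval, before-blkno) entries ]
 *	          /                                         \
 *	[ leaf: low hashvals ] <-forw/back-> [ leaf: high hashvals ]
 *
 * Intermediate blocks carry XFS_DA_NODE_MAGIC; the bottom layer is
 * XFS_DIR2_LEAFN_MAGIC for directories or XFS_ATTR_LEAF_MAGIC for
 * extended attributes.  Blocks at each level are chained through the
 * forw/back pointers in their xfs_da_blkinfo headers.
 */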

/*========================================================================
 * Function prototypes for the kernel.
 *========================================================================*/

/*
 * Routines used for growing the Btree.
 */
STATIC int xfs_da_root_split(xfs_da_state_t *state,
					    xfs_da_state_blk_t *existing_root,
					    xfs_da_state_blk_t *new_child);
STATIC int xfs_da_node_split(xfs_da_state_t *state,
					    xfs_da_state_blk_t *existing_blk,
					    xfs_da_state_blk_t *split_blk,
					    xfs_da_state_blk_t *blk_to_add,
					    int treelevel,
					    int *result);
STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
					 xfs_da_state_blk_t *node_blk_1,
					 xfs_da_state_blk_t *node_blk_2);
STATIC void xfs_da_node_add(xfs_da_state_t *state,
				   xfs_da_state_blk_t *old_node_blk,
				   xfs_da_state_blk_t *new_node_blk);

/*
 * Routines used for shrinking the Btree.
 */
STATIC int xfs_da_root_join(xfs_da_state_t *state,
					   xfs_da_state_blk_t *root_blk);
STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
STATIC void xfs_da_node_remove(xfs_da_state_t *state,
					      xfs_da_state_blk_t *drop_blk);
STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
					 xfs_da_state_blk_t *src_node_blk,
					 xfs_da_state_blk_t *dst_node_blk);

/*
 * Utility routines.
 */
STATIC uint	xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
STATIC int	xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps);
STATIC int	xfs_da_blk_unlink(xfs_da_state_t *state,
				  xfs_da_state_blk_t *drop_blk,
				  xfs_da_state_blk_t *save_blk);
STATIC void	xfs_da_state_kill_altpath(xfs_da_state_t *state);

/*========================================================================
 * Routines used for growing the Btree.
 *========================================================================*/

/*
 * Create the initial contents of an intermediate node.
 */
int
xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
				 xfs_dabuf_t **bpp, int whichfork)
{
	xfs_da_intnode_t *node;
	xfs_dabuf_t *bp;
	int error;
	xfs_trans_t *tp;

	trace_xfs_da_node_create(args);

	tp = args->trans;
	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	node = bp->data;
	node->hdr.info.forw = 0;
	node->hdr.info.back = 0;
	node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
	node->hdr.info.pad = 0;
	node->hdr.count = 0;
	node->hdr.level = cpu_to_be16(level);

	xfs_da_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	*bpp = bp;
	return(0);
}
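
/*
 * Usage sketch (illustrative only; blkno and level are made-up values):
 *
 *	xfs_dabuf_t *bp;
 *	error = xfs_da_node_create(args, blkno, 1, &bp, XFS_DATA_FORK);
 *	// on success: hdr.count == 0, hdr.level == 1, forw/back/pad == 0
 *
 * Entries are filled in later with xfs_da_node_add(); only the header
 * has been logged at this point.
 */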

/*
 * Split a leaf node, rebalance, then possibly split
 * intermediate nodes, rebalance, etc.
 */
int							/* error */
xfs_da_split(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *oldblk, *newblk, *addblk;
	xfs_da_intnode_t *node;
	xfs_dabuf_t *bp;
	int max, action, error, i;

	trace_xfs_da_split(state->args);

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != ENOSPC)) {
				return(error);	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				trace_xfs_attr_leaf_split_before(state->args);
				error = xfs_attr_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				trace_xfs_attr_leaf_split_after(state->args);
				error = xfs_attr_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return(error);	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			error = xfs_da_node_split(state, oldblk, newblk, addblk,
							 max - i, &action);
			xfs_da_buf_done(addblk->bp);
			addblk->bp = NULL;
			if (error)
				return(error);	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da_fixhashpath(state, &state->path);
		/*
		 * If we won't need this block again, it's getting dropped
		 * from the active path by the loop control, so we need
		 * to mark it done now.
		 */
		if (i > 0 || !addblk)
			xfs_da_buf_done(oldblk->bp);
	}
	if (!addblk)
		return(0);

	/*
	 * Split the root node.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da_root_split(state, oldblk, addblk);
	if (error) {
		xfs_da_buf_done(oldblk->bp);
		xfs_da_buf_done(addblk->bp);
		addblk->bp = NULL;
		return(error);	/* GROT: dir is inconsistent */
	}

	/*
	 * Update pointers to the node which used to be block 0 and
	 * just got bumped because of the addition of a new root node.
	 * There might be three blocks involved if a double split occurred,
	 * and the original block 0 could be at any position in the list.
	 */

	node = oldblk->bp->data;
	if (node->hdr.info.forw) {
		if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	node = oldblk->bp->data;
	if (node->hdr.info.back) {
		if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
			bp = addblk->bp;
		} else {
			ASSERT(state->extravalid);
			bp = state->extrablk.bp;
		}
		node = bp->data;
		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
		xfs_da_log_buf(state->args->trans, bp,
		    XFS_DA_LOGRANGE(node, &node->hdr.info,
		    sizeof(node->hdr.info)));
	}
	xfs_da_buf_done(oldblk->bp);
	xfs_da_buf_done(addblk->bp);
	addblk->bp = NULL;
	return(0);
}
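
/*
 * Split cascade, sketched: a full leaf is split first, which adds one
 * entry to its parent; if the parent overflows it splits too, and so
 * on up to the root.  With a 3-deep path the worst case is leaf split
 * -> node split -> root split.  Only the root split needs the
 * forw/back fixup above, because the old root's contents move to a
 * new block (the root itself must stay at block 0, or m_dirleafblk
 * for directories) and its siblings still point at the old location.
 */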

/*
 * Split the root.  We have to create a new root and point to the two
 * parts (the split old root) that we just created.  Copy block zero to
 * the EOF, extending the inode in the process.
 */
STATIC int						/* error */
xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				 xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node, *oldroot;
	xfs_da_args_t *args;
	xfs_dablk_t blkno;
	xfs_dabuf_t *bp;
	int error, size;
	xfs_inode_t *dp;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	xfs_dir2_leaf_t *leaf;

	trace_xfs_da_root_split(state->args);

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	ASSERT(args != NULL);
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return(error);
	dp = args->dp;
	tp = args->trans;
	mp = state->mp;
	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	node = bp->data;
	oldroot = blk1->bp->data;
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
		size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
			     (char *)oldroot);
	} else {
		ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
		leaf = (xfs_dir2_leaf_t *)oldroot;
		size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
			     (char *)leaf);
	}
	memcpy(node, oldroot, size);
	xfs_da_log_buf(tp, bp, 0, size - 1);
	xfs_da_buf_done(blk1->bp);
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node.
	 */
	error = xfs_da_node_create(args,
		(args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0,
		be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
	if (error)
		return(error);
	node = bp->data;
	node->btree[0].hashval = cpu_to_be32(blk1->hashval);
	node->btree[0].before = cpu_to_be32(blk1->blkno);
	node->btree[1].hashval = cpu_to_be32(blk2->hashval);
	node->btree[1].before = cpu_to_be32(blk2->blkno);
	node->hdr.count = cpu_to_be16(2);

#ifdef DEBUG
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
		ASSERT(blk1->blkno >= mp->m_dirleafblk &&
		       blk1->blkno < mp->m_dirfreeblk);
		ASSERT(blk2->blkno >= mp->m_dirleafblk &&
		       blk2->blkno < mp->m_dirfreeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_da_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, node->btree,
			sizeof(xfs_da_node_entry_t) * 2));
	xfs_da_buf_done(bp);

	return(0);
}

/*
 * Split the node, rebalance, then add the new entry.
 */
STATIC int						/* error */
xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
				 xfs_da_state_blk_t *newblk,
				 xfs_da_state_blk_t *addblk,
				 int treelevel, int *result)
{
	xfs_da_intnode_t *node;
	xfs_dablk_t blkno;
	int newcount, error;
	int useextra;

	trace_xfs_da_node_split(state->args);

	node = oldblk->bp->data;
	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));

	/*
	 * With V2 dirs the extra block is data or freespace.
	 */
	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
	newcount = 1 + useextra;
	/*
	 * Do we have to split the node?
	 */
	if ((be16_to_cpu(node->hdr.count) + newcount) > state->node_ents) {
		/*
		 * Allocate a new node, add to the doubly linked chain of
		 * nodes, then move some of our excess entries into it.
		 */
		error = xfs_da_grow_inode(state->args, &blkno);
		if (error)
			return(error);	/* GROT: dir is inconsistent */

		error = xfs_da_node_create(state->args, blkno, treelevel,
					   &newblk->bp, state->args->whichfork);
		if (error)
			return(error);	/* GROT: dir is inconsistent */
		newblk->blkno = blkno;
		newblk->magic = XFS_DA_NODE_MAGIC;
		xfs_da_node_rebalance(state, oldblk, newblk);
		error = xfs_da_blk_link(state, oldblk, newblk);
		if (error)
			return(error);
		*result = 1;
	} else {
		*result = 0;
	}

	/*
	 * Insert the new entry(s) into the correct block
	 * (updating last hashval in the process).
	 *
	 * xfs_da_node_add() inserts BEFORE the given index,
	 * and as a result of using node_lookup_int() we always
	 * point to a valid entry (not after one), but a split
	 * operation always results in a new block whose hashvals
	 * FOLLOW the current block.
	 *
	 * If we had double-split op below us, then add the extra block too.
	 */
	node = oldblk->bp->data;
	if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
		oldblk->index++;
		xfs_da_node_add(state, oldblk, addblk);
		if (useextra) {
			if (state->extraafter)
				oldblk->index++;
			xfs_da_node_add(state, oldblk, &state->extrablk);
			state->extravalid = 0;
		}
	} else {
		newblk->index++;
		xfs_da_node_add(state, newblk, addblk);
		if (useextra) {
			if (state->extraafter)
				newblk->index++;
			xfs_da_node_add(state, newblk, &state->extrablk);
			state->extravalid = 0;
		}
	}

	return(0);
}

/*
 * Balance the btree elements between two intermediate nodes,
 * usually one full and one empty.
 *
 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 */
STATIC void
xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
				     xfs_da_state_blk_t *blk2)
{
	xfs_da_intnode_t *node1, *node2, *tmpnode;
	xfs_da_node_entry_t *btree_s, *btree_d;
	int count, tmp;
	xfs_trans_t *tp;

	trace_xfs_da_node_rebalance(state->args);

	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	/*
	 * Figure out how many entries need to move, and in which direction.
	 * Swap the nodes around if that makes it simpler.
	 */
	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
	    ((be32_to_cpu(node2->btree[0].hashval) < be32_to_cpu(node1->btree[0].hashval)) ||
	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
		tmpnode = node1;
		node1 = node2;
		node2 = tmpnode;
	}
	ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
	if (count == 0)
		return;
	tp = state->args->trans;
	/*
	 * Two cases: high-to-low and low-to-high.
	 */
	if (count > 0) {
		/*
		 * Move elements in node2 up to make a hole.
		 */
		if ((tmp = be16_to_cpu(node2->hdr.count)) > 0) {
			tmp *= (uint)sizeof(xfs_da_node_entry_t);
			btree_s = &node2->btree[0];
			btree_d = &node2->btree[count];
			memmove(btree_d, btree_s, tmp);
		}

		/*
		 * Move the req'd B-tree elements from high in node1 to
		 * low in node2.
		 */
		be16_add_cpu(&node2->hdr.count, count);
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count];
		btree_d = &node2->btree[0];
		memcpy(btree_d, btree_s, tmp);
		be16_add_cpu(&node1->hdr.count, -count);
	} else {
		/*
		 * Move the req'd B-tree elements from low in node2 to
		 * high in node1.
		 */
		count = -count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[0];
		btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
		memcpy(btree_d, btree_s, tmp);
		be16_add_cpu(&node1->hdr.count, count);
		xfs_da_log_buf(tp, blk1->bp,
			XFS_DA_LOGRANGE(node1, btree_d, tmp));

		/*
		 * Move elements in node2 down to fill the hole.
		 */
		tmp  = be16_to_cpu(node2->hdr.count) - count;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &node2->btree[count];
		btree_d = &node2->btree[0];
		memmove(btree_d, btree_s, tmp);
		be16_add_cpu(&node2->hdr.count, -count);
	}

	/*
	 * Log header of node 1 and all current bits of node 2.
	 */
	xfs_da_log_buf(tp, blk1->bp,
		XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
	xfs_da_log_buf(tp, blk2->bp,
		XFS_DA_LOGRANGE(node2, &node2->hdr,
			sizeof(node2->hdr) +
			sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));

	/*
	 * Record the last hashval from each block for upward propagation.
	 * (note: don't use the swapped node pointers)
	 */
	node1 = blk1->bp->data;
	node2 = blk2->bp->data;
	blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
	blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);

	/*
	 * Adjust the expected index for insertion.
	 */
	if (blk1->index >= be16_to_cpu(node1->hdr.count)) {
		blk2->index = blk1->index - be16_to_cpu(node1->hdr.count);
		blk1->index = be16_to_cpu(node1->hdr.count) + 1;	/* make it invalid */
	}
}
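
/*
 * Worked example (illustrative): if node1 holds 10 entries and node2
 * holds 0, then count = (10 - 0) / 2 = 5 and the 5 highest-hashval
 * entries of node1 are copied to the front of node2, leaving 5 and 5.
 * A negative count takes the mirror-image path: entries move from the
 * front of node2 to the tail of node1.
 */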

/*
 * Add a new entry to an intermediate node.
 */
STATIC void
xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
			       xfs_da_state_blk_t *newblk)
{
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	int tmp;

	trace_xfs_da_node_add(state->args);

	node = oldblk->bp->data;
	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
	ASSERT(newblk->blkno != 0);
	if (state->args->whichfork == XFS_DATA_FORK)
		ASSERT(newblk->blkno >= state->mp->m_dirleafblk &&
		       newblk->blkno < state->mp->m_dirfreeblk);

	/*
	 * We may need to make some room before we insert the new node.
	 */
	tmp = 0;
	btree = &node->btree[ oldblk->index ];
	if (oldblk->index < be16_to_cpu(node->hdr.count)) {
		tmp = (be16_to_cpu(node->hdr.count) - oldblk->index) * (uint)sizeof(*btree);
		memmove(btree + 1, btree, tmp);
	}
	btree->hashval = cpu_to_be32(newblk->hashval);
	btree->before = cpu_to_be32(newblk->blkno);
	xfs_da_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
	be16_add_cpu(&node->hdr.count, 1);
	xfs_da_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	/*
	 * Copy the last hash value from the oldblk to propagate upwards.
	 */
	oldblk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1 ].hashval);
}
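
/*
 * Illustrative: inserting at index 1 of a node holding entries
 * {A, B, C} memmove()s B and C up one slot and writes the new
 * (hashval, before) pair into slot 1, giving {A, new, B, C} and
 * hdr.count == 4.  The caller chose the index so hashvals stay
 * sorted.
 */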

/*========================================================================
 * Routines used for shrinking the Btree.
 *========================================================================*/

/*
 * Deallocate an empty leaf node, remove it from its parent,
 * possibly deallocating that block, etc...
 */
int
xfs_da_join(xfs_da_state_t *state)
{
	xfs_da_state_blk_t *drop_blk, *save_blk;
	int action, error;

	trace_xfs_da_join(state->args);

	action = 0;
	drop_blk = &state->path.blk[ state->path.active-1 ];
	save_blk = &state->altpath.blk[ state->path.active-1 ];
	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Walk back up the tree joining/deallocating as necessary.
	 * When we stop dropping blocks, break out.
	 */
	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
		 state->path.active--) {
		/*
		 * See if we can combine the block with a neighbor.
		 *   (action == 0) => no options, just leave
		 *   (action == 1) => coalesce, then unlink
		 *   (action == 2) => block empty, unlink it
		 */
		switch (drop_blk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr_leaf_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return(0);
			xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Remove the offending node, fixup hashvals,
			 * check for a toosmall neighbor.
			 */
			xfs_da_node_remove(state, drop_blk);
			xfs_da_fixhashpath(state, &state->path);
			error = xfs_da_node_toosmall(state, &action);
			if (error)
				return(error);
			if (action == 0)
				return 0;
			xfs_da_node_unbalance(state, drop_blk, save_blk);
			break;
		}
		xfs_da_fixhashpath(state, &state->altpath);
		error = xfs_da_blk_unlink(state, drop_blk, save_blk);
		xfs_da_state_kill_altpath(state);
		if (error)
			return(error);
		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
							 drop_blk->bp);
		drop_blk->bp = NULL;
		if (error)
			return(error);
	}
	/*
	 * We joined all the way to the top.  If it turns out that
	 * we only have one entry in the root, make the child block
	 * the new root.
	 */
	xfs_da_node_remove(state, drop_blk);
	xfs_da_fixhashpath(state, &state->path);
	error = xfs_da_root_join(state, &state->path.blk[0]);
	return(error);
}

#ifdef	DEBUG
static void
xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
{
	__be16	magic = blkinfo->magic;

	if (level == 1) {
		ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
	} else
		ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);
}
#else	/* !DEBUG */
#define	xfs_da_blkinfo_onlychild_validate(blkinfo, level)
#endif	/* !DEBUG */

/*
 * We have only one entry in the root.  Copy the only remaining child of
 * the old root to block 0 as the new root node.
 */
STATIC int
xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
{
	xfs_da_intnode_t *oldroot;
	xfs_da_args_t *args;
	xfs_dablk_t child;
	xfs_dabuf_t *bp;
	int error;

	trace_xfs_da_root_join(state->args);

	args = state->args;
	ASSERT(args != NULL);
	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
	oldroot = root_blk->bp->data;
	ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT(!oldroot->hdr.info.forw);
	ASSERT(!oldroot->hdr.info.back);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (be16_to_cpu(oldroot->hdr.count) > 1)
		return(0);

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	child = be32_to_cpu(oldroot->btree[0].before);
	ASSERT(child != 0);
	error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
					     args->whichfork);
	if (error)
		return(error);
	ASSERT(bp != NULL);
	xfs_da_blkinfo_onlychild_validate(bp->data,
					be16_to_cpu(oldroot->hdr.level));

	memcpy(root_blk->bp->data, bp->data, state->blocksize);
	xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
	error = xfs_da_shrink_inode(args, child, bp);
	return(error);
}

/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 */
STATIC int
xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
{
	xfs_da_intnode_t *node;
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *info;
	int count, forward, error, retval, i;
	xfs_dablk_t blkno;
	xfs_dabuf_t *bp;

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->data;
	ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	node = (xfs_da_intnode_t *)info;
	count = be16_to_cpu(node->hdr.count);
	if (count > (state->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return(0);	/* blk over 50%, don't try to join */
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (arbitrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 */
		forward = (info->forw != 0);
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error)
			return(error);
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return(0);
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	/* start with smaller blk num */
	forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));
	for (i = 0; i < 2; forward = !forward, i++) {
		if (forward)
			blkno = be32_to_cpu(info->forw);
		else
			blkno = be32_to_cpu(info->back);
		if (blkno == 0)
			continue;
		error = xfs_da_read_buf(state->args->trans, state->args->dp,
					blkno, -1, &bp, state->args->whichfork);
		if (error)
			return(error);
		ASSERT(bp != NULL);

		node = (xfs_da_intnode_t *)info;
		count  = state->node_ents;
		count -= state->node_ents >> 2;
		count -= be16_to_cpu(node->hdr.count);
		node = bp->data;
		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
		count -= be16_to_cpu(node->hdr.count);
		xfs_da_brelse(state->args->trans, bp);
		if (count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	if (i >= 2) {
		*action = 0;
		return(0);
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		error = xfs_da_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	} else {
		error = xfs_da_path_shift(state, &state->path, forward,
						 0, &retval);
		if (error) {
			return(error);
		}
		if (retval) {
			*action = 0;
			return(0);
		}
	}
	*action = 1;
	return(0);
}
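
/*
 * Example with made-up numbers: with node_ents == 64, a node is a
 * join candidate once count <= 32.  The sibling scan then accepts a
 * merge only if count + sibling_count <= 64 - 16 == 48, i.e. the
 * combined node would still have at least 25% of its slots free.
 */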

/*
 * Walk back up the tree adjusting hash values as necessary,
 * when we stop making changes, return.
 */
void
xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
{
	xfs_da_state_blk_t *blk;
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	xfs_dahash_t lasthash=0;
	int level, count;

	level = path->active-1;
	blk = &path->blk[ level ];
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DA_NODE_MAGIC:
		lasthash = xfs_da_node_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	}
	for (blk--, level--; level >= 0; blk--, level--) {
		node = blk->bp->data;
		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
		btree = &node->btree[ blk->index ];
		if (be32_to_cpu(btree->hashval) == lasthash)
			break;
		blk->hashval = lasthash;
		btree->hashval = cpu_to_be32(lasthash);
		xfs_da_log_buf(state->args->trans, blk->bp,
				  XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));

		lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
	}
}

/*
 * Remove an entry from an intermediate node.
 */
STATIC void
xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
{
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	int tmp;

	trace_xfs_da_node_remove(state->args);

	node = drop_blk->bp->data;
	ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
	ASSERT(drop_blk->index >= 0);

	/*
	 * Copy over the offending entry, or just zero it out.
	 */
	btree = &node->btree[drop_blk->index];
	if (drop_blk->index < (be16_to_cpu(node->hdr.count)-1)) {
		tmp  = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		memmove(btree, btree + 1, tmp);
		xfs_da_log_buf(state->args->trans, drop_blk->bp,
		    XFS_DA_LOGRANGE(node, btree, tmp));
		btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
	}
	memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
	xfs_da_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
	be16_add_cpu(&node->hdr.count, -1);
	xfs_da_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));

	/*
	 * Copy the last hash value from the block to propagate upwards.
	 */
	btree--;
	drop_blk->hashval = be32_to_cpu(btree->hashval);
}

/*
 * Unbalance the btree elements between two intermediate nodes,
 * move all Btree elements from one node into another.
 */
STATIC void
xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
				     xfs_da_state_blk_t *save_blk)
{
	xfs_da_intnode_t *drop_node, *save_node;
	xfs_da_node_entry_t *btree;
	int tmp;
	xfs_trans_t *tp;

	trace_xfs_da_node_unbalance(state->args);

	drop_node = drop_blk->bp->data;
	save_node = save_blk->bp->data;
	ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	tp = state->args->trans;

	/*
	 * If the dying block has lower hashvals, then move all the
	 * elements in the remaining block up to make a hole.
	 */
	if ((be32_to_cpu(drop_node->btree[0].hashval) < be32_to_cpu(save_node->btree[ 0 ].hashval)) ||
	    (be32_to_cpu(drop_node->btree[be16_to_cpu(drop_node->hdr.count)-1].hashval) <
	     be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval)))
	{
		btree = &save_node->btree[be16_to_cpu(drop_node->hdr.count)];
		tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
		memmove(btree, &save_node->btree[0], tmp);
		btree = &save_node->btree[0];
		xfs_da_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
				sizeof(xfs_da_node_entry_t)));
	} else {
		btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
		xfs_da_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, btree,
				be16_to_cpu(drop_node->hdr.count) *
				sizeof(xfs_da_node_entry_t)));
	}

	/*
	 * Move all the B-tree elements from drop_blk to save_blk.
	 */
	tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
	memcpy(btree, &drop_node->btree[0], tmp);
	be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));

	xfs_da_log_buf(tp, save_blk->bp,
		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
			sizeof(save_node->hdr)));

	/*
	 * Save the last hashval in the remaining block for upward propagation.
	 */
	save_blk->hashval = be32_to_cpu(save_node->btree[be16_to_cpu(save_node->hdr.count)-1].hashval);
}

/*========================================================================
 * Routines used for finding things in the Btree.
 *========================================================================*/

/*
 * Walk down the Btree looking for a particular filename, filling
 * in the state structure as we go.
 *
 * We will set the state structure to point to each of the elements
 * in each of the nodes where either the hashval is or should be.
 *
 * We support duplicate hashval's so for each entry in the current
 * node that could contain the desired hashval, descend.  This is a
 * pruned depth-first tree search.
 */
int							/* error */
xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
{
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *curr;
	xfs_da_intnode_t *node;
	xfs_da_node_entry_t *btree;
	xfs_dablk_t blkno;
	int probe, span, max, error, retval;
	xfs_dahash_t hashval, btreehashval;
	xfs_da_args_t *args;

	args = state->args;

	/*
	 * Descend thru the B-tree searching each level for the right
	 * node to use, until the right hashval is found.
	 */
	blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0;
	for (blk = &state->path.blk[0], state->path.active = 1;
			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
			 blk++, state->path.active++) {
		/*
		 * Read the next node down in the tree.
		 */
		blk->blkno = blkno;
		error = xfs_da_read_buf(args->trans, args->dp, blkno,
					-1, &blk->bp, args->whichfork);
		if (error) {
			blk->blkno = 0;
			state->path.active--;
			return(error);
		}
		curr = blk->bp->data;
		blk->magic = be16_to_cpu(curr->magic);
		ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
		       blk->magic == XFS_DIR2_LEAFN_MAGIC ||
		       blk->magic == XFS_ATTR_LEAF_MAGIC);

		/*
		 * Search an intermediate node for a match.
		 */
		if (blk->magic == XFS_DA_NODE_MAGIC) {
			node = blk->bp->data;
			max = be16_to_cpu(node->hdr.count);
			blk->hashval = be32_to_cpu(node->btree[max-1].hashval);

			/*
			 * Binary search.  (note: small blocks will skip loop)
			 */
			probe = span = max / 2;
			hashval = args->hashval;
			for (btree = &node->btree[probe]; span > 4;
				   btree = &node->btree[probe]) {
				span /= 2;
				btreehashval = be32_to_cpu(btree->hashval);
				if (btreehashval < hashval)
					probe += span;
				else if (btreehashval > hashval)
					probe -= span;
				else
					break;
			}
			ASSERT((probe >= 0) && (probe < max));
			ASSERT((span <= 4) || (be32_to_cpu(btree->hashval) == hashval));

			/*
			 * Since we may have duplicate hashval's, find the first
			 * matching hashval in the node.
			 */
			while ((probe > 0) && (be32_to_cpu(btree->hashval) >= hashval)) {
				btree--;
				probe--;
			}
			while ((probe < max) && (be32_to_cpu(btree->hashval) < hashval)) {
				btree++;
				probe++;
			}

			/*
			 * Pick the right block to descend on.
			 */
			if (probe == max) {
				blk->index = max-1;
				blkno = be32_to_cpu(node->btree[max-1].before);
			} else {
				blk->index = probe;
				blkno = be32_to_cpu(btree->before);
			}
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		} else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
			break;
		}
	}

	/*
	 * A leaf block that ends in the hashval that we are interested in
	 * (final hashval == search hashval) means that the next block may
	 * contain more entries with the same hashval, shift upward to the
	 * next leaf and keep searching.
	 */
	for (;;) {
		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
							&blk->index, state);
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			retval = xfs_attr_leaf_lookup_int(blk->bp, args);
			blk->index = args->index;
			args->blkno = blk->blkno;
		} else {
			ASSERT(0);
			return XFS_ERROR(EFSCORRUPTED);
		}
		if (((retval == ENOENT) || (retval == ENOATTR)) &&
		    (blk->hashval == args->hashval)) {
			error = xfs_da_path_shift(state, &state->path, 1, 1,
							 &retval);
			if (error)
				return(error);
			if (retval == 0) {
				continue;
			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
				/* path_shift() gives ENOENT */
				retval = XFS_ERROR(ENOATTR);
			}
		}
		break;
	}
	*result = retval;
	return(0);
}
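
/*
 * Lookup sketch: the binary search above halves "span" until it is
 * <= 4, then the two linear loops settle "probe" on the first entry
 * whose hashval is >= the search hashval.  Because different names
 * can hash to the same value, a leaf whose last hashval equals the
 * search value may continue into the next leaf; that is why the
 * bottom loop shifts forward with xfs_da_path_shift() and retries
 * the leaf lookup on ENOENT/ENOATTR.
 */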

/*========================================================================
 * Utility routines.
 *========================================================================*/

/*
 * Link a new block into a doubly linked list of blocks (of whatever type).
 */
int							/* error */
xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
			       xfs_da_state_blk_t *new_blk)
{
	xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
	xfs_da_args_t *args;
	int before=0, error;
	xfs_dabuf_t *bp;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	old_info = old_blk->bp->data;
	new_info = new_blk->bp->data;
	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(old_blk->magic == be16_to_cpu(old_info->magic));
	ASSERT(new_blk->magic == be16_to_cpu(new_info->magic));
	ASSERT(old_blk->magic == new_blk->magic);

	switch (old_blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DA_NODE_MAGIC:
		before = xfs_da_node_order(old_blk->bp, new_blk->bp);
		break;
	}

	/*
	 * Link blocks in appropriate order.
	 */
	if (before) {
		/*
		 * Link new block in before existing block.
		 */
		trace_xfs_da_link_before(args);
		new_info->forw = cpu_to_be32(old_blk->blkno);
		new_info->back = old_info->back;
		if (old_info->back) {
			error = xfs_da_read_buf(args->trans, args->dp,
						be32_to_cpu(old_info->back),
						-1, &bp, args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->data;
			ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
			tmp_info->forw = cpu_to_be32(new_blk->blkno);
			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
			xfs_da_buf_done(bp);
		}
		old_info->back = cpu_to_be32(new_blk->blkno);
	} else {
		/*
		 * Link new block in after existing block.
		 */
		trace_xfs_da_link_after(args);
		new_info->forw = old_info->forw;
		new_info->back = cpu_to_be32(old_blk->blkno);
		if (old_info->forw) {
			error = xfs_da_read_buf(args->trans, args->dp,
						be32_to_cpu(old_info->forw),
						-1, &bp, args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->data;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
			tmp_info->back = cpu_to_be32(new_blk->blkno);
			xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
			xfs_da_buf_done(bp);
		}
		old_info->forw = cpu_to_be32(new_blk->blkno);
	}

	xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
	xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
	return(0);
}

/*
 * Compare two intermediate nodes for "order".
 */
STATIC int
xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
{
	xfs_da_intnode_t *node1, *node2;

	node1 = node1_bp->data;
	node2 = node2_bp->data;
	ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
	       node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
	    ((be32_to_cpu(node2->btree[0].hashval) <
	      be32_to_cpu(node1->btree[0].hashval)) ||
	     (be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval) <
	      be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval)))) {
		return(1);
	}
	return(0);
}

/*
 * Pick up the last hashvalue from an intermediate node.
 */
STATIC uint
xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
{
	xfs_da_intnode_t *node;

	node = bp->data;
	ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
	if (count)
		*count = be16_to_cpu(node->hdr.count);
	if (!node->hdr.count)
		return(0);
	return be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
}

/*
 * Unlink a block from a doubly linked list of blocks.
 */
STATIC int						/* error */
xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
				 xfs_da_state_blk_t *save_blk)
{
	xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
	xfs_da_args_t *args;
	xfs_dabuf_t *bp;
	int error;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	save_info = save_blk->bp->data;
	drop_info = drop_blk->bp->data;
	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(save_blk->magic == be16_to_cpu(save_info->magic));
	ASSERT(drop_blk->magic == be16_to_cpu(drop_info->magic));
	ASSERT(save_blk->magic == drop_blk->magic);
	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
	       (be32_to_cpu(drop_info->back) == save_blk->blkno));

	/*
	 * Unlink the leaf block from the doubly linked chain of leaves.
	 */
	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
		trace_xfs_da_unlink_back(args);
		save_info->back = drop_info->back;
		if (drop_info->back) {
			error = xfs_da_read_buf(args->trans, args->dp,
						be32_to_cpu(drop_info->back),
						-1, &bp, args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->data;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
			tmp_info->forw = cpu_to_be32(save_blk->blkno);
			xfs_da_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
			xfs_da_buf_done(bp);
		}
	} else {
		trace_xfs_da_unlink_forward(args);
		save_info->forw = drop_info->forw;
		if (drop_info->forw) {
			error = xfs_da_read_buf(args->trans, args->dp,
						be32_to_cpu(drop_info->forw),
						-1, &bp, args->whichfork);
			if (error)
				return(error);
			ASSERT(bp != NULL);
			tmp_info = bp->data;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
			tmp_info->back = cpu_to_be32(save_blk->blkno);
			xfs_da_log_buf(args->trans, bp, 0,
						    sizeof(*tmp_info) - 1);
			xfs_da_buf_done(bp);
		}
	}

	xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
	return(0);
}

/*
 * Move a path "forward" or "!forward" one block at the current level.
 *
 * This routine will adjust a "path" to point to the next block
 * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
 * Btree, including updating pointers to the intermediate nodes between
 * the new bottom and the root.
 */
int							/* error */
xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
				 int forward, int release, int *result)
{
	xfs_da_state_blk_t *blk;
	xfs_da_blkinfo_t *info;
	xfs_da_intnode_t *node;
	xfs_da_args_t *args;
	xfs_dablk_t blkno=0;
	int level, error;

	/*
	 * Roll up the Btree looking for the first block where our
	 * current index is not at the edge of the block.  Note that
	 * we skip the bottom layer because we want the sibling block.
	 */
	args = state->args;
	ASSERT(args != NULL);
	ASSERT(path != NULL);
	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
	level = (path->active-1) - 1;	/* skip bottom layer in path */
	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
		ASSERT(blk->bp != NULL);
		node = blk->bp->data;
		ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
		if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
			blk->index++;
			blkno = be32_to_cpu(node->btree[blk->index].before);
			break;
		} else if (!forward && (blk->index > 0)) {
			blk->index--;
			blkno = be32_to_cpu(node->btree[blk->index].before);
			break;
		}
	}
	if (level < 0) {
		*result = XFS_ERROR(ENOENT);	/* we're out of our tree */
		ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
		return(0);
	}

	/*
	 * Roll down the edge of the subtree until we reach the
	 * same depth we were at originally.
	 */
	for (blk++, level++; level < path->active; blk++, level++) {
		/*
		 * Release the old block.
		 * (if it's dirty, trans won't actually let go)
		 */
		if (release)
			xfs_da_brelse(args->trans, blk->bp);

		/*
		 * Read the next child block.
		 */
		blk->blkno = blkno;
		error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
						     &blk->bp, args->whichfork);
		if (error)
			return(error);
		ASSERT(blk->bp != NULL);
		info = blk->bp->data;
		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
		blk->magic = be16_to_cpu(info->magic);
		if (blk->magic == XFS_DA_NODE_MAGIC) {
			node = (xfs_da_intnode_t *)info;
			blk->hashval = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
			if (forward)
				blk->index = 0;
			else
				blk->index = be16_to_cpu(node->hdr.count)-1;
			blkno = be32_to_cpu(node->btree[blk->index].before);
		} else {
			ASSERT(level == path->active-1);
			blk->index = 0;
			switch(blk->magic) {
			case XFS_ATTR_LEAF_MAGIC:
				blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
								      NULL);
				break;
			case XFS_DIR2_LEAFN_MAGIC:
				blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
								       NULL);
				break;
			default:
				ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
				       blk->magic == XFS_DIR2_LEAFN_MAGIC);
				break;
			}
		}
	}
	*result = 0;
	return(0);
}
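
/*
 * Shift sketch: to move "forward" one leaf, walk up the path to the
 * first node whose index is not already at count-1, bump that index,
 * then walk back down the leftmost (index 0) edge of the new subtree.
 * Moving backward is the mirror image (index > 0, then the rightmost
 * edge).  Running off the top of the tree reports ENOENT in *result.
 */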


/*========================================================================
 * Utility routines.
 *========================================================================*/

/*
 * Implement a simple hash on a character string.
 * Rotate the hash value by 7 bits, then XOR each character in.
 * This is implemented with some source-level loop unrolling.
 */
xfs_dahash_t
xfs_da_hashname(const __uint8_t *name, int namelen)
{
	xfs_dahash_t hash;

	/*
	 * Do four characters at a time as long as we can.
	 */
	for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
		hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
		       (name[3] << 0) ^ rol32(hash, 7 * 4);

	/*
	 * Now do the rest of the characters.
	 */
	switch (namelen) {
	case 3:
		return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
		       rol32(hash, 7 * 3);
	case 2:
		return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
	case 1:
		return (name[0] << 0) ^ rol32(hash, 7 * 1);
	default: /* case 0: */
		return hash;
	}
}
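
/*
 * Worked example (computed by hand from the shifts above, for
 * illustration only):
 *
 *	xfs_dahash_t h = xfs_da_hashname((const __uint8_t *)"abcd", 4);
 *	// h == ('a' << 21) ^ ('b' << 14) ^ ('c' << 7) ^ 'd' == 0x0c38b1e4
 *
 * A single-character name hashes to the character itself, since
 * rol32(0, 7) == 0.
 */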

enum xfs_dacmp
xfs_da_compname(
	struct xfs_da_args *args,
	const unsigned char *name,
	int		len)
{
	return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
					XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
}

static xfs_dahash_t
xfs_default_hashname(
	struct xfs_name	*name)
{
	return xfs_da_hashname(name->name, name->len);
}

const struct xfs_nameops xfs_default_nameops = {
	.hashname	= xfs_default_hashname,
	.compname	= xfs_da_compname
};
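
/*
 * Usage sketch (hypothetical caller): name hashing and comparison go
 * through the ops vector rather than calling xfs_da_hashname()
 * directly, so that case-insensitive variants can supply their own
 * routines:
 *
 *	struct xfs_name name = {
 *		.name = (const unsigned char *)"foo",
 *		.len = 3,
 *	};
 *	xfs_dahash_t h = xfs_default_nameops.hashname(&name);
 */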

int
xfs_da_grow_inode_int(
	struct xfs_da_args	*args,
	xfs_fileoff_t		*bno,
	int			count)
{
	struct xfs_trans	*tp = args->trans;
	struct xfs_inode	*dp = args->dp;
	int			w = args->whichfork;
	xfs_drfsbno_t		nblks = dp->i_d.di_nblocks;
	struct xfs_bmbt_irec	map, *mapp;
	int			nmap, error, got, i, mapi;

	/*
	 * Find a spot in the file space to put the new block.
	 */
	error = xfs_bmap_first_unused(tp, dp, count, bno, w);
	if (error)
		return error;

	/*
	 * Try mapping it in one filesystem block.
	 */
	nmap = 1;
	ASSERT(args->firstblock != NULL);
	error = xfs_bmapi_write(tp, dp, *bno, count,
			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
			args->firstblock, args->total, &map, &nmap,
			args->flist);
	if (error)
		return error;

	ASSERT(nmap <= 1);
	if (nmap == 1) {
		mapp = &map;
		mapi = 1;
	} else if (nmap == 0 && count > 1) {
		xfs_fileoff_t		b;
		int			c;

		/*
		 * If we didn't get it and the block might work if fragmented,
		 * try without the CONTIG flag.  Loop until we get it all.
		 */
		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
		for (b = *bno, mapi = 0; b < *bno + count; ) {
			nmap = MIN(XFS_BMAP_MAX_NMAP, count);
			c = (int)(*bno + count - b);
			error = xfs_bmapi_write(tp, dp, b, c,
					xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
					args->firstblock, args->total,
					&mapp[mapi], &nmap, args->flist);
			if (error)
				goto out_free_map;
			if (nmap < 1)
				break;
			mapi += nmap;
			b = mapp[mapi - 1].br_startoff +
			    mapp[mapi - 1].br_blockcount;
		}
	} else {
		mapi = 0;
		mapp = NULL;
	}

	/*
	 * Count the blocks we got, make sure it matches the total.
	 */
	for (i = 0, got = 0; i < mapi; i++)
		got += mapp[i].br_blockcount;
	if (got != count || mapp[0].br_startoff != *bno ||
	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
	    *bno + count) {
		error = XFS_ERROR(ENOSPC);
		goto out_free_map;
	}

	/* account for newly allocated blocks in reserved blocks total */
	args->total -= dp->i_d.di_nblocks - nblks;

out_free_map:
	if (mapp != &map)
		kmem_free(mapp);
	return error;
}

/*
 * Add a block to the btree in the file's address space.
 * Return the new block number to the caller.
 */
int
xfs_da_grow_inode(
	struct xfs_da_args	*args,
	xfs_dablk_t		*new_blkno)
{
	xfs_fileoff_t		bno;
	int			count;
	int			error;

	trace_xfs_da_grow_inode(args);

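	/*
	 * Directory (data fork) btree blocks live in the leaf section of
	 * the directory's address space and may span several filesystem
	 * blocks; attr fork blocks are single fs blocks starting at zero.
	 */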
	if (args->whichfork == XFS_DATA_FORK) {
		bno = args->dp->i_mount->m_dirleafblk;
		count = args->dp->i_mount->m_dirblkfsbs;
	} else {
		bno = 0;
		count = 1;
	}

	error = xfs_da_grow_inode_int(args, &bno, count);
	if (!error)
		*new_blkno = (xfs_dablk_t)bno;
	return error;
}

/*
 * Ick.  We need to always be able to remove a btree block, even
 * if there's no space reservation because the filesystem is full.
 * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
 * It swaps the target block with the last block in the file, since
 * the last block of the file can always be removed: doing so cannot
 * force a bmap btree split.
 */
STATIC int
xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
		      xfs_dabuf_t **dead_bufp)
{
	xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
	xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
	xfs_fileoff_t lastoff;
	xfs_inode_t *ip;
	xfs_trans_t *tp;
	xfs_mount_t *mp;
	int error, w, entno, level, dead_level;
	xfs_da_blkinfo_t *dead_info, *sib_info;
	xfs_da_intnode_t *par_node, *dead_node;
	xfs_dir2_leaf_t *dead_leaf2;
	xfs_dahash_t dead_hash;

	trace_xfs_da_swap_lastblock(args);

	dead_buf = *dead_bufp;
	dead_blkno = *dead_blknop;
	tp = args->trans;
	ip = args->dp;
	w = args->whichfork;
	ASSERT(w == XFS_DATA_FORK);
	mp = ip->i_mount;
	lastoff = mp->m_dirfreeblk;
	error = xfs_bmap_last_before(tp, ip, &lastoff, w);
	if (error)
		return error;
	if (unlikely(lastoff == 0)) {
		XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
				 mp);
		return XFS_ERROR(EFSCORRUPTED);
	}
	/*
	 * Read the last block in the btree space.
	 */
	last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
	if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
		return error;
	/*
	 * Copy the last block into the dead buffer and log it.
	 */
	memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
	xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
	dead_info = dead_buf->data;
	/*
	 * Get values from the moved block.
	 */
	if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
		dead_level = 0;
		dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
	} else {
		ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
		dead_node = (xfs_da_intnode_t *)dead_info;
		dead_level = be16_to_cpu(dead_node->hdr.level);
		dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
	}
	sib_buf = par_buf = NULL;
	/*
	 * If the moved block has a left sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
			goto done;
		sib_info = sib_buf->data;
		if (unlikely(
		    be32_to_cpu(sib_info->forw) != last_blkno ||
		    sib_info->magic != dead_info->magic)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		sib_info->forw = cpu_to_be32(dead_blkno);
		xfs_da_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
					sizeof(sib_info->forw)));
		xfs_da_buf_done(sib_buf);
		sib_buf = NULL;
	}
	/*
	 * If the moved block has a right sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
		if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
			goto done;
		sib_info = sib_buf->data;
		if (unlikely(
		       be32_to_cpu(sib_info->back) != last_blkno ||
		       sib_info->magic != dead_info->magic)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		sib_info->back = cpu_to_be32(dead_blkno);
		xfs_da_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
					sizeof(sib_info->back)));
		xfs_da_buf_done(sib_buf);
		sib_buf = NULL;
	}
	par_blkno = mp->m_dirleafblk;
	level = -1;
	/*
	 * Walk down the tree looking for the parent of the moved block.
	 */
	for (;;) {
		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
			goto done;
		par_node = par_buf->data;
		if (unlikely(par_node->hdr.info.magic !=
		    cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		    (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		level = be16_to_cpu(par_node->hdr.level);
		for (entno = 0;
		     entno < be16_to_cpu(par_node->hdr.count) &&
		     be32_to_cpu(par_node->btree[entno].hashval) < dead_hash;
		     entno++)
			continue;
		if (unlikely(entno == be16_to_cpu(par_node->hdr.count))) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		par_blkno = be32_to_cpu(par_node->btree[entno].before);
		if (level == dead_level + 1)
			break;
		xfs_da_brelse(tp, par_buf);
		par_buf = NULL;
	}
	/*
	 * We're in the right parent block.
	 * Look for the right entry.
	 */
	for (;;) {
		for (;
		     entno < be16_to_cpu(par_node->hdr.count) &&
		     be32_to_cpu(par_node->btree[entno].before) != last_blkno;
		     entno++)
			continue;
		if (entno < be16_to_cpu(par_node->hdr.count))
			break;
		par_blkno = be32_to_cpu(par_node->hdr.info.forw);
		xfs_da_brelse(tp, par_buf);
		par_buf = NULL;
		if (unlikely(par_blkno == 0)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
			goto done;
		par_node = par_buf->data;
		if (unlikely(
		    be16_to_cpu(par_node->hdr.level) != level ||
		    par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
					 XFS_ERRLEVEL_LOW, mp);
			error = XFS_ERROR(EFSCORRUPTED);
			goto done;
		}
		entno = 0;
	}
	/*
	 * Update the parent entry pointing to the moved block.
	 */
	par_node->btree[entno].before = cpu_to_be32(dead_blkno);
	xfs_da_log_buf(tp, par_buf,
		XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
				sizeof(par_node->btree[entno].before)));
	xfs_da_buf_done(par_buf);
	xfs_da_buf_done(dead_buf);
	*dead_blknop = last_blkno;
	*dead_bufp = last_buf;
	return 0;
done:
	if (par_buf)
		xfs_da_brelse(tp, par_buf);
	if (sib_buf)
		xfs_da_brelse(tp, sib_buf);
	xfs_da_brelse(tp, last_buf);
	return error;
}

/*
 * Remove a btree block from a directory or attribute.
 */
int
xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
		    xfs_dabuf_t *dead_buf)
{
	xfs_inode_t *dp;
	int done, error, w, count;
	xfs_trans_t *tp;
	xfs_mount_t *mp;

	trace_xfs_da_shrink_inode(args);

	dp = args->dp;
	w = args->whichfork;
	tp = args->trans;
	mp = dp->i_mount;
	if (w == XFS_DATA_FORK)
		count = mp->m_dirblkfsbs;
	else
		count = 1;
	for (;;) {
		/*
		 * Remove extents.  If we get ENOSPC for a dir we have to move
		 * the last block to the place we want to kill.
		 */
		if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
				xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
				0, args->firstblock, args->flist,
				&done)) == ENOSPC) {
			if (w != XFS_DATA_FORK)
				break;
			if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
					&dead_buf)))
				break;
		} else {
			break;
		}
	}
	xfs_da_binval(tp, dead_buf);
	return error;
}

/*
 * See if the mapping(s) for this btree block are valid, i.e.
 * don't contain holes, are logically contiguous, and cover the whole range.
 */
STATIC int
xfs_da_map_covers_blocks(
	int		nmap,
	xfs_bmbt_irec_t	*mapp,
	xfs_dablk_t	bno,
	int		count)
{
	int		i;
	xfs_fileoff_t	off;

	for (i = 0, off = bno; i < nmap; i++) {
		if (mapp[i].br_startblock == HOLESTARTBLOCK ||
		    mapp[i].br_startblock == DELAYSTARTBLOCK) {
			return 0;
		}
		if (off != mapp[i].br_startoff) {
			return 0;
		}
		off += mapp[i].br_blockcount;
	}
	return off == bno + count;
}

/*
 * Make a dabuf.
 * Used for get_buf, read_buf, read_bufr, and reada_buf.
 */
STATIC int
xfs_da_do_buf(
	xfs_trans_t	*trans,
	xfs_inode_t	*dp,
	xfs_dablk_t	bno,
	xfs_daddr_t	*mappedbnop,
	xfs_dabuf_t	**bpp,
	int		whichfork,
	int		caller)
{
	xfs_buf_t	*bp = NULL;
	xfs_buf_t	**bplist;
	int		error = 0;
	int		i;
	xfs_bmbt_irec_t	map;
	xfs_bmbt_irec_t	*mapp;
	xfs_daddr_t	mappedbno;
	xfs_mount_t	*mp;
	int		nbplist = 0;
	int		nfsb;
	int		nmap;
	xfs_dabuf_t	*rbp;

	mp = dp->i_mount;
	nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
	mappedbno = *mappedbnop;
	/*
	 * Caller doesn't have a mapping: look one up.  A mappedbno of -2
	 * additionally means don't complain if we land in a hole.
	 */
	if (mappedbno == -1 || mappedbno == -2) {
		/*
		 * Optimize the one-block case.
		 */
		if (nfsb == 1)
			mapp = &map;
		else
			mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);

		nmap = nfsb;
		error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, mapp,
				       &nmap, xfs_bmapi_aflag(whichfork));
		if (error)
			goto exit0;
	} else {
		map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
		map.br_startoff = (xfs_fileoff_t)bno;
		map.br_blockcount = nfsb;
		mapp = &map;
		nmap = 1;
	}
	if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
		error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
		if (unlikely(error == EFSCORRUPTED)) {
			if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
				xfs_alert(mp, "%s: bno %lld dir: inode %lld",
					__func__, (long long)bno,
					(long long)dp->i_ino);
				for (i = 0; i < nmap; i++) {
					xfs_alert(mp,
"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
						i,
						(long long)mapp[i].br_startoff,
						(long long)mapp[i].br_startblock,
						(long long)mapp[i].br_blockcount,
						mapp[i].br_state);
				}
			}
			XFS_ERROR_REPORT("xfs_da_do_buf(1)",
					 XFS_ERRLEVEL_LOW, mp);
		}
		goto exit0;
	}
	if (caller != 3 && nmap > 1) {
		bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
		nbplist = 0;
	} else
		bplist = NULL;
	/*
	 * Turn the mapping(s) into buffer(s).
	 */
	for (i = 0; i < nmap; i++) {
		int	nmapped;

		mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
		if (i == 0)
			*mappedbnop = mappedbno;
		nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
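		/*
		 * The caller value selects the access mode, in the order
		 * listed in the comment above the function: 0 gets a new
		 * buffer, 1 and 2 read an existing one, 3 only starts
		 * readahead.  Only caller 1 checks magic numbers below.
		 */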
		switch (caller) {
		case 0:
			bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
				mappedbno, nmapped, 0);
			error = bp ? bp->b_error : XFS_ERROR(EIO);
			break;
		case 1:
		case 2:
			bp = NULL;
			error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
				mappedbno, nmapped, 0, &bp);
			break;
		case 3:
			xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
			error = 0;
			bp = NULL;
			break;
		}
		if (error) {
			if (bp)
				xfs_trans_brelse(trans, bp);
			goto exit1;
		}
		if (!bp)
			continue;
		if (caller == 1) {
			if (whichfork == XFS_ATTR_FORK)
				xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
			else
				xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
		}
		if (bplist)
			bplist[nbplist++] = bp;
	}
	/*
	 * Build a dabuf structure.
	 */
	if (bplist)
		rbp = xfs_da_buf_make(nbplist, bplist);
	else if (bp)
		rbp = xfs_da_buf_make(1, &bp);
	else
		rbp = NULL;
	/*
	 * For read_buf, check the magic number.
	 */
	if (caller == 1) {
		xfs_dir2_data_hdr_t	*hdr = rbp->data;
		xfs_dir2_free_t		*free = rbp->data;
		xfs_da_blkinfo_t	*info = rbp->data;
		uint			magic, magic1;

		magic = be16_to_cpu(info->magic);
		magic1 = be32_to_cpu(hdr->magic);
		if (unlikely(
		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
				   (magic != XFS_ATTR_LEAF_MAGIC) &&
				   (magic != XFS_DIR2_LEAF1_MAGIC) &&
				   (magic != XFS_DIR2_LEAFN_MAGIC) &&
				   (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
				   (magic1 != XFS_DIR2_DATA_MAGIC) &&
				   (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
				mp, XFS_ERRTAG_DA_READ_BUF,
				XFS_RANDOM_DA_READ_BUF))) {
			trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
					     XFS_ERRLEVEL_LOW, mp, info);
			error = XFS_ERROR(EFSCORRUPTED);
			xfs_da_brelse(trans, rbp);
			nbplist = 0;
			goto exit1;
		}
	}
	if (bplist)
		kmem_free(bplist);
	if (mapp != &map)
		kmem_free(mapp);
	if (bpp)
		*bpp = rbp;
	return 0;
exit1:
	if (bplist) {
		for (i = 0; i < nbplist; i++)
			xfs_trans_brelse(trans, bplist[i]);
		kmem_free(bplist);
	}
exit0:
	if (mapp != &map)
		kmem_free(mapp);
	if (bpp)
		*bpp = NULL;
	return error;
}

/*
 * Get a buffer for the dir/attr block.
 */
int
xfs_da_get_buf(
	xfs_trans_t	*trans,
	xfs_inode_t	*dp,
	xfs_dablk_t	bno,
	xfs_daddr_t	mappedbno,
	xfs_dabuf_t	**bpp,
	int		whichfork)
{
	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0);
}

/*
 * Get a buffer for the dir/attr block, fill in the contents.
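 *
 * A mappedbno of -1 means the disk address is not yet known and must
 * be looked up via the bmap; xfs_da_swap_lastblock() above calls it
 * this way.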
 */
int
xfs_da_read_buf(
	xfs_trans_t	*trans,
	xfs_inode_t	*dp,
	xfs_dablk_t	bno,
	xfs_daddr_t	mappedbno,
	xfs_dabuf_t	**bpp,
	int		whichfork)
{
	return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1);
}

/*
 * Readahead the dir/attr block.
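 * Return the mapped disk address of the block, or -1 if it could not
 * be mapped.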
 */
xfs_daddr_t
xfs_da_reada_buf(
	xfs_trans_t	*trans,
	xfs_inode_t	*dp,
	xfs_dablk_t	bno,
	int		whichfork)
{
	xfs_daddr_t	rval;

	rval = -1;
	if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3))
		return -1;
	else
		return rval;
}

kmem_zone_t *xfs_da_state_zone;	/* anchor for state struct zone */
kmem_zone_t *xfs_dabuf_zone;		/* dabuf zone */

/*
 * Allocate a dir-state structure.
 * We don't put them on the stack since they're large.
 */
xfs_da_state_t *
xfs_da_state_alloc(void)
{
	return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
}

/*
 * Kill the altpath contents of a da-state structure.
 */
STATIC void
xfs_da_state_kill_altpath(xfs_da_state_t *state)
{
	int	i;

	for (i = 0; i < state->altpath.active; i++) {
		if (state->altpath.blk[i].bp) {
			if (state->altpath.blk[i].bp != state->path.blk[i].bp)
				xfs_da_buf_done(state->altpath.blk[i].bp);
			state->altpath.blk[i].bp = NULL;
		}
	}
	state->altpath.active = 0;
}

/*
 * Free a da-state structure.
 */
void
xfs_da_state_free(xfs_da_state_t *state)
{
	int	i;

	xfs_da_state_kill_altpath(state);
	for (i = 0; i < state->path.active; i++) {
		if (state->path.blk[i].bp)
			xfs_da_buf_done(state->path.blk[i].bp);
	}
	if (state->extravalid && state->extrablk.bp)
		xfs_da_buf_done(state->extrablk.bp);
#ifdef DEBUG
	memset((char *)state, 0, sizeof(*state));
#endif /* DEBUG */
	kmem_zone_free(xfs_da_state_zone, state);
}

/*
 * Create a dabuf.
 */
/* ARGSUSED */
STATIC xfs_dabuf_t *
xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
{
	xfs_buf_t	*bp;
	xfs_dabuf_t	*dabuf;
	int		i;
	int		off;

	if (nbuf == 1)
		dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
	else
		dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
	dabuf->dirty = 0;
	if (nbuf == 1) {
		dabuf->nbuf = 1;
		bp = bps[0];
		dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
		dabuf->data = bp->b_addr;
		dabuf->bps[0] = bp;
	} else {
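		/*
		 * Multiple underlying buffers: concatenate their contents
		 * into one contiguous shadow copy so callers can address
		 * the whole dir/attr block as a single range.  Dirty data
		 * is copied back to the real buffers by xfs_da_buf_clean().
		 */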
		dabuf->nbuf = nbuf;
		for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
			dabuf->bps[i] = bp = bps[i];
			dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
		}
		dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
		for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
			bp = bps[i];
			memcpy((char *)dabuf->data + off, bp->b_addr,
				XFS_BUF_COUNT(bp));
		}
	}
	return dabuf;
}

/*
 * Un-dirty a dabuf.
 */
STATIC void
xfs_da_buf_clean(xfs_dabuf_t *dabuf)
{
	xfs_buf_t	*bp;
	int		i;
	int		off;

	if (dabuf->dirty) {
		ASSERT(dabuf->nbuf > 1);
		dabuf->dirty = 0;
		for (i = off = 0; i < dabuf->nbuf;
				i++, off += XFS_BUF_COUNT(bp)) {
			bp = dabuf->bps[i];
			memcpy(bp->b_addr, dabuf->data + off,
						XFS_BUF_COUNT(bp));
		}
	}
}

/*
 * Release a dabuf.
 */
void
xfs_da_buf_done(xfs_dabuf_t *dabuf)
{
	ASSERT(dabuf);
	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
	if (dabuf->dirty)
		xfs_da_buf_clean(dabuf);
	if (dabuf->nbuf > 1) {
		kmem_free(dabuf->data);
		kmem_free(dabuf);
	} else {
		kmem_zone_free(xfs_dabuf_zone, dabuf);
	}
}

/*
 * Log transaction from a dabuf.
 */
void
xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
{
	xfs_buf_t	*bp;
	uint		f;
	int		i;
	uint		l;
	int		off;

	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
	if (dabuf->nbuf == 1) {
		ASSERT(dabuf->data == dabuf->bps[0]->b_addr);
		xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
		return;
	}
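	/*
	 * Multi-buffer dabuf: mark the shadow copy dirty so that
	 * xfs_da_buf_clean() writes it back to the underlying buffers
	 * before they are released, then log the byte range of each
	 * buffer that overlaps [first, last].
	 */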
	dabuf->dirty = 1;
	ASSERT(first <= last);
	for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
		bp = dabuf->bps[i];
		f = off;
		l = f + XFS_BUF_COUNT(bp) - 1;
		if (f < first)
			f = first;
		if (l > last)
			l = last;
		if (f <= l)
			xfs_trans_log_buf(tp, bp, f - off, l - off);
		/*
		 * B_DONE is set by xfs_trans_log_buf.
		 * If we don't set it on a new buffer (get not read)
		 * then if we don't put anything in the buffer it won't
		 * be set, and at commit it is released into the cache,
		 * and then a read will fail.
		 */
		else if (!(XFS_BUF_ISDONE(bp)))
			XFS_BUF_DONE(bp);
	}
	ASSERT(last < off);
}

/*
 * Release dabuf from a transaction.
 * Have to free up the dabuf before the buffers are released,
 * since the synchronization on the dabuf is really the lock on the buffer.
 */
void
xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
{
	xfs_buf_t	*bp;
	xfs_buf_t	**bplist;
	int		i;
	int		nbuf;

	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
	if ((nbuf = dabuf->nbuf) == 1) {
		bplist = &bp;
		bp = dabuf->bps[0];
	} else {
		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
	}
	xfs_da_buf_done(dabuf);
	for (i = 0; i < nbuf; i++)
		xfs_trans_brelse(tp, bplist[i]);
	if (bplist != &bp)
		kmem_free(bplist);
}

/*
 * Invalidate dabuf from a transaction.
 */
void
xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
{
	xfs_buf_t	*bp;
	xfs_buf_t	**bplist;
	int		i;
	int		nbuf;

	ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
	if ((nbuf = dabuf->nbuf) == 1) {
		bplist = &bp;
		bp = dabuf->bps[0];
	} else {
		bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
		memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
	}
	xfs_da_buf_done(dabuf);
	for (i = 0; i < nbuf; i++)
		xfs_trans_binval(tp, bplist[i]);
	if (bplist != &bp)
		kmem_free(bplist);
}

/*
 * Get the first daddr from a dabuf.
 */
xfs_daddr_t
xfs_da_blkno(xfs_dabuf_t *dabuf)
{
	ASSERT(dabuf->nbuf);
	ASSERT(dabuf->data);
	return XFS_BUF_ADDR(dabuf->bps[0]);
}