1 /*
2  * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32 
33 /*
34  * xfs_dir2_leaf.c
35  * XFS directory version 2 implementation - single leaf form
36  * see xfs_dir2_leaf.h for data structures.
37  * These directories have multiple XFS_DIR2_DATA blocks and one
38  * XFS_DIR2_LEAF1 block containing the hash table and freespace map.
39  */
40 
41 #include "xfs.h"
42 
43 #include "xfs_macros.h"
44 #include "xfs_types.h"
45 #include "xfs_inum.h"
46 #include "xfs_log.h"
47 #include "xfs_trans.h"
48 #include "xfs_sb.h"
49 #include "xfs_ag.h"
50 #include "xfs_dir.h"
51 #include "xfs_dir2.h"
52 #include "xfs_dmapi.h"
53 #include "xfs_mount.h"
54 #include "xfs_bmap_btree.h"
55 #include "xfs_attr_sf.h"
56 #include "xfs_dir_sf.h"
57 #include "xfs_dir2_sf.h"
58 #include "xfs_dinode.h"
59 #include "xfs_inode.h"
60 #include "xfs_bmap.h"
61 #include "xfs_da_btree.h"
62 #include "xfs_dir2_data.h"
63 #include "xfs_dir2_leaf.h"
64 #include "xfs_dir2_block.h"
65 #include "xfs_dir2_node.h"
66 #include "xfs_dir2_trace.h"
67 #include "xfs_error.h"
68 #include "xfs_bit.h"
69 
70 /*
71  * Local function declarations.
72  */
73 #ifdef DEBUG
74 static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
75 #else
76 #define	xfs_dir2_leaf_check(dp, bp)
77 #endif
78 static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp,
79 				    int *indexp, xfs_dabuf_t **dbpp);
80 
81 /*
82  * Convert a block form directory to a leaf form directory.
83  */
84 int						/* error */
xfs_dir2_block_to_leaf(xfs_da_args_t * args,xfs_dabuf_t * dbp)85 xfs_dir2_block_to_leaf(
86 	xfs_da_args_t		*args,		/* operation arguments */
87 	xfs_dabuf_t		*dbp)		/* input block's buffer */
88 {
89 	xfs_dir2_data_off_t	*bestsp;	/* leaf's bestsp entries */
90 	xfs_dablk_t		blkno;		/* leaf block's bno */
91 	xfs_dir2_block_t	*block;		/* block structure */
92 	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
93 	xfs_dir2_block_tail_t	*btp;		/* block's tail */
94 	xfs_inode_t		*dp;		/* incore directory inode */
95 	int			error;		/* error return code */
96 	xfs_dabuf_t		*lbp;		/* leaf block's buffer */
97 	xfs_dir2_db_t		ldb;		/* leaf block's bno */
98 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
99 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf's tail */
100 	xfs_mount_t		*mp;		/* filesystem mount point */
101 	int			needlog;	/* need to log block header */
102 	int			needscan;	/* need to rescan bestfree */
103 	xfs_trans_t		*tp;		/* transaction pointer */
104 
105 	xfs_dir2_trace_args_b("block_to_leaf", args, dbp);
106 	dp = args->dp;
107 	mp = dp->i_mount;
108 	tp = args->trans;
109 	/*
110 	 * Add the leaf block to the inode.
111 	 * This interface will only put blocks in the leaf/node range.
112 	 * Since that's empty now, we'll get the root (block 0 in range).
113 	 */
114 	if ((error = xfs_da_grow_inode(args, &blkno))) {
115 		return error;
116 	}
117 	ldb = XFS_DIR2_DA_TO_DB(mp, blkno);
118 	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
119 	/*
120 	 * Initialize the leaf block, get a buffer for it.
121 	 */
122 	if ((error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC))) {
123 		return error;
124 	}
125 	ASSERT(lbp != NULL);
126 	leaf = lbp->data;
127 	block = dbp->data;
128 	xfs_dir2_data_check(dp, dbp);
129 	btp = XFS_DIR2_BLOCK_TAIL_P(mp, block);
130 	blp = XFS_DIR2_BLOCK_LEAF_P_ARCH(btp, ARCH_CONVERT);
131 	/*
132 	 * Set the counts in the leaf header.
133 	 */
134 	INT_COPY(leaf->hdr.count, btp->count, ARCH_CONVERT); /* INT_: type change */
135 	INT_COPY(leaf->hdr.stale, btp->stale, ARCH_CONVERT); /* INT_: type change */
136 	/*
137 	 * Could compact these but I think we always do the conversion
138 	 * after squeezing out stale entries.
139 	 */
140 	memcpy(leaf->ents, blp, INT_GET(btp->count, ARCH_CONVERT) * sizeof(xfs_dir2_leaf_entry_t));
141 	xfs_dir2_leaf_log_ents(tp, lbp, 0, INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1);
142 	needscan = 0;
143 	needlog = 1;
144 	/*
145 	 * Make the space formerly occupied by the leaf entries and block
146 	 * tail be free.
147 	 */
148 	xfs_dir2_data_make_free(tp, dbp,
149 		(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
150 		(xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize -
151 				       (char *)blp),
152 		&needlog, &needscan);
153 	/*
154 	 * Fix up the block header, make it a data block.
155 	 */
156 	INT_SET(block->hdr.magic, ARCH_CONVERT, XFS_DIR2_DATA_MAGIC);
157 	if (needscan)
158 		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog,
159 			NULL);
160 	/*
161 	 * Set up leaf tail and bests table.
162 	 */
163 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
164 	INT_SET(ltp->bestcount, ARCH_CONVERT, 1);
165 	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
166 	INT_COPY(bestsp[0], block->hdr.bestfree[0].length, ARCH_CONVERT);
167 	/*
168 	 * Log the data header and leaf bests table.
169 	 */
170 	if (needlog)
171 		xfs_dir2_data_log_header(tp, dbp);
172 	xfs_dir2_leaf_check(dp, lbp);
173 	xfs_dir2_data_check(dp, dbp);
174 	xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
175 	xfs_da_buf_done(lbp);
176 	return 0;
177 }
178 
179 /*
180  * Add an entry to a leaf form directory.
181  */
182 int						/* error */
xfs_dir2_leaf_addname(xfs_da_args_t * args)183 xfs_dir2_leaf_addname(
184 	xfs_da_args_t		*args)		/* operation arguments */
185 {
186 	xfs_dir2_data_off_t	*bestsp;	/* freespace table in leaf */
187 	int			compact;	/* need to compact leaves */
188 	xfs_dir2_data_t		*data;		/* data block structure */
189 	xfs_dabuf_t		*dbp;		/* data block buffer */
190 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
191 	xfs_inode_t		*dp;		/* incore directory inode */
192 	xfs_dir2_data_unused_t	*dup;		/* data unused entry */
193 	int			error;		/* error return value */
194 	int			grown;		/* allocated new data block */
195 	int			highstale;	/* index of next stale leaf */
196 	int			i;		/* temporary, index */
197 	int			index;		/* leaf table position */
198 	xfs_dabuf_t		*lbp;		/* leaf's buffer */
199 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
200 	int			length;		/* length of new entry */
201 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry table pointer */
202 	int			lfloglow;	/* low leaf logging index */
203 	int			lfloghigh;	/* high leaf logging index */
204 	int			lowstale;	/* index of prev stale leaf */
205 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
206 	xfs_mount_t		*mp;		/* filesystem mount point */
207 	int			needbytes;	/* leaf block bytes needed */
208 	int			needlog;	/* need to log data header */
209 	int			needscan;	/* need to rescan data free */
210 	xfs_dir2_data_off_t	*tagp;		/* end of data entry */
211 	xfs_trans_t		*tp;		/* transaction pointer */
212 	xfs_dir2_db_t		use_block;	/* data block number */
213 
214 	xfs_dir2_trace_args("leaf_addname", args);
215 	dp = args->dp;
216 	tp = args->trans;
217 	mp = dp->i_mount;
218 	/*
219 	 * Read the leaf block.
220 	 */
221 	error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
222 		XFS_DATA_FORK);
223 	if (error) {
224 		return error;
225 	}
226 	ASSERT(lbp != NULL);
227 	/*
228 	 * Look up the entry by hash value and name.
229 	 * We know it's not there, our caller has already done a lookup.
230 	 * So the index is of the entry to insert in front of.
231 	 * But if there are dup hash values the index is of the first of those.
232 	 */
233 	index = xfs_dir2_leaf_search_hash(args, lbp);
234 	leaf = lbp->data;
235 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
236 	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
237 	length = XFS_DIR2_DATA_ENTSIZE(args->namelen);
238 	/*
239 	 * See if there are any entries with the same hash value
240 	 * and space in their block for the new entry.
241 	 * This is good because it puts multiple same-hash value entries
242 	 * in a data block, improving the lookup of those entries.
243 	 */
244 	for (use_block = -1, lep = &leaf->ents[index];
245 	     index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
246 	     index++, lep++) {
247 		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
248 			continue;
249 		i = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
250 		ASSERT(i < INT_GET(ltp->bestcount, ARCH_CONVERT));
251 		ASSERT(INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF);
252 		if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) {
253 			use_block = i;
254 			break;
255 		}
256 	}
257 	/*
258 	 * Didn't find a block yet, linear search all the data blocks.
259 	 */
260 	if (use_block == -1) {
261 		for (i = 0; i < INT_GET(ltp->bestcount, ARCH_CONVERT); i++) {
262 			/*
263 			 * Remember a block we see that's missing.
264 			 */
265 			if (INT_GET(bestsp[i], ARCH_CONVERT) == NULLDATAOFF && use_block == -1)
266 				use_block = i;
267 			else if (INT_GET(bestsp[i], ARCH_CONVERT) >= length) {
268 				use_block = i;
269 				break;
270 			}
271 		}
272 	}
273 	/*
274 	 * How many bytes do we need in the leaf block?
275 	 */
276 	needbytes =
277 		(!INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT) ? 0 : (uint)sizeof(leaf->ents[0])) +
278 		(use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0]));
279 	/*
280 	 * Now kill use_block if it refers to a missing block, so we
281 	 * can use it as an indication of allocation needed.
282 	 */
283 	if (use_block != -1 && INT_GET(bestsp[use_block], ARCH_CONVERT) == NULLDATAOFF)
284 		use_block = -1;
285 	/*
286 	 * If we don't have enough free bytes but we can make enough
287 	 * by compacting out stale entries, we'll do that.
288 	 */
289 	if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] < needbytes &&
290 	    INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1) {
291 		compact = 1;
292 	}
293 	/*
294 	 * Otherwise if we don't have enough free bytes we need to
295 	 * convert to node form.
296 	 */
297 	else if ((char *)bestsp - (char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] <
298 		 needbytes) {
299 		/*
300 		 * Just checking or no space reservation, give up.
301 		 */
302 		if (args->justcheck || args->total == 0) {
303 			xfs_da_brelse(tp, lbp);
304 			return XFS_ERROR(ENOSPC);
305 		}
306 		/*
307 		 * Convert to node form.
308 		 */
309 		error = xfs_dir2_leaf_to_node(args, lbp);
310 		xfs_da_buf_done(lbp);
311 		if (error)
312 			return error;
313 		/*
314 		 * Then add the new entry.
315 		 */
316 		return xfs_dir2_node_addname(args);
317 	}
318 	/*
319 	 * Otherwise it will fit without compaction.
320 	 */
321 	else
322 		compact = 0;
323 	/*
324 	 * If just checking, then it will fit unless we needed to allocate
325 	 * a new data block.
326 	 */
327 	if (args->justcheck) {
328 		xfs_da_brelse(tp, lbp);
329 		return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
330 	}
331 	/*
332 	 * If no allocations are allowed, return now before we've
333 	 * changed anything.
334 	 */
335 	if (args->total == 0 && use_block == -1) {
336 		xfs_da_brelse(tp, lbp);
337 		return XFS_ERROR(ENOSPC);
338 	}
339 	/*
340 	 * Need to compact the leaf entries, removing stale ones.
341 	 * Leave one stale entry behind - the one closest to our
342 	 * insertion index - and we'll shift that one to our insertion
343 	 * point later.
344 	 */
345 	if (compact) {
346 		xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale,
347 			&lfloglow, &lfloghigh);
348 	}
349 	/*
350 	 * There are stale entries, so we'll need log-low and log-high
351 	 * impossibly bad values later.
352 	 */
353 	else if (INT_GET(leaf->hdr.stale, ARCH_CONVERT)) {
354 		lfloglow = INT_GET(leaf->hdr.count, ARCH_CONVERT);
355 		lfloghigh = -1;
356 	}
357 	/*
358 	 * If there was no data block space found, we need to allocate
359 	 * a new one.
360 	 */
361 	if (use_block == -1) {
362 		/*
363 		 * Add the new data block.
364 		 */
365 		if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
366 				&use_block))) {
367 			xfs_da_brelse(tp, lbp);
368 			return error;
369 		}
370 		/*
371 		 * Initialize the block.
372 		 */
373 		if ((error = xfs_dir2_data_init(args, use_block, &dbp))) {
374 			xfs_da_brelse(tp, lbp);
375 			return error;
376 		}
377 		/*
378 		 * If we're adding a new data block on the end we need to
379 		 * extend the bests table.  Copy it up one entry.
380 		 */
381 		if (use_block >= INT_GET(ltp->bestcount, ARCH_CONVERT)) {
382 			bestsp--;
383 			memmove(&bestsp[0], &bestsp[1],
384 				INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(bestsp[0]));
385 			INT_MOD(ltp->bestcount, ARCH_CONVERT, +1);
386 			xfs_dir2_leaf_log_tail(tp, lbp);
387 			xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
388 		}
389 		/*
390 		 * If we're filling in a previously empty block just log it.
391 		 */
392 		else
393 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
394 		data = dbp->data;
395 		INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT);
396 		grown = 1;
397 	}
398 	/*
399 	 * Already had space in some data block.
400 	 * Just read that one in.
401 	 */
402 	else {
403 		if ((error =
404 		    xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, use_block),
405 			    -1, &dbp, XFS_DATA_FORK))) {
406 			xfs_da_brelse(tp, lbp);
407 			return error;
408 		}
409 		data = dbp->data;
410 		grown = 0;
411 	}
412 	xfs_dir2_data_check(dp, dbp);
413 	/*
414 	 * Point to the biggest freespace in our data block.
415 	 */
416 	dup = (xfs_dir2_data_unused_t *)
417 	      ((char *)data + INT_GET(data->hdr.bestfree[0].offset, ARCH_CONVERT));
418 	ASSERT(INT_GET(dup->length, ARCH_CONVERT) >= length);
419 	needscan = needlog = 0;
420 	/*
421 	 * Mark the initial part of our freespace in use for the new entry.
422 	 */
423 	xfs_dir2_data_use_free(tp, dbp, dup,
424 		(xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
425 		&needlog, &needscan);
426 	/*
427 	 * Initialize our new entry (at last).
428 	 */
429 	dep = (xfs_dir2_data_entry_t *)dup;
430 	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
431 	dep->namelen = args->namelen;
432 	memcpy(dep->name, args->name, dep->namelen);
433 	tagp = XFS_DIR2_DATA_ENTRY_TAG_P(dep);
434 	INT_SET(*tagp, ARCH_CONVERT, (xfs_dir2_data_off_t)((char *)dep - (char *)data));
435 	/*
436 	 * Need to scan fix up the bestfree table.
437 	 */
438 	if (needscan)
439 		xfs_dir2_data_freescan(mp, data, &needlog, NULL);
440 	/*
441 	 * Need to log the data block's header.
442 	 */
443 	if (needlog)
444 		xfs_dir2_data_log_header(tp, dbp);
445 	xfs_dir2_data_log_entry(tp, dbp, dep);
446 	/*
447 	 * If the bests table needs to be changed, do it.
448 	 * Log the change unless we've already done that.
449 	 */
450 	if (INT_GET(bestsp[use_block], ARCH_CONVERT) != INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT)) {
451 		INT_COPY(bestsp[use_block], data->hdr.bestfree[0].length, ARCH_CONVERT);
452 		if (!grown)
453 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
454 	}
455 	/*
456 	 * Now we need to make room to insert the leaf entry.
457 	 * If there are no stale entries, we just insert a hole at index.
458 	 */
459 	if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
460 		/*
461 		 * lep is still good as the index leaf entry.
462 		 */
463 		if (index < INT_GET(leaf->hdr.count, ARCH_CONVERT))
464 			memmove(lep + 1, lep,
465 				(INT_GET(leaf->hdr.count, ARCH_CONVERT) - index) * sizeof(*lep));
466 		/*
467 		 * Record low and high logging indices for the leaf.
468 		 */
469 		lfloglow = index;
470 		lfloghigh = INT_GET(leaf->hdr.count, ARCH_CONVERT);
471 		INT_MOD(leaf->hdr.count, ARCH_CONVERT, +1);
472 	}
473 	/*
474 	 * There are stale entries.
475 	 * We will use one of them for the new entry.
476 	 * It's probably not at the right location, so we'll have to
477 	 * shift some up or down first.
478 	 */
479 	else {
480 		/*
481 		 * If we didn't compact before, we need to find the nearest
482 		 * stale entries before and after our insertion point.
483 		 */
484 		if (compact == 0) {
485 			/*
486 			 * Find the first stale entry before the insertion
487 			 * point, if any.
488 			 */
489 			for (lowstale = index - 1;
490 			     lowstale >= 0 &&
491 				INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) !=
492 				XFS_DIR2_NULL_DATAPTR;
493 			     lowstale--)
494 				continue;
495 			/*
496 			 * Find the next stale entry at or after the insertion
497 			 * point, if any.   Stop if we go so far that the
498 			 * lowstale entry would be better.
499 			 */
500 			for (highstale = index;
501 			     highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
502 				INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) !=
503 				XFS_DIR2_NULL_DATAPTR &&
504 				(lowstale < 0 ||
505 				 index - lowstale - 1 >= highstale - index);
506 			     highstale++)
507 				continue;
508 		}
509 		/*
510 		 * If the low one is better, use it.
511 		 */
512 		if (lowstale >= 0 &&
513 		    (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
514 		     index - lowstale - 1 < highstale - index)) {
515 			ASSERT(index - lowstale - 1 >= 0);
516 			ASSERT(INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) ==
517 			       XFS_DIR2_NULL_DATAPTR);
518 			/*
519 			 * Copy entries up to cover the stale entry
520 			 * and make room for the new entry.
521 			 */
522 			if (index - lowstale - 1 > 0)
523 				memmove(&leaf->ents[lowstale],
524 					&leaf->ents[lowstale + 1],
525 					(index - lowstale - 1) * sizeof(*lep));
526 			lep = &leaf->ents[index - 1];
527 			lfloglow = MIN(lowstale, lfloglow);
528 			lfloghigh = MAX(index - 1, lfloghigh);
529 		}
530 		/*
531 		 * The high one is better, so use that one.
532 		 */
533 		else {
534 			ASSERT(highstale - index >= 0);
535 			ASSERT(INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) ==
536 			       XFS_DIR2_NULL_DATAPTR);
537 			/*
538 			 * Copy entries down to copver the stale entry
539 			 * and make room for the new entry.
540 			 */
541 			if (highstale - index > 0)
542 				memmove(&leaf->ents[index + 1],
543 					&leaf->ents[index],
544 					(highstale - index) * sizeof(*lep));
545 			lep = &leaf->ents[index];
546 			lfloglow = MIN(index, lfloglow);
547 			lfloghigh = MAX(highstale, lfloghigh);
548 		}
549 		INT_MOD(leaf->hdr.stale, ARCH_CONVERT, -1);
550 	}
551 	/*
552 	 * Fill in the new leaf entry.
553 	 */
554 	INT_SET(lep->hashval, ARCH_CONVERT, args->hashval);
555 	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_DB_OFF_TO_DATAPTR(mp, use_block, INT_GET(*tagp, ARCH_CONVERT)));
556 	/*
557 	 * Log the leaf fields and give up the buffers.
558 	 */
559 	xfs_dir2_leaf_log_header(tp, lbp);
560 	xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
561 	xfs_dir2_leaf_check(dp, lbp);
562 	xfs_da_buf_done(lbp);
563 	xfs_dir2_data_check(dp, dbp);
564 	xfs_da_buf_done(dbp);
565 	return 0;
566 }
567 
568 #ifdef DEBUG
569 /*
570  * Check the internal consistency of a leaf1 block.
571  * Pop an assert if something is wrong.
572  */
573 void
xfs_dir2_leaf_check(xfs_inode_t * dp,xfs_dabuf_t * bp)574 xfs_dir2_leaf_check(
575 	xfs_inode_t		*dp,		/* incore directory inode */
576 	xfs_dabuf_t		*bp)		/* leaf's buffer */
577 {
578 	int			i;		/* leaf index */
579 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
580 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
581 	xfs_mount_t		*mp;		/* filesystem mount point */
582 	int			stale;		/* count of stale leaves */
583 
584 	leaf = bp->data;
585 	mp = dp->i_mount;
586 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
587 	/*
588 	 * This value is not restrictive enough.
589 	 * Should factor in the size of the bests table as well.
590 	 * We can deduce a value for that from di_size.
591 	 */
592 	ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) <= XFS_DIR2_MAX_LEAF_ENTS(mp));
593 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
594 	/*
595 	 * Leaves and bests don't overlap.
596 	 */
597 	ASSERT((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] <=
598 	       (char *)XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT));
599 	/*
600 	 * Check hash value order, count stale entries.
601 	 */
602 	for (i = stale = 0; i < INT_GET(leaf->hdr.count, ARCH_CONVERT); i++) {
603 		if (i + 1 < INT_GET(leaf->hdr.count, ARCH_CONVERT))
604 			ASSERT(INT_GET(leaf->ents[i].hashval, ARCH_CONVERT) <=
605 			       INT_GET(leaf->ents[i + 1].hashval, ARCH_CONVERT));
606 		if (INT_GET(leaf->ents[i].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
607 			stale++;
608 	}
609 	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == stale);
610 }
611 #endif	/* DEBUG */
612 
613 /*
614  * Compact out any stale entries in the leaf.
615  * Log the header and changed leaf entries, if any.
616  */
617 void
xfs_dir2_leaf_compact(xfs_da_args_t * args,xfs_dabuf_t * bp)618 xfs_dir2_leaf_compact(
619 	xfs_da_args_t	*args,		/* operation arguments */
620 	xfs_dabuf_t	*bp)		/* leaf buffer */
621 {
622 	int		from;		/* source leaf index */
623 	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
624 	int		loglow;		/* first leaf entry to log */
625 	int		to;		/* target leaf index */
626 
627 	leaf = bp->data;
628 	if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) {
629 		return;
630 	}
631 	/*
632 	 * Compress out the stale entries in place.
633 	 */
634 	for (from = to = 0, loglow = -1; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
635 		if (INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
636 			continue;
637 		/*
638 		 * Only actually copy the entries that are different.
639 		 */
640 		if (from > to) {
641 			if (loglow == -1)
642 				loglow = to;
643 			leaf->ents[to] = leaf->ents[from];
644 		}
645 		to++;
646 	}
647 	/*
648 	 * Update and log the header, log the leaf entries.
649 	 */
650 	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) == from - to);
651 	INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(INT_GET(leaf->hdr.stale, ARCH_CONVERT)));
652 	INT_ZERO(leaf->hdr.stale, ARCH_CONVERT);
653 	xfs_dir2_leaf_log_header(args->trans, bp);
654 	if (loglow != -1)
655 		xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1);
656 }
657 
658 /*
659  * Compact the leaf entries, removing stale ones.
660  * Leave one stale entry behind - the one closest to our
661  * insertion index - and the caller will shift that one to our insertion
662  * point later.
663  * Return new insertion index, where the remaining stale entry is,
664  * and leaf logging indices.
665  */
666 void
xfs_dir2_leaf_compact_x1(xfs_dabuf_t * bp,int * indexp,int * lowstalep,int * highstalep,int * lowlogp,int * highlogp)667 xfs_dir2_leaf_compact_x1(
668 	xfs_dabuf_t	*bp,		/* leaf buffer */
669 	int		*indexp,	/* insertion index */
670 	int		*lowstalep,	/* out: stale entry before us */
671 	int		*highstalep,	/* out: stale entry after us */
672 	int		*lowlogp,	/* out: low log index */
673 	int		*highlogp)	/* out: high log index */
674 {
675 	int		from;		/* source copy index */
676 	int		highstale;	/* stale entry at/after index */
677 	int		index;		/* insertion index */
678 	int		keepstale;	/* source index of kept stale */
679 	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
680 	int		lowstale;	/* stale entry before index */
681 	int		newindex=0;	/* new insertion index */
682 	int		to;		/* destination copy index */
683 
684 	leaf = bp->data;
685 	ASSERT(INT_GET(leaf->hdr.stale, ARCH_CONVERT) > 1);
686 	index = *indexp;
687 	/*
688 	 * Find the first stale entry before our index, if any.
689 	 */
690 	for (lowstale = index - 1;
691 	     lowstale >= 0 &&
692 		INT_GET(leaf->ents[lowstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR;
693 	     lowstale--)
694 		continue;
695 	/*
696 	 * Find the first stale entry at or after our index, if any.
697 	 * Stop if the answer would be worse than lowstale.
698 	 */
699 	for (highstale = index;
700 	     highstale < INT_GET(leaf->hdr.count, ARCH_CONVERT) &&
701 		INT_GET(leaf->ents[highstale].address, ARCH_CONVERT) != XFS_DIR2_NULL_DATAPTR &&
702 		(lowstale < 0 || index - lowstale > highstale - index);
703 	     highstale++)
704 		continue;
705 	/*
706 	 * Pick the better of lowstale and highstale.
707 	 */
708 	if (lowstale >= 0 &&
709 	    (highstale == INT_GET(leaf->hdr.count, ARCH_CONVERT) ||
710 	     index - lowstale <= highstale - index))
711 		keepstale = lowstale;
712 	else
713 		keepstale = highstale;
714 	/*
715 	 * Copy the entries in place, removing all the stale entries
716 	 * except keepstale.
717 	 */
718 	for (from = to = 0; from < INT_GET(leaf->hdr.count, ARCH_CONVERT); from++) {
719 		/*
720 		 * Notice the new value of index.
721 		 */
722 		if (index == from)
723 			newindex = to;
724 		if (from != keepstale &&
725 		    INT_GET(leaf->ents[from].address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR) {
726 			if (from == to)
727 				*lowlogp = to;
728 			continue;
729 		}
730 		/*
731 		 * Record the new keepstale value for the insertion.
732 		 */
733 		if (from == keepstale)
734 			lowstale = highstale = to;
735 		/*
736 		 * Copy only the entries that have moved.
737 		 */
738 		if (from > to)
739 			leaf->ents[to] = leaf->ents[from];
740 		to++;
741 	}
742 	ASSERT(from > to);
743 	/*
744 	 * If the insertion point was past the last entry,
745 	 * set the new insertion point accordingly.
746 	 */
747 	if (index == from)
748 		newindex = to;
749 	*indexp = newindex;
750 	/*
751 	 * Adjust the leaf header values.
752 	 */
753 	INT_MOD(leaf->hdr.count, ARCH_CONVERT, -(from - to));
754 	INT_SET(leaf->hdr.stale, ARCH_CONVERT, 1);
755 	/*
756 	 * Remember the low/high stale value only in the "right"
757 	 * direction.
758 	 */
759 	if (lowstale >= newindex)
760 		lowstale = -1;
761 	else
762 		highstale = INT_GET(leaf->hdr.count, ARCH_CONVERT);
763 	*highlogp = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1;
764 	*lowstalep = lowstale;
765 	*highstalep = highstale;
766 }
767 
768 /*
769  * Getdents (readdir) for leaf and node directories.
770  * This reads the data blocks only, so is the same for both forms.
771  */
772 int						/* error */
xfs_dir2_leaf_getdents(xfs_trans_t * tp,xfs_inode_t * dp,uio_t * uio,int * eofp,xfs_dirent_t * dbp,xfs_dir2_put_t put)773 xfs_dir2_leaf_getdents(
774 	xfs_trans_t		*tp,		/* transaction pointer */
775 	xfs_inode_t		*dp,		/* incore directory inode */
776 	uio_t			*uio,		/* I/O control & vectors */
777 	int			*eofp,		/* out: reached end of dir */
778 	xfs_dirent_t		*dbp,		/* caller's buffer */
779 	xfs_dir2_put_t		put)		/* ABI formatting routine */
780 {
781 	xfs_dabuf_t		*bp;		/* data block buffer */
782 	int			byteoff;	/* offset in current block */
783 	xfs_dir2_db_t		curdb;		/* db for current block */
784 	xfs_dir2_off_t		curoff;		/* current overall offset */
785 	xfs_dir2_data_t		*data;		/* data block structure */
786 	xfs_dir2_data_entry_t	*dep;		/* data entry */
787 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
788 	int			eof;		/* reached end of directory */
789 	int			error=0;		/* error return value */
790 	int			i;		/* temporary loop index */
791 	int			j;		/* temporary loop index */
792 	int			length;		/* temporary length value */
793 	xfs_bmbt_irec_t		*map;		/* map vector for blocks */
794 	xfs_extlen_t		map_blocks;	/* number of fsbs in map */
795 	xfs_dablk_t		map_off;	/* last mapped file offset */
796 	int			map_size;	/* total entries in *map */
797 	int			map_valid;	/* valid entries in *map */
798 	xfs_mount_t		*mp;		/* filesystem mount point */
799 	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
800 	int			nmap;		/* mappings to ask xfs_bmapi */
801 	xfs_dir2_put_args_t	p;		/* formatting arg bundle */
802 	char			*ptr=NULL;		/* pointer to current data */
803 	int			ra_current;	/* number of read-ahead blks */
804 	int			ra_index;	/* *map index for read-ahead */
805 	int			ra_offset;	/* map entry offset for ra */
806 	int			ra_want;	/* readahead count wanted */
807 
808 	/*
809 	 * If the offset is at or past the largest allowed value,
810 	 * give up right away, return eof.
811 	 */
812 	if (uio->uio_offset >= XFS_DIR2_MAX_DATAPTR) {
813 		*eofp = 1;
814 		return 0;
815 	}
816 	mp = dp->i_mount;
817 	/*
818 	 * Setup formatting arguments.
819 	 */
820 	p.dbp = dbp;
821 	p.put = put;
822 	p.uio = uio;
823 	/*
824 	 * Set up to bmap a number of blocks based on the caller's
825 	 * buffer size, the directory block size, and the filesystem
826 	 * block size.
827 	 */
828 	map_size =
829 		howmany(uio->uio_resid + mp->m_dirblksize,
830 			mp->m_sb.sb_blocksize);
831 	map = kmem_alloc(map_size * sizeof(*map), KM_SLEEP);
832 	map_valid = ra_index = ra_offset = ra_current = map_blocks = 0;
833 	bp = NULL;
834 	eof = 1;
835 	/*
836 	 * Inside the loop we keep the main offset value as a byte offset
837 	 * in the directory file.
838 	 */
839 	curoff = XFS_DIR2_DATAPTR_TO_BYTE(mp, uio->uio_offset);
840 	/*
841 	 * Force this conversion through db so we truncate the offset
842 	 * down to get the start of the data block.
843 	 */
844 	map_off = XFS_DIR2_DB_TO_DA(mp, XFS_DIR2_BYTE_TO_DB(mp, curoff));
845 	/*
846 	 * Loop over directory entries until we reach the end offset.
847 	 * Get more blocks and readahead as necessary.
848 	 */
849 	while (curoff < XFS_DIR2_LEAF_OFFSET) {
850 		/*
851 		 * If we have no buffer, or we're off the end of the
852 		 * current buffer, need to get another one.
853 		 */
854 		if (!bp || ptr >= (char *)bp->data + mp->m_dirblksize) {
855 			/*
856 			 * If we have a buffer, we need to release it and
857 			 * take it out of the mapping.
858 			 */
859 			if (bp) {
860 				xfs_da_brelse(tp, bp);
861 				bp = NULL;
862 				map_blocks -= mp->m_dirblkfsbs;
863 				/*
864 				 * Loop to get rid of the extents for the
865 				 * directory block.
866 				 */
867 				for (i = mp->m_dirblkfsbs; i > 0; ) {
868 					j = MIN((int)map->br_blockcount, i);
869 					map->br_blockcount -= j;
870 					map->br_startblock += j;
871 					map->br_startoff += j;
872 					/*
873 					 * If mapping is done, pitch it from
874 					 * the table.
875 					 */
876 					if (!map->br_blockcount && --map_valid)
877 						memmove(&map[0], &map[1],
878 							sizeof(map[0]) *
879 							map_valid);
880 					i -= j;
881 				}
882 			}
883 			/*
884 			 * Recalculate the readahead blocks wanted.
885 			 */
886 			ra_want = howmany(uio->uio_resid + mp->m_dirblksize,
887 					  mp->m_sb.sb_blocksize) - 1;
888 			/*
889 			 * If we don't have as many as we want, and we haven't
890 			 * run out of data blocks, get some more mappings.
891 			 */
892 			if (1 + ra_want > map_blocks &&
893 			    map_off <
894 			    XFS_DIR2_BYTE_TO_DA(mp, XFS_DIR2_LEAF_OFFSET)) {
895 				/*
896 				 * Get more bmaps, fill in after the ones
897 				 * we already have in the table.
898 				 */
899 				nmap = map_size - map_valid;
900 				error = xfs_bmapi(tp, dp,
901 					map_off,
902 					XFS_DIR2_BYTE_TO_DA(mp,
903 						XFS_DIR2_LEAF_OFFSET) - map_off,
904 					XFS_BMAPI_METADATA, NULL, 0,
905 					&map[map_valid], &nmap, NULL);
906 				/*
907 				 * Don't know if we should ignore this or
908 				 * try to return an error.
909 				 * The trouble with returning errors
910 				 * is that readdir will just stop without
911 				 * actually passing the error through.
912 				 */
913 				if (error)
914 					break;	/* XXX */
915 				/*
916 				 * If we got all the mappings we asked for,
917 				 * set the final map offset based on the
918 				 * last bmap value received.
919 				 * Otherwise, we've reached the end.
920 				 */
921 				if (nmap == map_size - map_valid)
922 					map_off =
923 					map[map_valid + nmap - 1].br_startoff +
924 					map[map_valid + nmap - 1].br_blockcount;
925 				else
926 					map_off =
927 						XFS_DIR2_BYTE_TO_DA(mp,
928 							XFS_DIR2_LEAF_OFFSET);
929 				/*
930 				 * Look for holes in the mapping, and
931 				 * eliminate them.  Count up the valid blocks.
932 				 */
933 				for (i = map_valid; i < map_valid + nmap; ) {
934 					if (map[i].br_startblock ==
935 					    HOLESTARTBLOCK) {
936 						nmap--;
937 						length = map_valid + nmap - i;
938 						if (length)
939 							memmove(&map[i],
940 								&map[i + 1],
941 								sizeof(map[i]) *
942 								length);
943 					} else {
944 						map_blocks +=
945 							map[i].br_blockcount;
946 						i++;
947 					}
948 				}
949 				map_valid += nmap;
950 			}
951 			/*
952 			 * No valid mappings, so no more data blocks.
953 			 */
954 			if (!map_valid) {
955 				curoff = XFS_DIR2_DA_TO_BYTE(mp, map_off);
956 				break;
957 			}
958 			/*
959 			 * Read the directory block starting at the first
960 			 * mapping.
961 			 */
962 			curdb = XFS_DIR2_DA_TO_DB(mp, map->br_startoff);
963 			error = xfs_da_read_buf(tp, dp, map->br_startoff,
964 				map->br_blockcount >= mp->m_dirblkfsbs ?
965 				    XFS_FSB_TO_DADDR(mp, map->br_startblock) :
966 				    -1,
967 				&bp, XFS_DATA_FORK);
968 			/*
969 			 * Should just skip over the data block instead
970 			 * of giving up.
971 			 */
972 			if (error)
973 				break;	/* XXX */
974 			/*
975 			 * Adjust the current amount of read-ahead: we just
976 			 * read a block that was previously ra.
977 			 */
978 			if (ra_current)
979 				ra_current -= mp->m_dirblkfsbs;
980 			/*
981 			 * Do we need more readahead?
982 			 */
983 			for (ra_index = ra_offset = i = 0;
984 			     ra_want > ra_current && i < map_blocks;
985 			     i += mp->m_dirblkfsbs) {
986 				ASSERT(ra_index < map_valid);
987 				/*
988 				 * Read-ahead a contiguous directory block.
989 				 */
990 				if (i > ra_current &&
991 				    map[ra_index].br_blockcount >=
992 				    mp->m_dirblkfsbs) {
993 					xfs_baread(mp->m_ddev_targp,
994 						XFS_FSB_TO_DADDR(mp,
995 						   map[ra_index].br_startblock +
996 						   ra_offset),
997 						(int)BTOBB(mp->m_dirblksize));
998 					ra_current = i;
999 				}
1000 				/*
1001 				 * Read-ahead a non-contiguous directory block.
1002 				 * This doesn't use our mapping, but this
1003 				 * is a very rare case.
1004 				 */
1005 				else if (i > ra_current) {
1006 					(void)xfs_da_reada_buf(tp, dp,
1007 						map[ra_index].br_startoff +
1008 						ra_offset, XFS_DATA_FORK);
1009 					ra_current = i;
1010 				}
1011 				/*
1012 				 * Advance offset through the mapping table.
1013 				 */
1014 				for (j = 0; j < mp->m_dirblkfsbs; j++) {
1015 					/*
1016 					 * The rest of this extent but not
1017 					 * more than a dir block.
1018 					 */
1019 					length = MIN(mp->m_dirblkfsbs,
1020 						(int)(map[ra_index].br_blockcount -
1021 						ra_offset));
1022 					j += length;
1023 					ra_offset += length;
1024 					/*
1025 					 * Advance to the next mapping if
1026 					 * this one is used up.
1027 					 */
1028 					if (ra_offset ==
1029 					    map[ra_index].br_blockcount) {
1030 						ra_offset = 0;
1031 						ra_index++;
1032 					}
1033 				}
1034 			}
1035 			/*
1036 			 * Having done a read, we need to set a new offset.
1037 			 */
1038 			newoff = XFS_DIR2_DB_OFF_TO_BYTE(mp, curdb, 0);
1039 			/*
1040 			 * Start of the current block.
1041 			 */
1042 			if (curoff < newoff)
1043 				curoff = newoff;
1044 			/*
1045 			 * Make sure we're in the right block.
1046 			 */
1047 			else if (curoff > newoff)
1048 				ASSERT(XFS_DIR2_BYTE_TO_DB(mp, curoff) ==
1049 				       curdb);
1050 			data = bp->data;
1051 			xfs_dir2_data_check(dp, bp);
1052 			/*
1053 			 * Find our position in the block.
1054 			 */
1055 			ptr = (char *)&data->u;
1056 			byteoff = XFS_DIR2_BYTE_TO_OFF(mp, curoff);
1057 			/*
1058 			 * Skip past the header.
1059 			 */
1060 			if (byteoff == 0)
1061 				curoff += (uint)sizeof(data->hdr);
1062 			/*
1063 			 * Skip past entries until we reach our offset.
1064 			 */
1065 			else {
1066 				while ((char *)ptr - (char *)data < byteoff) {
1067 					dup = (xfs_dir2_data_unused_t *)ptr;
1068 
1069 					if (INT_GET(dup->freetag, ARCH_CONVERT)
1070 						  == XFS_DIR2_DATA_FREE_TAG) {
1071 
1072 						length = INT_GET(dup->length,
1073 								 ARCH_CONVERT);
1074 						ptr += length;
1075 						continue;
1076 					}
1077 					dep = (xfs_dir2_data_entry_t *)ptr;
1078 					length =
1079 					   XFS_DIR2_DATA_ENTSIZE(dep->namelen);
1080 					ptr += length;
1081 				}
1082 				/*
1083 				 * Now set our real offset.
1084 				 */
1085 				curoff =
1086 					XFS_DIR2_DB_OFF_TO_BYTE(mp,
1087 					    XFS_DIR2_BYTE_TO_DB(mp, curoff),
1088 					    (char *)ptr - (char *)data);
1089 				if (ptr >= (char *)data + mp->m_dirblksize) {
1090 					continue;
1091 				}
1092 			}
1093 		}
1094 		/*
1095 		 * We have a pointer to an entry.
1096 		 * Is it a live one?
1097 		 */
1098 		dup = (xfs_dir2_data_unused_t *)ptr;
1099 		/*
1100 		 * No, it's unused, skip over it.
1101 		 */
1102 		if (INT_GET(dup->freetag, ARCH_CONVERT)
1103 						== XFS_DIR2_DATA_FREE_TAG) {
1104 			length = INT_GET(dup->length, ARCH_CONVERT);
1105 			ptr += length;
1106 			curoff += length;
1107 			continue;
1108 		}
1109 
1110 		/*
1111 		 * Copy the entry into the putargs, and try formatting it.
1112 		 */
1113 		dep = (xfs_dir2_data_entry_t *)ptr;
1114 
1115 		p.namelen = dep->namelen;
1116 
1117 		length = XFS_DIR2_DATA_ENTSIZE(p.namelen);
1118 
1119 		p.cook = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff + length);
1120 
1121 		p.ino = INT_GET(dep->inumber, ARCH_CONVERT);
1122 #if XFS_BIG_INUMS
1123 		p.ino += mp->m_inoadd;
1124 #endif
1125 		p.name = (char *)dep->name;
1126 
1127 		error = p.put(&p);
1128 
1129 		/*
1130 		 * Won't fit.  Return to caller.
1131 		 */
1132 		if (!p.done) {
1133 			eof = 0;
1134 			break;
1135 		}
1136 		/*
1137 		 * Advance to next entry in the block.
1138 		 */
1139 		ptr += length;
1140 		curoff += length;
1141 	}
1142 
1143 	/*
1144 	 * All done.  Set output offset value to current offset.
1145 	 */
1146 	*eofp = eof;
1147 	if (curoff > XFS_DIR2_DATAPTR_TO_BYTE(mp, XFS_DIR2_MAX_DATAPTR))
1148 		uio->uio_offset = XFS_DIR2_MAX_DATAPTR;
1149 	else
1150 		uio->uio_offset = XFS_DIR2_BYTE_TO_DATAPTR(mp, curoff);
1151 	kmem_free(map, map_size * sizeof(*map));
1152 	if (bp)
1153 		xfs_da_brelse(tp, bp);
1154 	return error;
1155 }
1156 
1157 /*
1158  * Initialize a new leaf block, leaf1 or leafn magic accepted.
1159  */
1160 int
xfs_dir2_leaf_init(xfs_da_args_t * args,xfs_dir2_db_t bno,xfs_dabuf_t ** bpp,int magic)1161 xfs_dir2_leaf_init(
1162 	xfs_da_args_t		*args,		/* operation arguments */
1163 	xfs_dir2_db_t		bno,		/* directory block number */
1164 	xfs_dabuf_t		**bpp,		/* out: leaf buffer */
1165 	int			magic)		/* magic number for block */
1166 {
1167 	xfs_dabuf_t		*bp;		/* leaf buffer */
1168 	xfs_inode_t		*dp;		/* incore directory inode */
1169 	int			error;		/* error return code */
1170 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1171 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1172 	xfs_mount_t		*mp;		/* filesystem mount point */
1173 	xfs_trans_t		*tp;		/* transaction pointer */
1174 
1175 	dp = args->dp;
1176 	ASSERT(dp != NULL);
1177 	tp = args->trans;
1178 	mp = dp->i_mount;
1179 	ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
1180 	       bno < XFS_DIR2_FREE_FIRSTDB(mp));
1181 	/*
1182 	 * Get the buffer for the block.
1183 	 */
1184 	error = xfs_da_get_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, bno), -1, &bp,
1185 		XFS_DATA_FORK);
1186 	if (error) {
1187 		return error;
1188 	}
1189 	ASSERT(bp != NULL);
1190 	leaf = bp->data;
1191 	/*
1192 	 * Initialize the header.
1193 	 */
1194 	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, magic);
1195 	INT_ZERO(leaf->hdr.info.forw, ARCH_CONVERT);
1196 	INT_ZERO(leaf->hdr.info.back, ARCH_CONVERT);
1197 	INT_ZERO(leaf->hdr.count, ARCH_CONVERT);
1198 	INT_ZERO(leaf->hdr.stale, ARCH_CONVERT);
1199 	xfs_dir2_leaf_log_header(tp, bp);
1200 	/*
1201 	 * If it's a leaf-format directory initialize the tail.
1202 	 * In this case our caller has the real bests table to copy into
1203 	 * the block.
1204 	 */
1205 	if (magic == XFS_DIR2_LEAF1_MAGIC) {
1206 		ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
1207 		INT_ZERO(ltp->bestcount, ARCH_CONVERT);
1208 		xfs_dir2_leaf_log_tail(tp, bp);
1209 	}
1210 	*bpp = bp;
1211 	return 0;
1212 }
1213 
1214 /*
1215  * Log the bests entries indicated from a leaf1 block.
1216  */
1217 void
xfs_dir2_leaf_log_bests(xfs_trans_t * tp,xfs_dabuf_t * bp,int first,int last)1218 xfs_dir2_leaf_log_bests(
1219 	xfs_trans_t		*tp,		/* transaction pointer */
1220 	xfs_dabuf_t		*bp,		/* leaf buffer */
1221 	int			first,		/* first entry to log */
1222 	int			last)		/* last entry to log */
1223 {
1224 	xfs_dir2_data_off_t	*firstb;	/* pointer to first entry */
1225 	xfs_dir2_data_off_t	*lastb;		/* pointer to last entry */
1226 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1227 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1228 
1229 	leaf = bp->data;
1230 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
1231 	ltp = XFS_DIR2_LEAF_TAIL_P(tp->t_mountp, leaf);
1232 	firstb = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT) + first;
1233 	lastb = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT) + last;
1234 	xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
1235 		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
1236 }
1237 
1238 /*
1239  * Log the leaf entries indicated from a leaf1 or leafn block.
1240  */
1241 void
xfs_dir2_leaf_log_ents(xfs_trans_t * tp,xfs_dabuf_t * bp,int first,int last)1242 xfs_dir2_leaf_log_ents(
1243 	xfs_trans_t		*tp,		/* transaction pointer */
1244 	xfs_dabuf_t		*bp,		/* leaf buffer */
1245 	int			first,		/* first entry to log */
1246 	int			last)		/* last entry to log */
1247 {
1248 	xfs_dir2_leaf_entry_t	*firstlep;	/* pointer to first entry */
1249 	xfs_dir2_leaf_entry_t	*lastlep;	/* pointer to last entry */
1250 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1251 
1252 	leaf = bp->data;
1253 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
1254 	       INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
1255 	firstlep = &leaf->ents[first];
1256 	lastlep = &leaf->ents[last];
1257 	xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
1258 		(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
1259 }
1260 
1261 /*
1262  * Log the header of the leaf1 or leafn block.
1263  */
1264 void
xfs_dir2_leaf_log_header(xfs_trans_t * tp,xfs_dabuf_t * bp)1265 xfs_dir2_leaf_log_header(
1266 	xfs_trans_t		*tp,		/* transaction pointer */
1267 	xfs_dabuf_t		*bp)		/* leaf buffer */
1268 {
1269 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1270 
1271 	leaf = bp->data;
1272 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC ||
1273 	       INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
1274 	xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1275 		(uint)(sizeof(leaf->hdr) - 1));
1276 }
1277 
1278 /*
1279  * Log the tail of the leaf1 block.
1280  */
1281 void
xfs_dir2_leaf_log_tail(xfs_trans_t * tp,xfs_dabuf_t * bp)1282 xfs_dir2_leaf_log_tail(
1283 	xfs_trans_t		*tp,		/* transaction pointer */
1284 	xfs_dabuf_t		*bp)		/* leaf buffer */
1285 {
1286 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1287 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1288 	xfs_mount_t		*mp;		/* filesystem mount point */
1289 
1290 	mp = tp->t_mountp;
1291 	leaf = bp->data;
1292 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAF1_MAGIC);
1293 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
1294 	xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1295 		(uint)(mp->m_dirblksize - 1));
1296 }
1297 
1298 /*
1299  * Look up the entry referred to by args in the leaf format directory.
1300  * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
1301  * is also used by the node-format code.
1302  */
1303 int
xfs_dir2_leaf_lookup(xfs_da_args_t * args)1304 xfs_dir2_leaf_lookup(
1305 	xfs_da_args_t		*args)		/* operation arguments */
1306 {
1307 	xfs_dabuf_t		*dbp;		/* data block buffer */
1308 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
1309 	xfs_inode_t		*dp;		/* incore directory inode */
1310 	int			error;		/* error return code */
1311 	int			index;		/* found entry index */
1312 	xfs_dabuf_t		*lbp;		/* leaf buffer */
1313 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1314 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1315 	xfs_trans_t		*tp;		/* transaction pointer */
1316 
1317 	xfs_dir2_trace_args("leaf_lookup", args);
1318 	/*
1319 	 * Look up name in the leaf block, returning both buffers and index.
1320 	 */
1321 	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1322 		return error;
1323 	}
1324 	tp = args->trans;
1325 	dp = args->dp;
1326 	xfs_dir2_leaf_check(dp, lbp);
1327 	leaf = lbp->data;
1328 	/*
1329 	 * Get to the leaf entry and contained data entry address.
1330 	 */
1331 	lep = &leaf->ents[index];
1332 	/*
1333 	 * Point to the data entry.
1334 	 */
1335 	dep = (xfs_dir2_data_entry_t *)
1336 	      ((char *)dbp->data +
1337 	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT)));
1338 	/*
1339 	 * Return the found inode number.
1340 	 */
1341 	args->inumber = INT_GET(dep->inumber, ARCH_CONVERT);
1342 	xfs_da_brelse(tp, dbp);
1343 	xfs_da_brelse(tp, lbp);
1344 	return XFS_ERROR(EEXIST);
1345 }
1346 
1347 /*
1348  * Look up name/hash in the leaf block.
1349  * Fill in indexp with the found index, and dbpp with the data buffer.
1350  * If not found dbpp will be NULL, and ENOENT comes back.
1351  * lbpp will always be filled in with the leaf buffer unless there's an error.
1352  */
1353 static int					/* error */
xfs_dir2_leaf_lookup_int(xfs_da_args_t * args,xfs_dabuf_t ** lbpp,int * indexp,xfs_dabuf_t ** dbpp)1354 xfs_dir2_leaf_lookup_int(
1355 	xfs_da_args_t		*args,		/* operation arguments */
1356 	xfs_dabuf_t		**lbpp,		/* out: leaf buffer */
1357 	int			*indexp,	/* out: index in leaf block */
1358 	xfs_dabuf_t		**dbpp)		/* out: data buffer */
1359 {
1360 	xfs_dir2_db_t		curdb;		/* current data block number */
1361 	xfs_dabuf_t		*dbp;		/* data buffer */
1362 	xfs_dir2_data_entry_t	*dep;		/* data entry */
1363 	xfs_inode_t		*dp;		/* incore directory inode */
1364 	int			error;		/* error return code */
1365 	int			index;		/* index in leaf block */
1366 	xfs_dabuf_t		*lbp;		/* leaf buffer */
1367 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1368 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1369 	xfs_mount_t		*mp;		/* filesystem mount point */
1370 	xfs_dir2_db_t		newdb;		/* new data block number */
1371 	xfs_trans_t		*tp;		/* transaction pointer */
1372 
1373 	dp = args->dp;
1374 	tp = args->trans;
1375 	mp = dp->i_mount;
1376 	/*
1377 	 * Read the leaf block into the buffer.
1378 	 */
1379 	if ((error =
1380 	    xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
1381 		    XFS_DATA_FORK))) {
1382 		return error;
1383 	}
1384 	*lbpp = lbp;
1385 	leaf = lbp->data;
1386 	xfs_dir2_leaf_check(dp, lbp);
1387 	/*
1388 	 * Look for the first leaf entry with our hash value.
1389 	 */
1390 	index = xfs_dir2_leaf_search_hash(args, lbp);
1391 	/*
1392 	 * Loop over all the entries with the right hash value
1393 	 * looking to match the name.
1394 	 */
1395 	for (lep = &leaf->ents[index], dbp = NULL, curdb = -1;
1396 	     index < INT_GET(leaf->hdr.count, ARCH_CONVERT) && INT_GET(lep->hashval, ARCH_CONVERT) == args->hashval;
1397 	     lep++, index++) {
1398 		/*
1399 		 * Skip over stale leaf entries.
1400 		 */
1401 		if (INT_GET(lep->address, ARCH_CONVERT) == XFS_DIR2_NULL_DATAPTR)
1402 			continue;
1403 		/*
1404 		 * Get the new data block number.
1405 		 */
1406 		newdb = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
1407 		/*
1408 		 * If it's not the same as the old data block number,
1409 		 * need to pitch the old one and read the new one.
1410 		 */
1411 		if (newdb != curdb) {
1412 			if (dbp)
1413 				xfs_da_brelse(tp, dbp);
1414 			if ((error =
1415 			    xfs_da_read_buf(tp, dp,
1416 				    XFS_DIR2_DB_TO_DA(mp, newdb), -1, &dbp,
1417 				    XFS_DATA_FORK))) {
1418 				xfs_da_brelse(tp, lbp);
1419 				return error;
1420 			}
1421 			xfs_dir2_data_check(dp, dbp);
1422 			curdb = newdb;
1423 		}
1424 		/*
1425 		 * Point to the data entry.
1426 		 */
1427 		dep = (xfs_dir2_data_entry_t *)
1428 		      ((char *)dbp->data +
1429 		       XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
1430 		/*
1431 		 * If it matches then return it.
1432 		 */
1433 		if (dep->namelen == args->namelen &&
1434 		    dep->name[0] == args->name[0] &&
1435 		    memcmp(dep->name, args->name, args->namelen) == 0) {
1436 			*dbpp = dbp;
1437 			*indexp = index;
1438 			return 0;
1439 		}
1440 	}
1441 	/*
1442 	 * No match found, return ENOENT.
1443 	 */
1444 	ASSERT(args->oknoent);
1445 	if (dbp)
1446 		xfs_da_brelse(tp, dbp);
1447 	xfs_da_brelse(tp, lbp);
1448 	return XFS_ERROR(ENOENT);
1449 }
1450 
1451 /*
1452  * Remove an entry from a leaf format directory.
1453  */
1454 int						/* error */
xfs_dir2_leaf_removename(xfs_da_args_t * args)1455 xfs_dir2_leaf_removename(
1456 	xfs_da_args_t		*args)		/* operation arguments */
1457 {
1458 	xfs_dir2_data_off_t	*bestsp;	/* leaf block best freespace */
1459 	xfs_dir2_data_t		*data;		/* data block structure */
1460 	xfs_dir2_db_t		db;		/* data block number */
1461 	xfs_dabuf_t		*dbp;		/* data block buffer */
1462 	xfs_dir2_data_entry_t	*dep;		/* data entry structure */
1463 	xfs_inode_t		*dp;		/* incore directory inode */
1464 	int			error;		/* error return code */
1465 	xfs_dir2_db_t		i;		/* temporary data block # */
1466 	int			index;		/* index into leaf entries */
1467 	xfs_dabuf_t		*lbp;		/* leaf buffer */
1468 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1469 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1470 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1471 	xfs_mount_t		*mp;		/* filesystem mount point */
1472 	int			needlog;	/* need to log data header */
1473 	int			needscan;	/* need to rescan data frees */
1474 	xfs_dir2_data_off_t	oldbest;	/* old value of best free */
1475 	xfs_trans_t		*tp;		/* transaction pointer */
1476 
1477 	xfs_dir2_trace_args("leaf_removename", args);
1478 	/*
1479 	 * Lookup the leaf entry, get the leaf and data blocks read in.
1480 	 */
1481 	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1482 		return error;
1483 	}
1484 	dp = args->dp;
1485 	tp = args->trans;
1486 	mp = dp->i_mount;
1487 	leaf = lbp->data;
1488 	data = dbp->data;
1489 	xfs_dir2_data_check(dp, dbp);
1490 	/*
1491 	 * Point to the leaf entry, use that to point to the data entry.
1492 	 */
1493 	lep = &leaf->ents[index];
1494 	db = XFS_DIR2_DATAPTR_TO_DB(mp, INT_GET(lep->address, ARCH_CONVERT));
1495 	dep = (xfs_dir2_data_entry_t *)
1496 	      ((char *)data + XFS_DIR2_DATAPTR_TO_OFF(mp, INT_GET(lep->address, ARCH_CONVERT)));
1497 	needscan = needlog = 0;
1498 	oldbest = INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT);
1499 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
1500 	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
1501 	ASSERT(INT_GET(bestsp[db], ARCH_CONVERT) == oldbest);
1502 	/*
1503 	 * Mark the former data entry unused.
1504 	 */
1505 	xfs_dir2_data_make_free(tp, dbp,
1506 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
1507 		XFS_DIR2_DATA_ENTSIZE(dep->namelen), &needlog, &needscan);
1508 	/*
1509 	 * We just mark the leaf entry stale by putting a null in it.
1510 	 */
1511 	INT_MOD(leaf->hdr.stale, ARCH_CONVERT, +1);
1512 	xfs_dir2_leaf_log_header(tp, lbp);
1513 	INT_SET(lep->address, ARCH_CONVERT, XFS_DIR2_NULL_DATAPTR);
1514 	xfs_dir2_leaf_log_ents(tp, lbp, index, index);
1515 	/*
1516 	 * Scan the freespace in the data block again if necessary,
1517 	 * log the data block header if necessary.
1518 	 */
1519 	if (needscan)
1520 		xfs_dir2_data_freescan(mp, data, &needlog, NULL);
1521 	if (needlog)
1522 		xfs_dir2_data_log_header(tp, dbp);
1523 	/*
1524 	 * If the longest freespace in the data block has changed,
1525 	 * put the new value in the bests table and log that.
1526 	 */
1527 	if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) != oldbest) {
1528 		INT_COPY(bestsp[db], data->hdr.bestfree[0].length, ARCH_CONVERT);
1529 		xfs_dir2_leaf_log_bests(tp, lbp, db, db);
1530 	}
1531 	xfs_dir2_data_check(dp, dbp);
1532 	/*
1533 	 * If the data block is now empty then get rid of the data block.
1534 	 */
1535 	if (INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) ==
1536 	    mp->m_dirblksize - (uint)sizeof(data->hdr)) {
1537 		ASSERT(db != mp->m_dirdatablk);
1538 		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1539 			/*
1540 			 * Nope, can't get rid of it because it caused
1541 			 * allocation of a bmap btree block to do so.
1542 			 * Just go on, returning success, leaving the
1543 			 * empty block in place.
1544 			 */
1545 			if (error == ENOSPC && args->total == 0) {
1546 				xfs_da_buf_done(dbp);
1547 				error = 0;
1548 			}
1549 			xfs_dir2_leaf_check(dp, lbp);
1550 			xfs_da_buf_done(lbp);
1551 			return error;
1552 		}
1553 		dbp = NULL;
1554 		/*
1555 		 * If this is the last data block then compact the
1556 		 * bests table by getting rid of entries.
1557 		 */
1558 		if (db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1) {
1559 			/*
1560 			 * Look for the last active entry (i).
1561 			 */
1562 			for (i = db - 1; i > 0; i--) {
1563 				if (INT_GET(bestsp[i], ARCH_CONVERT) != NULLDATAOFF)
1564 					break;
1565 			}
1566 			/*
1567 			 * Copy the table down so inactive entries at the
1568 			 * end are removed.
1569 			 */
1570 			memmove(&bestsp[db - i], bestsp,
1571 				(INT_GET(ltp->bestcount, ARCH_CONVERT) - (db - i)) * sizeof(*bestsp));
1572 			INT_MOD(ltp->bestcount, ARCH_CONVERT, -(db - i));
1573 			xfs_dir2_leaf_log_tail(tp, lbp);
1574 			xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
1575 		} else
1576 			INT_SET(bestsp[db], ARCH_CONVERT, NULLDATAOFF);
1577 	}
1578 	/*
1579 	 * If the data block was not the first one, drop it.
1580 	 */
1581 	else if (db != mp->m_dirdatablk && dbp != NULL) {
1582 		xfs_da_buf_done(dbp);
1583 		dbp = NULL;
1584 	}
1585 	xfs_dir2_leaf_check(dp, lbp);
1586 	/*
1587 	 * See if we can convert to block form.
1588 	 */
1589 	return xfs_dir2_leaf_to_block(args, lbp, dbp);
1590 }
1591 
1592 /*
1593  * Replace the inode number in a leaf format directory entry.
1594  */
1595 int						/* error */
xfs_dir2_leaf_replace(xfs_da_args_t * args)1596 xfs_dir2_leaf_replace(
1597 	xfs_da_args_t		*args)		/* operation arguments */
1598 {
1599 	xfs_dabuf_t		*dbp;		/* data block buffer */
1600 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
1601 	xfs_inode_t		*dp;		/* incore directory inode */
1602 	int			error;		/* error return code */
1603 	int			index;		/* index of leaf entry */
1604 	xfs_dabuf_t		*lbp;		/* leaf buffer */
1605 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1606 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1607 	xfs_trans_t		*tp;		/* transaction pointer */
1608 
1609 	xfs_dir2_trace_args("leaf_replace", args);
1610 	/*
1611 	 * Look up the entry.
1612 	 */
1613 	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1614 		return error;
1615 	}
1616 	dp = args->dp;
1617 	leaf = lbp->data;
1618 	/*
1619 	 * Point to the leaf entry, get data address from it.
1620 	 */
1621 	lep = &leaf->ents[index];
1622 	/*
1623 	 * Point to the data entry.
1624 	 */
1625 	dep = (xfs_dir2_data_entry_t *)
1626 	      ((char *)dbp->data +
1627 	       XFS_DIR2_DATAPTR_TO_OFF(dp->i_mount, INT_GET(lep->address, ARCH_CONVERT)));
1628 	ASSERT(args->inumber != INT_GET(dep->inumber, ARCH_CONVERT));
1629 	/*
1630 	 * Put the new inode number in, log it.
1631 	 */
1632 	INT_SET(dep->inumber, ARCH_CONVERT, args->inumber);
1633 	tp = args->trans;
1634 	xfs_dir2_data_log_entry(tp, dbp, dep);
1635 	xfs_da_buf_done(dbp);
1636 	xfs_dir2_leaf_check(dp, lbp);
1637 	xfs_da_brelse(tp, lbp);
1638 	return 0;
1639 }
1640 
1641 /*
1642  * Return index in the leaf block (lbp) which is either the first
1643  * one with this hash value, or if there are none, the insert point
1644  * for that hash value.
1645  */
1646 int						/* index value */
xfs_dir2_leaf_search_hash(xfs_da_args_t * args,xfs_dabuf_t * lbp)1647 xfs_dir2_leaf_search_hash(
1648 	xfs_da_args_t		*args,		/* operation arguments */
1649 	xfs_dabuf_t		*lbp)		/* leaf buffer */
1650 {
1651 	xfs_dahash_t		hash=0;		/* hash from this entry */
1652 	xfs_dahash_t		hashwant;	/* hash value looking for */
1653 	int			high;		/* high leaf index */
1654 	int			low;		/* low leaf index */
1655 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1656 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1657 	int			mid=0;		/* current leaf index */
1658 
1659 	leaf = lbp->data;
1660 #ifndef __KERNEL__
1661 	if (INT_ISZERO(leaf->hdr.count, ARCH_CONVERT))
1662 		return 0;
1663 #endif
1664 	/*
1665 	 * Note, the table cannot be empty, so we have to go through the loop.
1666 	 * Binary search the leaf entries looking for our hash value.
1667 	 */
1668 	for (lep = leaf->ents, low = 0, high = INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1,
1669 		hashwant = args->hashval;
1670 	     low <= high; ) {
1671 		mid = (low + high) >> 1;
1672 		if ((hash = INT_GET(lep[mid].hashval, ARCH_CONVERT)) == hashwant)
1673 			break;
1674 		if (hash < hashwant)
1675 			low = mid + 1;
1676 		else
1677 			high = mid - 1;
1678 	}
1679 	/*
1680 	 * Found one, back up through all the equal hash values.
1681 	 */
1682 	if (hash == hashwant) {
1683 		while (mid > 0 && INT_GET(lep[mid - 1].hashval, ARCH_CONVERT) == hashwant) {
1684 			mid--;
1685 		}
1686 	}
1687 	/*
1688 	 * Need to point to an entry higher than ours.
1689 	 */
1690 	else if (hash < hashwant)
1691 		mid++;
1692 	return mid;
1693 }
1694 
1695 /*
1696  * Trim off a trailing data block.  We know it's empty since the leaf
1697  * freespace table says so.
1698  */
1699 int						/* error */
xfs_dir2_leaf_trim_data(xfs_da_args_t * args,xfs_dabuf_t * lbp,xfs_dir2_db_t db)1700 xfs_dir2_leaf_trim_data(
1701 	xfs_da_args_t		*args,		/* operation arguments */
1702 	xfs_dabuf_t		*lbp,		/* leaf buffer */
1703 	xfs_dir2_db_t		db)		/* data block number */
1704 {
1705 	xfs_dir2_data_off_t	*bestsp;	/* leaf bests table */
1706 #ifdef DEBUG
1707 	xfs_dir2_data_t		*data;		/* data block structure */
1708 #endif
1709 	xfs_dabuf_t		*dbp;		/* data block buffer */
1710 	xfs_inode_t		*dp;		/* incore directory inode */
1711 	int			error;		/* error return value */
1712 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1713 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1714 	xfs_mount_t		*mp;		/* filesystem mount point */
1715 	xfs_trans_t		*tp;		/* transaction pointer */
1716 
1717 	dp = args->dp;
1718 	mp = dp->i_mount;
1719 	tp = args->trans;
1720 	/*
1721 	 * Read the offending data block.  We need its buffer.
1722 	 */
1723 	if ((error = xfs_da_read_buf(tp, dp, XFS_DIR2_DB_TO_DA(mp, db), -1, &dbp,
1724 			XFS_DATA_FORK))) {
1725 		return error;
1726 	}
1727 #ifdef DEBUG
1728 	data = dbp->data;
1729 	ASSERT(INT_GET(data->hdr.magic, ARCH_CONVERT) == XFS_DIR2_DATA_MAGIC);
1730 #endif
1731 	/* this seems to be an error
1732 	 * data is only valid if DEBUG is defined?
1733 	 * RMC 09/08/1999
1734 	 */
1735 
1736 	leaf = lbp->data;
1737 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
1738 	ASSERT(INT_GET(data->hdr.bestfree[0].length, ARCH_CONVERT) ==
1739 	       mp->m_dirblksize - (uint)sizeof(data->hdr));
1740 	ASSERT(db == INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
1741 	/*
1742 	 * Get rid of the data block.
1743 	 */
1744 	if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1745 		ASSERT(error != ENOSPC);
1746 		xfs_da_brelse(tp, dbp);
1747 		return error;
1748 	}
1749 	/*
1750 	 * Eliminate the last bests entry from the table.
1751 	 */
1752 	bestsp = XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT);
1753 	INT_MOD(ltp->bestcount, ARCH_CONVERT, -1);
1754 	memmove(&bestsp[1], &bestsp[0], INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(*bestsp));
1755 	xfs_dir2_leaf_log_tail(tp, lbp);
1756 	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
1757 	return 0;
1758 }
1759 
1760 /*
1761  * Convert node form directory to leaf form directory.
1762  * The root of the node form dir needs to already be a LEAFN block.
1763  * Just return if we can't do anything.
1764  */
1765 int						/* error */
xfs_dir2_node_to_leaf(xfs_da_state_t * state)1766 xfs_dir2_node_to_leaf(
1767 	xfs_da_state_t		*state)		/* directory operation state */
1768 {
1769 	xfs_da_args_t		*args;		/* operation arguments */
1770 	xfs_inode_t		*dp;		/* incore directory inode */
1771 	int			error;		/* error return code */
1772 	xfs_dabuf_t		*fbp;		/* buffer for freespace block */
1773 	xfs_fileoff_t		fo;		/* freespace file offset */
1774 	xfs_dir2_free_t		*free;		/* freespace structure */
1775 	xfs_dabuf_t		*lbp;		/* buffer for leaf block */
1776 	xfs_dir2_leaf_tail_t	*ltp;		/* tail of leaf structure */
1777 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1778 	xfs_mount_t		*mp;		/* filesystem mount point */
1779 	int			rval;		/* successful free trim? */
1780 	xfs_trans_t		*tp;		/* transaction pointer */
1781 
1782 	/*
1783 	 * There's more than a leaf level in the btree, so there must
1784 	 * be multiple leafn blocks.  Give up.
1785 	 */
1786 	if (state->path.active > 1)
1787 		return 0;
1788 	args = state->args;
1789 	xfs_dir2_trace_args("node_to_leaf", args);
1790 	mp = state->mp;
1791 	dp = args->dp;
1792 	tp = args->trans;
1793 	/*
1794 	 * Get the last offset in the file.
1795 	 */
1796 	if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
1797 		return error;
1798 	}
1799 	fo -= mp->m_dirblkfsbs;
1800 	/*
1801 	 * If there are freespace blocks other than the first one,
1802 	 * take this opportunity to remove trailing empty freespace blocks
1803 	 * that may have been left behind during no-space-reservation
1804 	 * operations.
1805 	 */
1806 	while (fo > mp->m_dirfreeblk) {
1807 		if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
1808 			return error;
1809 		}
1810 		if (rval)
1811 			fo -= mp->m_dirblkfsbs;
1812 		else
1813 			return 0;
1814 	}
1815 	/*
1816 	 * Now find the block just before the freespace block.
1817 	 */
1818 	if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
1819 		return error;
1820 	}
1821 	/*
1822 	 * If it's not the single leaf block, give up.
1823 	 */
1824 	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
1825 		return 0;
1826 	lbp = state->path.blk[0].bp;
1827 	leaf = lbp->data;
1828 	ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
1829 	/*
1830 	 * Read the freespace block.
1831 	 */
1832 	if ((error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp,
1833 			XFS_DATA_FORK))) {
1834 		return error;
1835 	}
1836 	free = fbp->data;
1837 	ASSERT(INT_GET(free->hdr.magic, ARCH_CONVERT) == XFS_DIR2_FREE_MAGIC);
1838 	ASSERT(INT_ISZERO(free->hdr.firstdb, ARCH_CONVERT));
1839 	/*
1840 	 * Now see if the leafn and free data will fit in a leaf1.
1841 	 * If not, release the buffer and give up.
1842 	 */
1843 	if ((uint)sizeof(leaf->hdr) +
1844 	    (INT_GET(leaf->hdr.count, ARCH_CONVERT) - INT_GET(leaf->hdr.stale, ARCH_CONVERT)) * (uint)sizeof(leaf->ents[0]) +
1845 	    INT_GET(free->hdr.nvalid, ARCH_CONVERT) * (uint)sizeof(leaf->bests[0]) +
1846 	    (uint)sizeof(leaf->tail) >
1847 	    mp->m_dirblksize) {
1848 		xfs_da_brelse(tp, fbp);
1849 		return 0;
1850 	}
1851 	/*
1852 	 * If the leaf has any stale entries in it, compress them out.
1853 	 * The compact routine will log the header.
1854 	 */
1855 	if (INT_GET(leaf->hdr.stale, ARCH_CONVERT))
1856 		xfs_dir2_leaf_compact(args, lbp);
1857 	else
1858 		xfs_dir2_leaf_log_header(tp, lbp);
1859 	INT_SET(leaf->hdr.info.magic, ARCH_CONVERT, XFS_DIR2_LEAF1_MAGIC);
1860 	/*
1861 	 * Set up the leaf tail from the freespace block.
1862 	 */
1863 	ltp = XFS_DIR2_LEAF_TAIL_P(mp, leaf);
1864 	INT_COPY(ltp->bestcount, free->hdr.nvalid, ARCH_CONVERT);
1865 	/*
1866 	 * Set up the leaf bests table.
1867 	 */
1868 	memcpy(XFS_DIR2_LEAF_BESTS_P_ARCH(ltp, ARCH_CONVERT), free->bests,
1869 		INT_GET(ltp->bestcount, ARCH_CONVERT) * sizeof(leaf->bests[0]));
1870 	xfs_dir2_leaf_log_bests(tp, lbp, 0, INT_GET(ltp->bestcount, ARCH_CONVERT) - 1);
1871 	xfs_dir2_leaf_log_tail(tp, lbp);
1872 	xfs_dir2_leaf_check(dp, lbp);
1873 	/*
1874 	 * Get rid of the freespace block.
1875 	 */
1876 	error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
1877 	if (error) {
1878 		/*
1879 		 * This can't fail here because it can only happen when
1880 		 * punching out the middle of an extent, and this is an
1881 		 * isolated block.
1882 		 */
1883 		ASSERT(error != ENOSPC);
1884 		return error;
1885 	}
1886 	fbp = NULL;
1887 	/*
1888 	 * Now see if we can convert the single-leaf directory
1889 	 * down to a block form directory.
1890 	 * This routine always kills the dabuf for the leaf, so
1891 	 * eliminate it from the path.
1892 	 */
1893 	error = xfs_dir2_leaf_to_block(args, lbp, NULL);
1894 	state->path.blk[0].bp = NULL;
1895 	return error;
1896 }
1897