1 /*
2  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_mount.h"
29 #include "xfs_da_btree.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_dir2_sf.h"
32 #include "xfs_dinode.h"
33 #include "xfs_inode.h"
34 #include "xfs_bmap.h"
35 #include "xfs_dir2_data.h"
36 #include "xfs_dir2_leaf.h"
37 #include "xfs_dir2_block.h"
38 #include "xfs_dir2_node.h"
39 #include "xfs_error.h"
40 #include "xfs_trace.h"
41 
42 /*
43  * Local function declarations.
44  */
45 #ifdef DEBUG
46 static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
47 #else
48 #define	xfs_dir2_leaf_check(dp, bp)
49 #endif
50 static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp,
51 				    int *indexp, xfs_dabuf_t **dbpp);
52 static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
53 				    int first, int last);
54 static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
55 
56 
57 /*
58  * Convert a block form directory to a leaf form directory.
59  */
60 int						/* error */
xfs_dir2_block_to_leaf(xfs_da_args_t * args,xfs_dabuf_t * dbp)61 xfs_dir2_block_to_leaf(
62 	xfs_da_args_t		*args,		/* operation arguments */
63 	xfs_dabuf_t		*dbp)		/* input block's buffer */
64 {
65 	__be16			*bestsp;	/* leaf's bestsp entries */
66 	xfs_dablk_t		blkno;		/* leaf block's bno */
67 	xfs_dir2_block_t	*block;		/* block structure */
68 	xfs_dir2_leaf_entry_t	*blp;		/* block's leaf entries */
69 	xfs_dir2_block_tail_t	*btp;		/* block's tail */
70 	xfs_inode_t		*dp;		/* incore directory inode */
71 	int			error;		/* error return code */
72 	xfs_dabuf_t		*lbp;		/* leaf block's buffer */
73 	xfs_dir2_db_t		ldb;		/* leaf block's bno */
74 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
75 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf's tail */
76 	xfs_mount_t		*mp;		/* filesystem mount point */
77 	int			needlog;	/* need to log block header */
78 	int			needscan;	/* need to rescan bestfree */
79 	xfs_trans_t		*tp;		/* transaction pointer */
80 
81 	trace_xfs_dir2_block_to_leaf(args);
82 
83 	dp = args->dp;
84 	mp = dp->i_mount;
85 	tp = args->trans;
86 	/*
87 	 * Add the leaf block to the inode.
88 	 * This interface will only put blocks in the leaf/node range.
89 	 * Since that's empty now, we'll get the root (block 0 in range).
90 	 */
91 	if ((error = xfs_da_grow_inode(args, &blkno))) {
92 		return error;
93 	}
94 	ldb = xfs_dir2_da_to_db(mp, blkno);
95 	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp));
96 	/*
97 	 * Initialize the leaf block, get a buffer for it.
98 	 */
99 	if ((error = xfs_dir2_leaf_init(args, ldb, &lbp, XFS_DIR2_LEAF1_MAGIC))) {
100 		return error;
101 	}
102 	ASSERT(lbp != NULL);
103 	leaf = lbp->data;
104 	block = dbp->data;
105 	xfs_dir2_data_check(dp, dbp);
106 	btp = xfs_dir2_block_tail_p(mp, block);
107 	blp = xfs_dir2_block_leaf_p(btp);
108 	/*
109 	 * Set the counts in the leaf header.
110 	 */
111 	leaf->hdr.count = cpu_to_be16(be32_to_cpu(btp->count));
112 	leaf->hdr.stale = cpu_to_be16(be32_to_cpu(btp->stale));
113 	/*
114 	 * Could compact these but I think we always do the conversion
115 	 * after squeezing out stale entries.
116 	 */
117 	memcpy(leaf->ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
118 	xfs_dir2_leaf_log_ents(tp, lbp, 0, be16_to_cpu(leaf->hdr.count) - 1);
119 	needscan = 0;
120 	needlog = 1;
121 	/*
122 	 * Make the space formerly occupied by the leaf entries and block
123 	 * tail be free.
124 	 */
125 	xfs_dir2_data_make_free(tp, dbp,
126 		(xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
127 		(xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize -
128 				       (char *)blp),
129 		&needlog, &needscan);
130 	/*
131 	 * Fix up the block header, make it a data block.
132 	 */
133 	block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
134 	if (needscan)
135 		xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
136 	/*
137 	 * Set up leaf tail and bests table.
138 	 */
139 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
140 	ltp->bestcount = cpu_to_be32(1);
141 	bestsp = xfs_dir2_leaf_bests_p(ltp);
142 	bestsp[0] =  block->hdr.bestfree[0].length;
143 	/*
144 	 * Log the data header and leaf bests table.
145 	 */
146 	if (needlog)
147 		xfs_dir2_data_log_header(tp, dbp);
148 	xfs_dir2_leaf_check(dp, lbp);
149 	xfs_dir2_data_check(dp, dbp);
150 	xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
151 	xfs_da_buf_done(lbp);
152 	return 0;
153 }
154 
155 /*
156  * Add an entry to a leaf form directory.
157  */
158 int						/* error */
xfs_dir2_leaf_addname(xfs_da_args_t * args)159 xfs_dir2_leaf_addname(
160 	xfs_da_args_t		*args)		/* operation arguments */
161 {
162 	__be16			*bestsp;	/* freespace table in leaf */
163 	int			compact;	/* need to compact leaves */
164 	xfs_dir2_data_t		*data;		/* data block structure */
165 	xfs_dabuf_t		*dbp;		/* data block buffer */
166 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
167 	xfs_inode_t		*dp;		/* incore directory inode */
168 	xfs_dir2_data_unused_t	*dup;		/* data unused entry */
169 	int			error;		/* error return value */
170 	int			grown;		/* allocated new data block */
171 	int			highstale;	/* index of next stale leaf */
172 	int			i;		/* temporary, index */
173 	int			index;		/* leaf table position */
174 	xfs_dabuf_t		*lbp;		/* leaf's buffer */
175 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
176 	int			length;		/* length of new entry */
177 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry table pointer */
178 	int			lfloglow;	/* low leaf logging index */
179 	int			lfloghigh;	/* high leaf logging index */
180 	int			lowstale;	/* index of prev stale leaf */
181 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail pointer */
182 	xfs_mount_t		*mp;		/* filesystem mount point */
183 	int			needbytes;	/* leaf block bytes needed */
184 	int			needlog;	/* need to log data header */
185 	int			needscan;	/* need to rescan data free */
186 	__be16			*tagp;		/* end of data entry */
187 	xfs_trans_t		*tp;		/* transaction pointer */
188 	xfs_dir2_db_t		use_block;	/* data block number */
189 
190 	trace_xfs_dir2_leaf_addname(args);
191 
192 	dp = args->dp;
193 	tp = args->trans;
194 	mp = dp->i_mount;
195 	/*
196 	 * Read the leaf block.
197 	 */
198 	error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
199 		XFS_DATA_FORK);
200 	if (error) {
201 		return error;
202 	}
203 	ASSERT(lbp != NULL);
204 	/*
205 	 * Look up the entry by hash value and name.
206 	 * We know it's not there, our caller has already done a lookup.
207 	 * So the index is of the entry to insert in front of.
208 	 * But if there are dup hash values the index is of the first of those.
209 	 */
210 	index = xfs_dir2_leaf_search_hash(args, lbp);
211 	leaf = lbp->data;
212 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
213 	bestsp = xfs_dir2_leaf_bests_p(ltp);
214 	length = xfs_dir2_data_entsize(args->namelen);
215 	/*
216 	 * See if there are any entries with the same hash value
217 	 * and space in their block for the new entry.
218 	 * This is good because it puts multiple same-hash value entries
219 	 * in a data block, improving the lookup of those entries.
220 	 */
221 	for (use_block = -1, lep = &leaf->ents[index];
222 	     index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
223 	     index++, lep++) {
224 		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
225 			continue;
226 		i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
227 		ASSERT(i < be32_to_cpu(ltp->bestcount));
228 		ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
229 		if (be16_to_cpu(bestsp[i]) >= length) {
230 			use_block = i;
231 			break;
232 		}
233 	}
234 	/*
235 	 * Didn't find a block yet, linear search all the data blocks.
236 	 */
237 	if (use_block == -1) {
238 		for (i = 0; i < be32_to_cpu(ltp->bestcount); i++) {
239 			/*
240 			 * Remember a block we see that's missing.
241 			 */
242 			if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1)
243 				use_block = i;
244 			else if (be16_to_cpu(bestsp[i]) >= length) {
245 				use_block = i;
246 				break;
247 			}
248 		}
249 	}
250 	/*
251 	 * How many bytes do we need in the leaf block?
252 	 */
253 	needbytes =
254 		(leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) +
255 		(use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0]));
256 	/*
257 	 * Now kill use_block if it refers to a missing block, so we
258 	 * can use it as an indication of allocation needed.
259 	 */
260 	if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF)
261 		use_block = -1;
262 	/*
263 	 * If we don't have enough free bytes but we can make enough
264 	 * by compacting out stale entries, we'll do that.
265 	 */
266 	if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
267 				needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
268 		compact = 1;
269 	}
270 	/*
271 	 * Otherwise if we don't have enough free bytes we need to
272 	 * convert to node form.
273 	 */
274 	else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
275 						leaf->hdr.count)] < needbytes) {
276 		/*
277 		 * Just checking or no space reservation, give up.
278 		 */
279 		if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
280 							args->total == 0) {
281 			xfs_da_brelse(tp, lbp);
282 			return XFS_ERROR(ENOSPC);
283 		}
284 		/*
285 		 * Convert to node form.
286 		 */
287 		error = xfs_dir2_leaf_to_node(args, lbp);
288 		xfs_da_buf_done(lbp);
289 		if (error)
290 			return error;
291 		/*
292 		 * Then add the new entry.
293 		 */
294 		return xfs_dir2_node_addname(args);
295 	}
296 	/*
297 	 * Otherwise it will fit without compaction.
298 	 */
299 	else
300 		compact = 0;
301 	/*
302 	 * If just checking, then it will fit unless we needed to allocate
303 	 * a new data block.
304 	 */
305 	if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
306 		xfs_da_brelse(tp, lbp);
307 		return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
308 	}
309 	/*
310 	 * If no allocations are allowed, return now before we've
311 	 * changed anything.
312 	 */
313 	if (args->total == 0 && use_block == -1) {
314 		xfs_da_brelse(tp, lbp);
315 		return XFS_ERROR(ENOSPC);
316 	}
317 	/*
318 	 * Need to compact the leaf entries, removing stale ones.
319 	 * Leave one stale entry behind - the one closest to our
320 	 * insertion index - and we'll shift that one to our insertion
321 	 * point later.
322 	 */
323 	if (compact) {
324 		xfs_dir2_leaf_compact_x1(lbp, &index, &lowstale, &highstale,
325 			&lfloglow, &lfloghigh);
326 	}
327 	/*
328 	 * There are stale entries, so we'll need log-low and log-high
329 	 * impossibly bad values later.
330 	 */
331 	else if (be16_to_cpu(leaf->hdr.stale)) {
332 		lfloglow = be16_to_cpu(leaf->hdr.count);
333 		lfloghigh = -1;
334 	}
335 	/*
336 	 * If there was no data block space found, we need to allocate
337 	 * a new one.
338 	 */
339 	if (use_block == -1) {
340 		/*
341 		 * Add the new data block.
342 		 */
343 		if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
344 				&use_block))) {
345 			xfs_da_brelse(tp, lbp);
346 			return error;
347 		}
348 		/*
349 		 * Initialize the block.
350 		 */
351 		if ((error = xfs_dir2_data_init(args, use_block, &dbp))) {
352 			xfs_da_brelse(tp, lbp);
353 			return error;
354 		}
355 		/*
356 		 * If we're adding a new data block on the end we need to
357 		 * extend the bests table.  Copy it up one entry.
358 		 */
359 		if (use_block >= be32_to_cpu(ltp->bestcount)) {
360 			bestsp--;
361 			memmove(&bestsp[0], &bestsp[1],
362 				be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));
363 			be32_add_cpu(&ltp->bestcount, 1);
364 			xfs_dir2_leaf_log_tail(tp, lbp);
365 			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
366 		}
367 		/*
368 		 * If we're filling in a previously empty block just log it.
369 		 */
370 		else
371 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
372 		data = dbp->data;
373 		bestsp[use_block] = data->hdr.bestfree[0].length;
374 		grown = 1;
375 	}
376 	/*
377 	 * Already had space in some data block.
378 	 * Just read that one in.
379 	 */
380 	else {
381 		if ((error =
382 		    xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
383 			    -1, &dbp, XFS_DATA_FORK))) {
384 			xfs_da_brelse(tp, lbp);
385 			return error;
386 		}
387 		data = dbp->data;
388 		grown = 0;
389 	}
390 	xfs_dir2_data_check(dp, dbp);
391 	/*
392 	 * Point to the biggest freespace in our data block.
393 	 */
394 	dup = (xfs_dir2_data_unused_t *)
395 	      ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
396 	ASSERT(be16_to_cpu(dup->length) >= length);
397 	needscan = needlog = 0;
398 	/*
399 	 * Mark the initial part of our freespace in use for the new entry.
400 	 */
401 	xfs_dir2_data_use_free(tp, dbp, dup,
402 		(xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
403 		&needlog, &needscan);
404 	/*
405 	 * Initialize our new entry (at last).
406 	 */
407 	dep = (xfs_dir2_data_entry_t *)dup;
408 	dep->inumber = cpu_to_be64(args->inumber);
409 	dep->namelen = args->namelen;
410 	memcpy(dep->name, args->name, dep->namelen);
411 	tagp = xfs_dir2_data_entry_tag_p(dep);
412 	*tagp = cpu_to_be16((char *)dep - (char *)data);
413 	/*
414 	 * Need to scan fix up the bestfree table.
415 	 */
416 	if (needscan)
417 		xfs_dir2_data_freescan(mp, data, &needlog);
418 	/*
419 	 * Need to log the data block's header.
420 	 */
421 	if (needlog)
422 		xfs_dir2_data_log_header(tp, dbp);
423 	xfs_dir2_data_log_entry(tp, dbp, dep);
424 	/*
425 	 * If the bests table needs to be changed, do it.
426 	 * Log the change unless we've already done that.
427 	 */
428 	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
429 		bestsp[use_block] = data->hdr.bestfree[0].length;
430 		if (!grown)
431 			xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
432 	}
433 	/*
434 	 * Now we need to make room to insert the leaf entry.
435 	 * If there are no stale entries, we just insert a hole at index.
436 	 */
437 	if (!leaf->hdr.stale) {
438 		/*
439 		 * lep is still good as the index leaf entry.
440 		 */
441 		if (index < be16_to_cpu(leaf->hdr.count))
442 			memmove(lep + 1, lep,
443 				(be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
444 		/*
445 		 * Record low and high logging indices for the leaf.
446 		 */
447 		lfloglow = index;
448 		lfloghigh = be16_to_cpu(leaf->hdr.count);
449 		be16_add_cpu(&leaf->hdr.count, 1);
450 	}
451 	/*
452 	 * There are stale entries.
453 	 * We will use one of them for the new entry.
454 	 * It's probably not at the right location, so we'll have to
455 	 * shift some up or down first.
456 	 */
457 	else {
458 		/*
459 		 * If we didn't compact before, we need to find the nearest
460 		 * stale entries before and after our insertion point.
461 		 */
462 		if (compact == 0) {
463 			/*
464 			 * Find the first stale entry before the insertion
465 			 * point, if any.
466 			 */
467 			for (lowstale = index - 1;
468 			     lowstale >= 0 &&
469 				be32_to_cpu(leaf->ents[lowstale].address) !=
470 				XFS_DIR2_NULL_DATAPTR;
471 			     lowstale--)
472 				continue;
473 			/*
474 			 * Find the next stale entry at or after the insertion
475 			 * point, if any.   Stop if we go so far that the
476 			 * lowstale entry would be better.
477 			 */
478 			for (highstale = index;
479 			     highstale < be16_to_cpu(leaf->hdr.count) &&
480 				be32_to_cpu(leaf->ents[highstale].address) !=
481 				XFS_DIR2_NULL_DATAPTR &&
482 				(lowstale < 0 ||
483 				 index - lowstale - 1 >= highstale - index);
484 			     highstale++)
485 				continue;
486 		}
487 		/*
488 		 * If the low one is better, use it.
489 		 */
490 		if (lowstale >= 0 &&
491 		    (highstale == be16_to_cpu(leaf->hdr.count) ||
492 		     index - lowstale - 1 < highstale - index)) {
493 			ASSERT(index - lowstale - 1 >= 0);
494 			ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
495 			       XFS_DIR2_NULL_DATAPTR);
496 			/*
497 			 * Copy entries up to cover the stale entry
498 			 * and make room for the new entry.
499 			 */
500 			if (index - lowstale - 1 > 0)
501 				memmove(&leaf->ents[lowstale],
502 					&leaf->ents[lowstale + 1],
503 					(index - lowstale - 1) * sizeof(*lep));
504 			lep = &leaf->ents[index - 1];
505 			lfloglow = MIN(lowstale, lfloglow);
506 			lfloghigh = MAX(index - 1, lfloghigh);
507 		}
508 		/*
509 		 * The high one is better, so use that one.
510 		 */
511 		else {
512 			ASSERT(highstale - index >= 0);
513 			ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
514 			       XFS_DIR2_NULL_DATAPTR);
515 			/*
516 			 * Copy entries down to cover the stale entry
517 			 * and make room for the new entry.
518 			 */
519 			if (highstale - index > 0)
520 				memmove(&leaf->ents[index + 1],
521 					&leaf->ents[index],
522 					(highstale - index) * sizeof(*lep));
523 			lep = &leaf->ents[index];
524 			lfloglow = MIN(index, lfloglow);
525 			lfloghigh = MAX(highstale, lfloghigh);
526 		}
527 		be16_add_cpu(&leaf->hdr.stale, -1);
528 	}
529 	/*
530 	 * Fill in the new leaf entry.
531 	 */
532 	lep->hashval = cpu_to_be32(args->hashval);
533 	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block,
534 				be16_to_cpu(*tagp)));
535 	/*
536 	 * Log the leaf fields and give up the buffers.
537 	 */
538 	xfs_dir2_leaf_log_header(tp, lbp);
539 	xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
540 	xfs_dir2_leaf_check(dp, lbp);
541 	xfs_da_buf_done(lbp);
542 	xfs_dir2_data_check(dp, dbp);
543 	xfs_da_buf_done(dbp);
544 	return 0;
545 }
546 
#ifdef DEBUG
/*
 * Sanity-check a leaf1 block, asserting on any inconsistency.
 *
 * Verified: the magic number, that the entry count does not exceed
 * xfs_dir2_max_leaf_ents(), that the entry table ends at or before the
 * bests table, that hash values are in non-decreasing order, and that
 * the header's stale count equals the number of entries whose address
 * is XFS_DIR2_NULL_DATAPTR.
 */
STATIC void
xfs_dir2_leaf_check(
	xfs_inode_t		*dp,		/* incore directory inode */
	xfs_dabuf_t		*bp)		/* leaf's buffer */
{
	int			idx;		/* leaf entry index */
	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			nents;		/* entries in the leaf */
	int			nstale;		/* stale entries counted */
	xfs_dir2_leaf_tail_t	*tail;		/* leaf tail pointer */

	leaf = bp->data;
	mp = dp->i_mount;
	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);

	/*
	 * This bound is looser than it could be: it ignores the room the
	 * bests table takes, which could be derived from di_size.
	 */
	nents = be16_to_cpu(leaf->hdr.count);
	ASSERT(nents <= xfs_dir2_max_leaf_ents(mp));

	/*
	 * The entry table must not run into the bests table.
	 */
	tail = xfs_dir2_leaf_tail_p(mp, leaf);
	ASSERT((char *)&leaf->ents[nents] <=
	       (char *)xfs_dir2_leaf_bests_p(tail));

	/*
	 * Each entry's hash must not exceed its successor's; tally the
	 * stale entries along the way.
	 */
	nstale = 0;
	for (idx = 0; idx < nents; idx++) {
		ASSERT(idx + 1 >= nents ||
		       be32_to_cpu(leaf->ents[idx].hashval) <=
		       be32_to_cpu(leaf->ents[idx + 1].hashval));
		if (leaf->ents[idx].address ==
		    cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
			nstale++;
	}
	ASSERT(be16_to_cpu(leaf->hdr.stale) == nstale);
}
#endif	/* DEBUG */
591 
592 /*
593  * Compact out any stale entries in the leaf.
594  * Log the header and changed leaf entries, if any.
595  */
596 void
xfs_dir2_leaf_compact(xfs_da_args_t * args,xfs_dabuf_t * bp)597 xfs_dir2_leaf_compact(
598 	xfs_da_args_t	*args,		/* operation arguments */
599 	xfs_dabuf_t	*bp)		/* leaf buffer */
600 {
601 	int		from;		/* source leaf index */
602 	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
603 	int		loglow;		/* first leaf entry to log */
604 	int		to;		/* target leaf index */
605 
606 	leaf = bp->data;
607 	if (!leaf->hdr.stale) {
608 		return;
609 	}
610 	/*
611 	 * Compress out the stale entries in place.
612 	 */
613 	for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
614 		if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
615 			continue;
616 		/*
617 		 * Only actually copy the entries that are different.
618 		 */
619 		if (from > to) {
620 			if (loglow == -1)
621 				loglow = to;
622 			leaf->ents[to] = leaf->ents[from];
623 		}
624 		to++;
625 	}
626 	/*
627 	 * Update and log the header, log the leaf entries.
628 	 */
629 	ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to);
630 	be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale)));
631 	leaf->hdr.stale = 0;
632 	xfs_dir2_leaf_log_header(args->trans, bp);
633 	if (loglow != -1)
634 		xfs_dir2_leaf_log_ents(args->trans, bp, loglow, to - 1);
635 }
636 
637 /*
638  * Compact the leaf entries, removing stale ones.
639  * Leave one stale entry behind - the one closest to our
640  * insertion index - and the caller will shift that one to our insertion
641  * point later.
642  * Return new insertion index, where the remaining stale entry is,
643  * and leaf logging indices.
644  */
645 void
xfs_dir2_leaf_compact_x1(xfs_dabuf_t * bp,int * indexp,int * lowstalep,int * highstalep,int * lowlogp,int * highlogp)646 xfs_dir2_leaf_compact_x1(
647 	xfs_dabuf_t	*bp,		/* leaf buffer */
648 	int		*indexp,	/* insertion index */
649 	int		*lowstalep,	/* out: stale entry before us */
650 	int		*highstalep,	/* out: stale entry after us */
651 	int		*lowlogp,	/* out: low log index */
652 	int		*highlogp)	/* out: high log index */
653 {
654 	int		from;		/* source copy index */
655 	int		highstale;	/* stale entry at/after index */
656 	int		index;		/* insertion index */
657 	int		keepstale;	/* source index of kept stale */
658 	xfs_dir2_leaf_t	*leaf;		/* leaf structure */
659 	int		lowstale;	/* stale entry before index */
660 	int		newindex=0;	/* new insertion index */
661 	int		to;		/* destination copy index */
662 
663 	leaf = bp->data;
664 	ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
665 	index = *indexp;
666 	/*
667 	 * Find the first stale entry before our index, if any.
668 	 */
669 	for (lowstale = index - 1;
670 	     lowstale >= 0 &&
671 		be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
672 	     lowstale--)
673 		continue;
674 	/*
675 	 * Find the first stale entry at or after our index, if any.
676 	 * Stop if the answer would be worse than lowstale.
677 	 */
678 	for (highstale = index;
679 	     highstale < be16_to_cpu(leaf->hdr.count) &&
680 		be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
681 		(lowstale < 0 || index - lowstale > highstale - index);
682 	     highstale++)
683 		continue;
684 	/*
685 	 * Pick the better of lowstale and highstale.
686 	 */
687 	if (lowstale >= 0 &&
688 	    (highstale == be16_to_cpu(leaf->hdr.count) ||
689 	     index - lowstale <= highstale - index))
690 		keepstale = lowstale;
691 	else
692 		keepstale = highstale;
693 	/*
694 	 * Copy the entries in place, removing all the stale entries
695 	 * except keepstale.
696 	 */
697 	for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
698 		/*
699 		 * Notice the new value of index.
700 		 */
701 		if (index == from)
702 			newindex = to;
703 		if (from != keepstale &&
704 		    be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) {
705 			if (from == to)
706 				*lowlogp = to;
707 			continue;
708 		}
709 		/*
710 		 * Record the new keepstale value for the insertion.
711 		 */
712 		if (from == keepstale)
713 			lowstale = highstale = to;
714 		/*
715 		 * Copy only the entries that have moved.
716 		 */
717 		if (from > to)
718 			leaf->ents[to] = leaf->ents[from];
719 		to++;
720 	}
721 	ASSERT(from > to);
722 	/*
723 	 * If the insertion point was past the last entry,
724 	 * set the new insertion point accordingly.
725 	 */
726 	if (index == from)
727 		newindex = to;
728 	*indexp = newindex;
729 	/*
730 	 * Adjust the leaf header values.
731 	 */
732 	be16_add_cpu(&leaf->hdr.count, -(from - to));
733 	leaf->hdr.stale = cpu_to_be16(1);
734 	/*
735 	 * Remember the low/high stale value only in the "right"
736 	 * direction.
737 	 */
738 	if (lowstale >= newindex)
739 		lowstale = -1;
740 	else
741 		highstale = be16_to_cpu(leaf->hdr.count);
742 	*highlogp = be16_to_cpu(leaf->hdr.count) - 1;
743 	*lowstalep = lowstale;
744 	*highstalep = highstale;
745 }
746 
747 /*
748  * Getdents (readdir) for leaf and node directories.
749  * This reads the data blocks only, so is the same for both forms.
750  */
751 int						/* error */
xfs_dir2_leaf_getdents(xfs_inode_t * dp,void * dirent,size_t bufsize,xfs_off_t * offset,filldir_t filldir)752 xfs_dir2_leaf_getdents(
753 	xfs_inode_t		*dp,		/* incore directory inode */
754 	void			*dirent,
755 	size_t			bufsize,
756 	xfs_off_t		*offset,
757 	filldir_t		filldir)
758 {
759 	xfs_dabuf_t		*bp;		/* data block buffer */
760 	int			byteoff;	/* offset in current block */
761 	xfs_dir2_db_t		curdb;		/* db for current block */
762 	xfs_dir2_off_t		curoff;		/* current overall offset */
763 	xfs_dir2_data_t		*data;		/* data block structure */
764 	xfs_dir2_data_entry_t	*dep;		/* data entry */
765 	xfs_dir2_data_unused_t	*dup;		/* unused entry */
766 	int			error = 0;	/* error return value */
767 	int			i;		/* temporary loop index */
768 	int			j;		/* temporary loop index */
769 	int			length;		/* temporary length value */
770 	xfs_bmbt_irec_t		*map;		/* map vector for blocks */
771 	xfs_extlen_t		map_blocks;	/* number of fsbs in map */
772 	xfs_dablk_t		map_off;	/* last mapped file offset */
773 	int			map_size;	/* total entries in *map */
774 	int			map_valid;	/* valid entries in *map */
775 	xfs_mount_t		*mp;		/* filesystem mount point */
776 	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
777 	int			nmap;		/* mappings to ask xfs_bmapi */
778 	char			*ptr = NULL;	/* pointer to current data */
779 	int			ra_current;	/* number of read-ahead blks */
780 	int			ra_index;	/* *map index for read-ahead */
781 	int			ra_offset;	/* map entry offset for ra */
782 	int			ra_want;	/* readahead count wanted */
783 
784 	/*
785 	 * If the offset is at or past the largest allowed value,
786 	 * give up right away.
787 	 */
788 	if (*offset >= XFS_DIR2_MAX_DATAPTR)
789 		return 0;
790 
791 	mp = dp->i_mount;
792 
793 	/*
794 	 * Set up to bmap a number of blocks based on the caller's
795 	 * buffer size, the directory block size, and the filesystem
796 	 * block size.
797 	 */
798 	map_size = howmany(bufsize + mp->m_dirblksize, mp->m_sb.sb_blocksize);
799 	map = kmem_alloc(map_size * sizeof(*map), KM_SLEEP);
800 	map_valid = ra_index = ra_offset = ra_current = map_blocks = 0;
801 	bp = NULL;
802 
803 	/*
804 	 * Inside the loop we keep the main offset value as a byte offset
805 	 * in the directory file.
806 	 */
807 	curoff = xfs_dir2_dataptr_to_byte(mp, *offset);
808 
809 	/*
810 	 * Force this conversion through db so we truncate the offset
811 	 * down to get the start of the data block.
812 	 */
813 	map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff));
814 	/*
815 	 * Loop over directory entries until we reach the end offset.
816 	 * Get more blocks and readahead as necessary.
817 	 */
818 	while (curoff < XFS_DIR2_LEAF_OFFSET) {
819 		/*
820 		 * If we have no buffer, or we're off the end of the
821 		 * current buffer, need to get another one.
822 		 */
823 		if (!bp || ptr >= (char *)bp->data + mp->m_dirblksize) {
824 			/*
825 			 * If we have a buffer, we need to release it and
826 			 * take it out of the mapping.
827 			 */
828 			if (bp) {
829 				xfs_da_brelse(NULL, bp);
830 				bp = NULL;
831 				map_blocks -= mp->m_dirblkfsbs;
832 				/*
833 				 * Loop to get rid of the extents for the
834 				 * directory block.
835 				 */
836 				for (i = mp->m_dirblkfsbs; i > 0; ) {
837 					j = MIN((int)map->br_blockcount, i);
838 					map->br_blockcount -= j;
839 					map->br_startblock += j;
840 					map->br_startoff += j;
841 					/*
842 					 * If mapping is done, pitch it from
843 					 * the table.
844 					 */
845 					if (!map->br_blockcount && --map_valid)
846 						memmove(&map[0], &map[1],
847 							sizeof(map[0]) *
848 							map_valid);
849 					i -= j;
850 				}
851 			}
852 			/*
853 			 * Recalculate the readahead blocks wanted.
854 			 */
855 			ra_want = howmany(bufsize + mp->m_dirblksize,
856 					  mp->m_sb.sb_blocksize) - 1;
857 			ASSERT(ra_want >= 0);
858 
859 			/*
860 			 * If we don't have as many as we want, and we haven't
861 			 * run out of data blocks, get some more mappings.
862 			 */
863 			if (1 + ra_want > map_blocks &&
864 			    map_off <
865 			    xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
866 				/*
867 				 * Get more bmaps, fill in after the ones
868 				 * we already have in the table.
869 				 */
870 				nmap = map_size - map_valid;
871 				error = xfs_bmapi(NULL, dp,
872 					map_off,
873 					xfs_dir2_byte_to_da(mp,
874 						XFS_DIR2_LEAF_OFFSET) - map_off,
875 					XFS_BMAPI_METADATA, NULL, 0,
876 					&map[map_valid], &nmap, NULL);
877 				/*
878 				 * Don't know if we should ignore this or
879 				 * try to return an error.
880 				 * The trouble with returning errors
881 				 * is that readdir will just stop without
882 				 * actually passing the error through.
883 				 */
884 				if (error)
885 					break;	/* XXX */
886 				/*
887 				 * If we got all the mappings we asked for,
888 				 * set the final map offset based on the
889 				 * last bmap value received.
890 				 * Otherwise, we've reached the end.
891 				 */
892 				if (nmap == map_size - map_valid)
893 					map_off =
894 					map[map_valid + nmap - 1].br_startoff +
895 					map[map_valid + nmap - 1].br_blockcount;
896 				else
897 					map_off =
898 						xfs_dir2_byte_to_da(mp,
899 							XFS_DIR2_LEAF_OFFSET);
900 				/*
901 				 * Look for holes in the mapping, and
902 				 * eliminate them.  Count up the valid blocks.
903 				 */
904 				for (i = map_valid; i < map_valid + nmap; ) {
905 					if (map[i].br_startblock ==
906 					    HOLESTARTBLOCK) {
907 						nmap--;
908 						length = map_valid + nmap - i;
909 						if (length)
910 							memmove(&map[i],
911 								&map[i + 1],
912 								sizeof(map[i]) *
913 								length);
914 					} else {
915 						map_blocks +=
916 							map[i].br_blockcount;
917 						i++;
918 					}
919 				}
920 				map_valid += nmap;
921 			}
922 			/*
923 			 * No valid mappings, so no more data blocks.
924 			 */
925 			if (!map_valid) {
926 				curoff = xfs_dir2_da_to_byte(mp, map_off);
927 				break;
928 			}
929 			/*
930 			 * Read the directory block starting at the first
931 			 * mapping.
932 			 */
933 			curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
934 			error = xfs_da_read_buf(NULL, dp, map->br_startoff,
935 				map->br_blockcount >= mp->m_dirblkfsbs ?
936 				    XFS_FSB_TO_DADDR(mp, map->br_startblock) :
937 				    -1,
938 				&bp, XFS_DATA_FORK);
939 			/*
940 			 * Should just skip over the data block instead
941 			 * of giving up.
942 			 */
943 			if (error)
944 				break;	/* XXX */
945 			/*
946 			 * Adjust the current amount of read-ahead: we just
947 			 * read a block that was previously ra.
948 			 */
949 			if (ra_current)
950 				ra_current -= mp->m_dirblkfsbs;
951 			/*
952 			 * Do we need more readahead?
953 			 */
954 			for (ra_index = ra_offset = i = 0;
955 			     ra_want > ra_current && i < map_blocks;
956 			     i += mp->m_dirblkfsbs) {
957 				ASSERT(ra_index < map_valid);
958 				/*
959 				 * Read-ahead a contiguous directory block.
960 				 */
961 				if (i > ra_current &&
962 				    map[ra_index].br_blockcount >=
963 				    mp->m_dirblkfsbs) {
964 					xfs_buf_readahead(mp->m_ddev_targp,
965 						XFS_FSB_TO_DADDR(mp,
966 						   map[ra_index].br_startblock +
967 						   ra_offset),
968 						(int)BTOBB(mp->m_dirblksize));
969 					ra_current = i;
970 				}
971 				/*
972 				 * Read-ahead a non-contiguous directory block.
973 				 * This doesn't use our mapping, but this
974 				 * is a very rare case.
975 				 */
976 				else if (i > ra_current) {
977 					(void)xfs_da_reada_buf(NULL, dp,
978 						map[ra_index].br_startoff +
979 						ra_offset, XFS_DATA_FORK);
980 					ra_current = i;
981 				}
982 				/*
983 				 * Advance offset through the mapping table.
984 				 */
985 				for (j = 0; j < mp->m_dirblkfsbs; j++) {
986 					/*
987 					 * The rest of this extent but not
988 					 * more than a dir block.
989 					 */
990 					length = MIN(mp->m_dirblkfsbs,
991 						(int)(map[ra_index].br_blockcount -
992 						ra_offset));
993 					j += length;
994 					ra_offset += length;
995 					/*
996 					 * Advance to the next mapping if
997 					 * this one is used up.
998 					 */
999 					if (ra_offset ==
1000 					    map[ra_index].br_blockcount) {
1001 						ra_offset = 0;
1002 						ra_index++;
1003 					}
1004 				}
1005 			}
1006 			/*
1007 			 * Having done a read, we need to set a new offset.
1008 			 */
1009 			newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0);
1010 			/*
1011 			 * Start of the current block.
1012 			 */
1013 			if (curoff < newoff)
1014 				curoff = newoff;
1015 			/*
1016 			 * Make sure we're in the right block.
1017 			 */
1018 			else if (curoff > newoff)
1019 				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1020 				       curdb);
1021 			data = bp->data;
1022 			xfs_dir2_data_check(dp, bp);
1023 			/*
1024 			 * Find our position in the block.
1025 			 */
1026 			ptr = (char *)&data->u;
1027 			byteoff = xfs_dir2_byte_to_off(mp, curoff);
1028 			/*
1029 			 * Skip past the header.
1030 			 */
1031 			if (byteoff == 0)
1032 				curoff += (uint)sizeof(data->hdr);
1033 			/*
1034 			 * Skip past entries until we reach our offset.
1035 			 */
1036 			else {
1037 				while ((char *)ptr - (char *)data < byteoff) {
1038 					dup = (xfs_dir2_data_unused_t *)ptr;
1039 
1040 					if (be16_to_cpu(dup->freetag)
1041 						  == XFS_DIR2_DATA_FREE_TAG) {
1042 
1043 						length = be16_to_cpu(dup->length);
1044 						ptr += length;
1045 						continue;
1046 					}
1047 					dep = (xfs_dir2_data_entry_t *)ptr;
1048 					length =
1049 					   xfs_dir2_data_entsize(dep->namelen);
1050 					ptr += length;
1051 				}
1052 				/*
1053 				 * Now set our real offset.
1054 				 */
1055 				curoff =
1056 					xfs_dir2_db_off_to_byte(mp,
1057 					    xfs_dir2_byte_to_db(mp, curoff),
1058 					    (char *)ptr - (char *)data);
1059 				if (ptr >= (char *)data + mp->m_dirblksize) {
1060 					continue;
1061 				}
1062 			}
1063 		}
1064 		/*
1065 		 * We have a pointer to an entry.
1066 		 * Is it a live one?
1067 		 */
1068 		dup = (xfs_dir2_data_unused_t *)ptr;
1069 		/*
1070 		 * No, it's unused, skip over it.
1071 		 */
1072 		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
1073 			length = be16_to_cpu(dup->length);
1074 			ptr += length;
1075 			curoff += length;
1076 			continue;
1077 		}
1078 
1079 		dep = (xfs_dir2_data_entry_t *)ptr;
1080 		length = xfs_dir2_data_entsize(dep->namelen);
1081 
1082 		if (filldir(dirent, (char *)dep->name, dep->namelen,
1083 			    xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff,
1084 			    be64_to_cpu(dep->inumber), DT_UNKNOWN))
1085 			break;
1086 
1087 		/*
1088 		 * Advance to next entry in the block.
1089 		 */
1090 		ptr += length;
1091 		curoff += length;
1092 		/* bufsize may have just been a guess; don't go negative */
1093 		bufsize = bufsize > length ? bufsize - length : 0;
1094 	}
1095 
1096 	/*
1097 	 * All done.  Set output offset value to current offset.
1098 	 */
1099 	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
1100 		*offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
1101 	else
1102 		*offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
1103 	kmem_free(map);
1104 	if (bp)
1105 		xfs_da_brelse(NULL, bp);
1106 	return error;
1107 }
1108 
1109 /*
1110  * Initialize a new leaf block, leaf1 or leafn magic accepted.
1111  */
1112 int
xfs_dir2_leaf_init(xfs_da_args_t * args,xfs_dir2_db_t bno,xfs_dabuf_t ** bpp,int magic)1113 xfs_dir2_leaf_init(
1114 	xfs_da_args_t		*args,		/* operation arguments */
1115 	xfs_dir2_db_t		bno,		/* directory block number */
1116 	xfs_dabuf_t		**bpp,		/* out: leaf buffer */
1117 	int			magic)		/* magic number for block */
1118 {
1119 	xfs_dabuf_t		*bp;		/* leaf buffer */
1120 	xfs_inode_t		*dp;		/* incore directory inode */
1121 	int			error;		/* error return code */
1122 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1123 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1124 	xfs_mount_t		*mp;		/* filesystem mount point */
1125 	xfs_trans_t		*tp;		/* transaction pointer */
1126 
1127 	dp = args->dp;
1128 	ASSERT(dp != NULL);
1129 	tp = args->trans;
1130 	mp = dp->i_mount;
1131 	ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) &&
1132 	       bno < XFS_DIR2_FREE_FIRSTDB(mp));
1133 	/*
1134 	 * Get the buffer for the block.
1135 	 */
1136 	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp,
1137 		XFS_DATA_FORK);
1138 	if (error) {
1139 		return error;
1140 	}
1141 	ASSERT(bp != NULL);
1142 	leaf = bp->data;
1143 	/*
1144 	 * Initialize the header.
1145 	 */
1146 	leaf->hdr.info.magic = cpu_to_be16(magic);
1147 	leaf->hdr.info.forw = 0;
1148 	leaf->hdr.info.back = 0;
1149 	leaf->hdr.count = 0;
1150 	leaf->hdr.stale = 0;
1151 	xfs_dir2_leaf_log_header(tp, bp);
1152 	/*
1153 	 * If it's a leaf-format directory initialize the tail.
1154 	 * In this case our caller has the real bests table to copy into
1155 	 * the block.
1156 	 */
1157 	if (magic == XFS_DIR2_LEAF1_MAGIC) {
1158 		ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1159 		ltp->bestcount = 0;
1160 		xfs_dir2_leaf_log_tail(tp, bp);
1161 	}
1162 	*bpp = bp;
1163 	return 0;
1164 }
1165 
1166 /*
1167  * Log the bests entries indicated from a leaf1 block.
1168  */
1169 static void
xfs_dir2_leaf_log_bests(xfs_trans_t * tp,xfs_dabuf_t * bp,int first,int last)1170 xfs_dir2_leaf_log_bests(
1171 	xfs_trans_t		*tp,		/* transaction pointer */
1172 	xfs_dabuf_t		*bp,		/* leaf buffer */
1173 	int			first,		/* first entry to log */
1174 	int			last)		/* last entry to log */
1175 {
1176 	__be16			*firstb;	/* pointer to first entry */
1177 	__be16			*lastb;		/* pointer to last entry */
1178 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1179 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1180 
1181 	leaf = bp->data;
1182 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
1183 	ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1184 	firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1185 	lastb = xfs_dir2_leaf_bests_p(ltp) + last;
1186 	xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
1187 		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
1188 }
1189 
1190 /*
1191  * Log the leaf entries indicated from a leaf1 or leafn block.
1192  */
1193 void
xfs_dir2_leaf_log_ents(xfs_trans_t * tp,xfs_dabuf_t * bp,int first,int last)1194 xfs_dir2_leaf_log_ents(
1195 	xfs_trans_t		*tp,		/* transaction pointer */
1196 	xfs_dabuf_t		*bp,		/* leaf buffer */
1197 	int			first,		/* first entry to log */
1198 	int			last)		/* last entry to log */
1199 {
1200 	xfs_dir2_leaf_entry_t	*firstlep;	/* pointer to first entry */
1201 	xfs_dir2_leaf_entry_t	*lastlep;	/* pointer to last entry */
1202 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1203 
1204 	leaf = bp->data;
1205 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
1206 	       be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
1207 	firstlep = &leaf->ents[first];
1208 	lastlep = &leaf->ents[last];
1209 	xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
1210 		(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
1211 }
1212 
1213 /*
1214  * Log the header of the leaf1 or leafn block.
1215  */
1216 void
xfs_dir2_leaf_log_header(xfs_trans_t * tp,xfs_dabuf_t * bp)1217 xfs_dir2_leaf_log_header(
1218 	xfs_trans_t		*tp,		/* transaction pointer */
1219 	xfs_dabuf_t		*bp)		/* leaf buffer */
1220 {
1221 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1222 
1223 	leaf = bp->data;
1224 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
1225 	       be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
1226 	xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1227 		(uint)(sizeof(leaf->hdr) - 1));
1228 }
1229 
1230 /*
1231  * Log the tail of the leaf1 block.
1232  */
1233 STATIC void
xfs_dir2_leaf_log_tail(xfs_trans_t * tp,xfs_dabuf_t * bp)1234 xfs_dir2_leaf_log_tail(
1235 	xfs_trans_t		*tp,		/* transaction pointer */
1236 	xfs_dabuf_t		*bp)		/* leaf buffer */
1237 {
1238 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1239 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1240 	xfs_mount_t		*mp;		/* filesystem mount point */
1241 
1242 	mp = tp->t_mountp;
1243 	leaf = bp->data;
1244 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
1245 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1246 	xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1247 		(uint)(mp->m_dirblksize - 1));
1248 }
1249 
1250 /*
1251  * Look up the entry referred to by args in the leaf format directory.
1252  * Most of the work is done by the xfs_dir2_leaf_lookup_int routine which
1253  * is also used by the node-format code.
1254  */
1255 int
xfs_dir2_leaf_lookup(xfs_da_args_t * args)1256 xfs_dir2_leaf_lookup(
1257 	xfs_da_args_t		*args)		/* operation arguments */
1258 {
1259 	xfs_dabuf_t		*dbp;		/* data block buffer */
1260 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
1261 	xfs_inode_t		*dp;		/* incore directory inode */
1262 	int			error;		/* error return code */
1263 	int			index;		/* found entry index */
1264 	xfs_dabuf_t		*lbp;		/* leaf buffer */
1265 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1266 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1267 	xfs_trans_t		*tp;		/* transaction pointer */
1268 
1269 	trace_xfs_dir2_leaf_lookup(args);
1270 
1271 	/*
1272 	 * Look up name in the leaf block, returning both buffers and index.
1273 	 */
1274 	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1275 		return error;
1276 	}
1277 	tp = args->trans;
1278 	dp = args->dp;
1279 	xfs_dir2_leaf_check(dp, lbp);
1280 	leaf = lbp->data;
1281 	/*
1282 	 * Get to the leaf entry and contained data entry address.
1283 	 */
1284 	lep = &leaf->ents[index];
1285 	/*
1286 	 * Point to the data entry.
1287 	 */
1288 	dep = (xfs_dir2_data_entry_t *)
1289 	      ((char *)dbp->data +
1290 	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1291 	/*
1292 	 * Return the found inode number & CI name if appropriate
1293 	 */
1294 	args->inumber = be64_to_cpu(dep->inumber);
1295 	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1296 	xfs_da_brelse(tp, dbp);
1297 	xfs_da_brelse(tp, lbp);
1298 	return XFS_ERROR(error);
1299 }
1300 
1301 /*
1302  * Look up name/hash in the leaf block.
1303  * Fill in indexp with the found index, and dbpp with the data buffer.
1304  * If not found dbpp will be NULL, and ENOENT comes back.
1305  * lbpp will always be filled in with the leaf buffer unless there's an error.
1306  */
1307 static int					/* error */
xfs_dir2_leaf_lookup_int(xfs_da_args_t * args,xfs_dabuf_t ** lbpp,int * indexp,xfs_dabuf_t ** dbpp)1308 xfs_dir2_leaf_lookup_int(
1309 	xfs_da_args_t		*args,		/* operation arguments */
1310 	xfs_dabuf_t		**lbpp,		/* out: leaf buffer */
1311 	int			*indexp,	/* out: index in leaf block */
1312 	xfs_dabuf_t		**dbpp)		/* out: data buffer */
1313 {
1314 	xfs_dir2_db_t		curdb = -1;	/* current data block number */
1315 	xfs_dabuf_t		*dbp = NULL;	/* data buffer */
1316 	xfs_dir2_data_entry_t	*dep;		/* data entry */
1317 	xfs_inode_t		*dp;		/* incore directory inode */
1318 	int			error;		/* error return code */
1319 	int			index;		/* index in leaf block */
1320 	xfs_dabuf_t		*lbp;		/* leaf buffer */
1321 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1322 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1323 	xfs_mount_t		*mp;		/* filesystem mount point */
1324 	xfs_dir2_db_t		newdb;		/* new data block number */
1325 	xfs_trans_t		*tp;		/* transaction pointer */
1326 	xfs_dir2_db_t		cidb = -1;	/* case match data block no. */
1327 	enum xfs_dacmp		cmp;		/* name compare result */
1328 
1329 	dp = args->dp;
1330 	tp = args->trans;
1331 	mp = dp->i_mount;
1332 	/*
1333 	 * Read the leaf block into the buffer.
1334 	 */
1335 	error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
1336 							XFS_DATA_FORK);
1337 	if (error)
1338 		return error;
1339 	*lbpp = lbp;
1340 	leaf = lbp->data;
1341 	xfs_dir2_leaf_check(dp, lbp);
1342 	/*
1343 	 * Look for the first leaf entry with our hash value.
1344 	 */
1345 	index = xfs_dir2_leaf_search_hash(args, lbp);
1346 	/*
1347 	 * Loop over all the entries with the right hash value
1348 	 * looking to match the name.
1349 	 */
1350 	for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
1351 				be32_to_cpu(lep->hashval) == args->hashval;
1352 				lep++, index++) {
1353 		/*
1354 		 * Skip over stale leaf entries.
1355 		 */
1356 		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
1357 			continue;
1358 		/*
1359 		 * Get the new data block number.
1360 		 */
1361 		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1362 		/*
1363 		 * If it's not the same as the old data block number,
1364 		 * need to pitch the old one and read the new one.
1365 		 */
1366 		if (newdb != curdb) {
1367 			if (dbp)
1368 				xfs_da_brelse(tp, dbp);
1369 			error = xfs_da_read_buf(tp, dp,
1370 						xfs_dir2_db_to_da(mp, newdb),
1371 						-1, &dbp, XFS_DATA_FORK);
1372 			if (error) {
1373 				xfs_da_brelse(tp, lbp);
1374 				return error;
1375 			}
1376 			xfs_dir2_data_check(dp, dbp);
1377 			curdb = newdb;
1378 		}
1379 		/*
1380 		 * Point to the data entry.
1381 		 */
1382 		dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
1383 			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1384 		/*
1385 		 * Compare name and if it's an exact match, return the index
1386 		 * and buffer. If it's the first case-insensitive match, store
1387 		 * the index and buffer and continue looking for an exact match.
1388 		 */
1389 		cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
1390 		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
1391 			args->cmpresult = cmp;
1392 			*indexp = index;
1393 			/* case exact match: return the current buffer. */
1394 			if (cmp == XFS_CMP_EXACT) {
1395 				*dbpp = dbp;
1396 				return 0;
1397 			}
1398 			cidb = curdb;
1399 		}
1400 	}
1401 	ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1402 	/*
1403 	 * Here, we can only be doing a lookup (not a rename or remove).
1404 	 * If a case-insensitive match was found earlier, re-read the
1405 	 * appropriate data block if required and return it.
1406 	 */
1407 	if (args->cmpresult == XFS_CMP_CASE) {
1408 		ASSERT(cidb != -1);
1409 		if (cidb != curdb) {
1410 			xfs_da_brelse(tp, dbp);
1411 			error = xfs_da_read_buf(tp, dp,
1412 						xfs_dir2_db_to_da(mp, cidb),
1413 						-1, &dbp, XFS_DATA_FORK);
1414 			if (error) {
1415 				xfs_da_brelse(tp, lbp);
1416 				return error;
1417 			}
1418 		}
1419 		*dbpp = dbp;
1420 		return 0;
1421 	}
1422 	/*
1423 	 * No match found, return ENOENT.
1424 	 */
1425 	ASSERT(cidb == -1);
1426 	if (dbp)
1427 		xfs_da_brelse(tp, dbp);
1428 	xfs_da_brelse(tp, lbp);
1429 	return XFS_ERROR(ENOENT);
1430 }
1431 
1432 /*
1433  * Remove an entry from a leaf format directory.
1434  */
1435 int						/* error */
xfs_dir2_leaf_removename(xfs_da_args_t * args)1436 xfs_dir2_leaf_removename(
1437 	xfs_da_args_t		*args)		/* operation arguments */
1438 {
1439 	__be16			*bestsp;	/* leaf block best freespace */
1440 	xfs_dir2_data_t		*data;		/* data block structure */
1441 	xfs_dir2_db_t		db;		/* data block number */
1442 	xfs_dabuf_t		*dbp;		/* data block buffer */
1443 	xfs_dir2_data_entry_t	*dep;		/* data entry structure */
1444 	xfs_inode_t		*dp;		/* incore directory inode */
1445 	int			error;		/* error return code */
1446 	xfs_dir2_db_t		i;		/* temporary data block # */
1447 	int			index;		/* index into leaf entries */
1448 	xfs_dabuf_t		*lbp;		/* leaf buffer */
1449 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1450 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1451 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1452 	xfs_mount_t		*mp;		/* filesystem mount point */
1453 	int			needlog;	/* need to log data header */
1454 	int			needscan;	/* need to rescan data frees */
1455 	xfs_dir2_data_off_t	oldbest;	/* old value of best free */
1456 	xfs_trans_t		*tp;		/* transaction pointer */
1457 
1458 	trace_xfs_dir2_leaf_removename(args);
1459 
1460 	/*
1461 	 * Lookup the leaf entry, get the leaf and data blocks read in.
1462 	 */
1463 	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1464 		return error;
1465 	}
1466 	dp = args->dp;
1467 	tp = args->trans;
1468 	mp = dp->i_mount;
1469 	leaf = lbp->data;
1470 	data = dbp->data;
1471 	xfs_dir2_data_check(dp, dbp);
1472 	/*
1473 	 * Point to the leaf entry, use that to point to the data entry.
1474 	 */
1475 	lep = &leaf->ents[index];
1476 	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1477 	dep = (xfs_dir2_data_entry_t *)
1478 	      ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1479 	needscan = needlog = 0;
1480 	oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
1481 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1482 	bestsp = xfs_dir2_leaf_bests_p(ltp);
1483 	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
1484 	/*
1485 	 * Mark the former data entry unused.
1486 	 */
1487 	xfs_dir2_data_make_free(tp, dbp,
1488 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
1489 		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
1490 	/*
1491 	 * We just mark the leaf entry stale by putting a null in it.
1492 	 */
1493 	be16_add_cpu(&leaf->hdr.stale, 1);
1494 	xfs_dir2_leaf_log_header(tp, lbp);
1495 	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
1496 	xfs_dir2_leaf_log_ents(tp, lbp, index, index);
1497 	/*
1498 	 * Scan the freespace in the data block again if necessary,
1499 	 * log the data block header if necessary.
1500 	 */
1501 	if (needscan)
1502 		xfs_dir2_data_freescan(mp, data, &needlog);
1503 	if (needlog)
1504 		xfs_dir2_data_log_header(tp, dbp);
1505 	/*
1506 	 * If the longest freespace in the data block has changed,
1507 	 * put the new value in the bests table and log that.
1508 	 */
1509 	if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) {
1510 		bestsp[db] = data->hdr.bestfree[0].length;
1511 		xfs_dir2_leaf_log_bests(tp, lbp, db, db);
1512 	}
1513 	xfs_dir2_data_check(dp, dbp);
1514 	/*
1515 	 * If the data block is now empty then get rid of the data block.
1516 	 */
1517 	if (be16_to_cpu(data->hdr.bestfree[0].length) ==
1518 	    mp->m_dirblksize - (uint)sizeof(data->hdr)) {
1519 		ASSERT(db != mp->m_dirdatablk);
1520 		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1521 			/*
1522 			 * Nope, can't get rid of it because it caused
1523 			 * allocation of a bmap btree block to do so.
1524 			 * Just go on, returning success, leaving the
1525 			 * empty block in place.
1526 			 */
1527 			if (error == ENOSPC && args->total == 0) {
1528 				xfs_da_buf_done(dbp);
1529 				error = 0;
1530 			}
1531 			xfs_dir2_leaf_check(dp, lbp);
1532 			xfs_da_buf_done(lbp);
1533 			return error;
1534 		}
1535 		dbp = NULL;
1536 		/*
1537 		 * If this is the last data block then compact the
1538 		 * bests table by getting rid of entries.
1539 		 */
1540 		if (db == be32_to_cpu(ltp->bestcount) - 1) {
1541 			/*
1542 			 * Look for the last active entry (i).
1543 			 */
1544 			for (i = db - 1; i > 0; i--) {
1545 				if (be16_to_cpu(bestsp[i]) != NULLDATAOFF)
1546 					break;
1547 			}
1548 			/*
1549 			 * Copy the table down so inactive entries at the
1550 			 * end are removed.
1551 			 */
1552 			memmove(&bestsp[db - i], bestsp,
1553 				(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));
1554 			be32_add_cpu(&ltp->bestcount, -(db - i));
1555 			xfs_dir2_leaf_log_tail(tp, lbp);
1556 			xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1557 		} else
1558 			bestsp[db] = cpu_to_be16(NULLDATAOFF);
1559 	}
1560 	/*
1561 	 * If the data block was not the first one, drop it.
1562 	 */
1563 	else if (db != mp->m_dirdatablk && dbp != NULL) {
1564 		xfs_da_buf_done(dbp);
1565 		dbp = NULL;
1566 	}
1567 	xfs_dir2_leaf_check(dp, lbp);
1568 	/*
1569 	 * See if we can convert to block form.
1570 	 */
1571 	return xfs_dir2_leaf_to_block(args, lbp, dbp);
1572 }
1573 
1574 /*
1575  * Replace the inode number in a leaf format directory entry.
1576  */
1577 int						/* error */
xfs_dir2_leaf_replace(xfs_da_args_t * args)1578 xfs_dir2_leaf_replace(
1579 	xfs_da_args_t		*args)		/* operation arguments */
1580 {
1581 	xfs_dabuf_t		*dbp;		/* data block buffer */
1582 	xfs_dir2_data_entry_t	*dep;		/* data block entry */
1583 	xfs_inode_t		*dp;		/* incore directory inode */
1584 	int			error;		/* error return code */
1585 	int			index;		/* index of leaf entry */
1586 	xfs_dabuf_t		*lbp;		/* leaf buffer */
1587 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1588 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1589 	xfs_trans_t		*tp;		/* transaction pointer */
1590 
1591 	trace_xfs_dir2_leaf_replace(args);
1592 
1593 	/*
1594 	 * Look up the entry.
1595 	 */
1596 	if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
1597 		return error;
1598 	}
1599 	dp = args->dp;
1600 	leaf = lbp->data;
1601 	/*
1602 	 * Point to the leaf entry, get data address from it.
1603 	 */
1604 	lep = &leaf->ents[index];
1605 	/*
1606 	 * Point to the data entry.
1607 	 */
1608 	dep = (xfs_dir2_data_entry_t *)
1609 	      ((char *)dbp->data +
1610 	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1611 	ASSERT(args->inumber != be64_to_cpu(dep->inumber));
1612 	/*
1613 	 * Put the new inode number in, log it.
1614 	 */
1615 	dep->inumber = cpu_to_be64(args->inumber);
1616 	tp = args->trans;
1617 	xfs_dir2_data_log_entry(tp, dbp, dep);
1618 	xfs_da_buf_done(dbp);
1619 	xfs_dir2_leaf_check(dp, lbp);
1620 	xfs_da_brelse(tp, lbp);
1621 	return 0;
1622 }
1623 
1624 /*
1625  * Return index in the leaf block (lbp) which is either the first
1626  * one with this hash value, or if there are none, the insert point
1627  * for that hash value.
1628  */
1629 int						/* index value */
xfs_dir2_leaf_search_hash(xfs_da_args_t * args,xfs_dabuf_t * lbp)1630 xfs_dir2_leaf_search_hash(
1631 	xfs_da_args_t		*args,		/* operation arguments */
1632 	xfs_dabuf_t		*lbp)		/* leaf buffer */
1633 {
1634 	xfs_dahash_t		hash=0;		/* hash from this entry */
1635 	xfs_dahash_t		hashwant;	/* hash value looking for */
1636 	int			high;		/* high leaf index */
1637 	int			low;		/* low leaf index */
1638 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1639 	xfs_dir2_leaf_entry_t	*lep;		/* leaf entry */
1640 	int			mid=0;		/* current leaf index */
1641 
1642 	leaf = lbp->data;
1643 #ifndef __KERNEL__
1644 	if (!leaf->hdr.count)
1645 		return 0;
1646 #endif
1647 	/*
1648 	 * Note, the table cannot be empty, so we have to go through the loop.
1649 	 * Binary search the leaf entries looking for our hash value.
1650 	 */
1651 	for (lep = leaf->ents, low = 0, high = be16_to_cpu(leaf->hdr.count) - 1,
1652 		hashwant = args->hashval;
1653 	     low <= high; ) {
1654 		mid = (low + high) >> 1;
1655 		if ((hash = be32_to_cpu(lep[mid].hashval)) == hashwant)
1656 			break;
1657 		if (hash < hashwant)
1658 			low = mid + 1;
1659 		else
1660 			high = mid - 1;
1661 	}
1662 	/*
1663 	 * Found one, back up through all the equal hash values.
1664 	 */
1665 	if (hash == hashwant) {
1666 		while (mid > 0 && be32_to_cpu(lep[mid - 1].hashval) == hashwant) {
1667 			mid--;
1668 		}
1669 	}
1670 	/*
1671 	 * Need to point to an entry higher than ours.
1672 	 */
1673 	else if (hash < hashwant)
1674 		mid++;
1675 	return mid;
1676 }
1677 
1678 /*
1679  * Trim off a trailing data block.  We know it's empty since the leaf
1680  * freespace table says so.
1681  */
1682 int						/* error */
xfs_dir2_leaf_trim_data(xfs_da_args_t * args,xfs_dabuf_t * lbp,xfs_dir2_db_t db)1683 xfs_dir2_leaf_trim_data(
1684 	xfs_da_args_t		*args,		/* operation arguments */
1685 	xfs_dabuf_t		*lbp,		/* leaf buffer */
1686 	xfs_dir2_db_t		db)		/* data block number */
1687 {
1688 	__be16			*bestsp;	/* leaf bests table */
1689 #ifdef DEBUG
1690 	xfs_dir2_data_t		*data;		/* data block structure */
1691 #endif
1692 	xfs_dabuf_t		*dbp;		/* data block buffer */
1693 	xfs_inode_t		*dp;		/* incore directory inode */
1694 	int			error;		/* error return value */
1695 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1696 	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */
1697 	xfs_mount_t		*mp;		/* filesystem mount point */
1698 	xfs_trans_t		*tp;		/* transaction pointer */
1699 
1700 	dp = args->dp;
1701 	mp = dp->i_mount;
1702 	tp = args->trans;
1703 	/*
1704 	 * Read the offending data block.  We need its buffer.
1705 	 */
1706 	if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp,
1707 			XFS_DATA_FORK))) {
1708 		return error;
1709 	}
1710 #ifdef DEBUG
1711 	data = dbp->data;
1712 	ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
1713 #endif
1714 	/* this seems to be an error
1715 	 * data is only valid if DEBUG is defined?
1716 	 * RMC 09/08/1999
1717 	 */
1718 
1719 	leaf = lbp->data;
1720 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1721 	ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
1722 	       mp->m_dirblksize - (uint)sizeof(data->hdr));
1723 	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1724 	/*
1725 	 * Get rid of the data block.
1726 	 */
1727 	if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1728 		ASSERT(error != ENOSPC);
1729 		xfs_da_brelse(tp, dbp);
1730 		return error;
1731 	}
1732 	/*
1733 	 * Eliminate the last bests entry from the table.
1734 	 */
1735 	bestsp = xfs_dir2_leaf_bests_p(ltp);
1736 	be32_add_cpu(&ltp->bestcount, -1);
1737 	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp));
1738 	xfs_dir2_leaf_log_tail(tp, lbp);
1739 	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1740 	return 0;
1741 }
1742 
1743 /*
1744  * Convert node form directory to leaf form directory.
1745  * The root of the node form dir needs to already be a LEAFN block.
1746  * Just return if we can't do anything.
1747  */
1748 int						/* error */
xfs_dir2_node_to_leaf(xfs_da_state_t * state)1749 xfs_dir2_node_to_leaf(
1750 	xfs_da_state_t		*state)		/* directory operation state */
1751 {
1752 	xfs_da_args_t		*args;		/* operation arguments */
1753 	xfs_inode_t		*dp;		/* incore directory inode */
1754 	int			error;		/* error return code */
1755 	xfs_dabuf_t		*fbp;		/* buffer for freespace block */
1756 	xfs_fileoff_t		fo;		/* freespace file offset */
1757 	xfs_dir2_free_t		*free;		/* freespace structure */
1758 	xfs_dabuf_t		*lbp;		/* buffer for leaf block */
1759 	xfs_dir2_leaf_tail_t	*ltp;		/* tail of leaf structure */
1760 	xfs_dir2_leaf_t		*leaf;		/* leaf structure */
1761 	xfs_mount_t		*mp;		/* filesystem mount point */
1762 	int			rval;		/* successful free trim? */
1763 	xfs_trans_t		*tp;		/* transaction pointer */
1764 
1765 	/*
1766 	 * There's more than a leaf level in the btree, so there must
1767 	 * be multiple leafn blocks.  Give up.
1768 	 */
1769 	if (state->path.active > 1)
1770 		return 0;
1771 	args = state->args;
1772 
1773 	trace_xfs_dir2_node_to_leaf(args);
1774 
1775 	mp = state->mp;
1776 	dp = args->dp;
1777 	tp = args->trans;
1778 	/*
1779 	 * Get the last offset in the file.
1780 	 */
1781 	if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) {
1782 		return error;
1783 	}
1784 	fo -= mp->m_dirblkfsbs;
1785 	/*
1786 	 * If there are freespace blocks other than the first one,
1787 	 * take this opportunity to remove trailing empty freespace blocks
1788 	 * that may have been left behind during no-space-reservation
1789 	 * operations.
1790 	 */
1791 	while (fo > mp->m_dirfreeblk) {
1792 		if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {
1793 			return error;
1794 		}
1795 		if (rval)
1796 			fo -= mp->m_dirblkfsbs;
1797 		else
1798 			return 0;
1799 	}
1800 	/*
1801 	 * Now find the block just before the freespace block.
1802 	 */
1803 	if ((error = xfs_bmap_last_before(tp, dp, &fo, XFS_DATA_FORK))) {
1804 		return error;
1805 	}
1806 	/*
1807 	 * If it's not the single leaf block, give up.
1808 	 */
1809 	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
1810 		return 0;
1811 	lbp = state->path.blk[0].bp;
1812 	leaf = lbp->data;
1813 	ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
1814 	/*
1815 	 * Read the freespace block.
1816 	 */
1817 	if ((error = xfs_da_read_buf(tp, dp, mp->m_dirfreeblk, -1, &fbp,
1818 			XFS_DATA_FORK))) {
1819 		return error;
1820 	}
1821 	free = fbp->data;
1822 	ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
1823 	ASSERT(!free->hdr.firstdb);
1824 	/*
1825 	 * Now see if the leafn and free data will fit in a leaf1.
1826 	 * If not, release the buffer and give up.
1827 	 */
1828 	if ((uint)sizeof(leaf->hdr) +
1829 	    (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) +
1830 	    be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) +
1831 	    (uint)sizeof(leaf->tail) >
1832 	    mp->m_dirblksize) {
1833 		xfs_da_brelse(tp, fbp);
1834 		return 0;
1835 	}
1836 	/*
1837 	 * If the leaf has any stale entries in it, compress them out.
1838 	 * The compact routine will log the header.
1839 	 */
1840 	if (be16_to_cpu(leaf->hdr.stale))
1841 		xfs_dir2_leaf_compact(args, lbp);
1842 	else
1843 		xfs_dir2_leaf_log_header(tp, lbp);
1844 	leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAF1_MAGIC);
1845 	/*
1846 	 * Set up the leaf tail from the freespace block.
1847 	 */
1848 	ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1849 	ltp->bestcount = free->hdr.nvalid;
1850 	/*
1851 	 * Set up the leaf bests table.
1852 	 */
1853 	memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
1854 		be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
1855 	xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1856 	xfs_dir2_leaf_log_tail(tp, lbp);
1857 	xfs_dir2_leaf_check(dp, lbp);
1858 	/*
1859 	 * Get rid of the freespace block.
1860 	 */
1861 	error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp);
1862 	if (error) {
1863 		/*
1864 		 * This can't fail here because it can only happen when
1865 		 * punching out the middle of an extent, and this is an
1866 		 * isolated block.
1867 		 */
1868 		ASSERT(error != ENOSPC);
1869 		return error;
1870 	}
1871 	fbp = NULL;
1872 	/*
1873 	 * Now see if we can convert the single-leaf directory
1874 	 * down to a block form directory.
1875 	 * This routine always kills the dabuf for the leaf, so
1876 	 * eliminate it from the path.
1877 	 */
1878 	error = xfs_dir2_leaf_to_block(args, lbp, NULL);
1879 	state->path.blk[0].bp = NULL;
1880 	return error;
1881 }
1882