1 /*
2  * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32 
33 #include "xfs.h"
34 
35 #include "xfs_macros.h"
36 #include "xfs_types.h"
37 #include "xfs_inum.h"
38 #include "xfs_log.h"
39 #include "xfs_trans.h"
40 #include "xfs_sb.h"
41 #include "xfs_ag.h"
42 #include "xfs_dir.h"
43 #include "xfs_dir2.h"
44 #include "xfs_dmapi.h"
45 #include "xfs_mount.h"
46 #include "xfs_alloc_btree.h"
47 #include "xfs_bmap_btree.h"
48 #include "xfs_ialloc_btree.h"
49 #include "xfs_btree.h"
50 #include "xfs_ialloc.h"
51 #include "xfs_attr_sf.h"
52 #include "xfs_dir_sf.h"
53 #include "xfs_dir2_sf.h"
54 #include "xfs_dinode.h"
55 #include "xfs_inode_item.h"
56 #include "xfs_inode.h"
57 #include "xfs_itable.h"
58 #include "xfs_extfree_item.h"
59 #include "xfs_alloc.h"
60 #include "xfs_bmap.h"
61 #include "xfs_rtalloc.h"
62 #include "xfs_error.h"
63 #include "xfs_da_btree.h"
64 #include "xfs_dir_leaf.h"
65 #include "xfs_bit.h"
66 #include "xfs_rw.h"
67 #include "xfs_quota.h"
68 #include "xfs_trans_space.h"
69 #include "xfs_buf_item.h"
70 
71 
72 #ifdef DEBUG
73 STATIC void
74 xfs_bmap_check_leaf_extents(xfs_btree_cur_t *cur, xfs_inode_t *ip, int whichfork);
75 #endif
76 
77 kmem_zone_t		*xfs_bmap_free_item_zone;
78 
79 /*
80  * Prototypes for internal bmap routines.
81  */
82 
83 
84 /*
85  * Called from xfs_bmap_add_attrfork to handle extents format files.
86  */
87 STATIC int					/* error */
88 xfs_bmap_add_attrfork_extents(
89 	xfs_trans_t		*tp,		/* transaction pointer */
90 	xfs_inode_t		*ip,		/* incore inode pointer */
91 	xfs_fsblock_t		*firstblock,	/* first block allocated */
92 	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
93 	int			*flags);	/* inode logging flags */
94 
95 /*
96  * Called from xfs_bmap_add_attrfork to handle local format files.
97  */
98 STATIC int					/* error */
99 xfs_bmap_add_attrfork_local(
100 	xfs_trans_t		*tp,		/* transaction pointer */
101 	xfs_inode_t		*ip,		/* incore inode pointer */
102 	xfs_fsblock_t		*firstblock,	/* first block allocated */
103 	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
104 	int			*flags);	/* inode logging flags */
105 
106 /*
107  * Called by xfs_bmapi to update extent list structure and the btree
108  * after allocating space (or doing a delayed allocation).
109  */
110 STATIC int				/* error */
111 xfs_bmap_add_extent(
112 	xfs_inode_t		*ip,	/* incore inode pointer */
113 	xfs_extnum_t		idx,	/* extent number to update/insert */
114 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
115 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
116 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
117 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
118 	int			*logflagsp, /* inode logging flags */
119 	int			whichfork, /* data or attr fork */
120 	int			rsvd);	/* OK to allocate reserved blocks */
121 
122 /*
123  * Called by xfs_bmap_add_extent to handle cases converting a delayed
124  * allocation to a real allocation.
125  */
126 STATIC int				/* error */
127 xfs_bmap_add_extent_delay_real(
128 	xfs_inode_t		*ip,	/* incore inode pointer */
129 	xfs_extnum_t		idx,	/* extent number to update/insert */
130 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
131 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
132 	xfs_filblks_t		*dnew,	/* new delayed-alloc indirect blocks */
133 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
134 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
135 	int			*logflagsp, /* inode logging flags */
136 	int			rsvd);	/* OK to allocate reserved blocks */
137 
138 /*
139  * Called by xfs_bmap_add_extent to handle cases converting a hole
140  * to a delayed allocation.
141  */
142 STATIC int				/* error */
143 xfs_bmap_add_extent_hole_delay(
144 	xfs_inode_t		*ip,	/* incore inode pointer */
145 	xfs_extnum_t		idx,	/* extent number to update/insert */
146 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
147 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
148 	int			*logflagsp,/* inode logging flags */
149 	int			rsvd);	/* OK to allocate reserved blocks */
150 
151 /*
152  * Called by xfs_bmap_add_extent to handle cases converting a hole
153  * to a real allocation.
154  */
155 STATIC int				/* error */
156 xfs_bmap_add_extent_hole_real(
157 	xfs_inode_t		*ip,	/* incore inode pointer */
158 	xfs_extnum_t		idx,	/* extent number to update/insert */
159 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
160 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
161 	int			*logflagsp, /* inode logging flags */
162 	int			whichfork); /* data or attr fork */
163 
164 /*
165  * Called by xfs_bmap_add_extent to handle cases converting an unwritten
166  * allocation to a real allocation or vice versa.
167  */
168 STATIC int				/* error */
169 xfs_bmap_add_extent_unwritten_real(
170 	xfs_inode_t		*ip,	/* incore inode pointer */
171 	xfs_extnum_t		idx,	/* extent number to update/insert */
172 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
173 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
174 	int			*logflagsp); /* inode logging flags */
175 
176 /*
177  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
178  * It figures out where to ask the underlying allocator to put the new extent.
179  */
180 STATIC int				/* error */
181 xfs_bmap_alloc(
182 	xfs_bmalloca_t		*ap);	/* bmap alloc argument struct */
183 
184 /*
185  * Transform a btree format file with only one leaf node, where the
186  * extents list will fit in the inode, into an extents format file.
187  * Since the extent list is already in-core, all we have to do is
188  * give up the space for the btree root and pitch the leaf block.
189  */
190 STATIC int				/* error */
191 xfs_bmap_btree_to_extents(
192 	xfs_trans_t		*tp,	/* transaction pointer */
193 	xfs_inode_t		*ip,	/* incore inode pointer */
194 	xfs_btree_cur_t		*cur,	/* btree cursor */
195 	int			*logflagsp, /* inode logging flags */
196 	int			whichfork); /* data or attr fork */
197 
198 #ifdef DEBUG
199 /*
200  * Check that the extents list for the inode ip is in the right order.
201  */
202 STATIC void
203 xfs_bmap_check_extents(
204 	xfs_inode_t		*ip,		/* incore inode pointer */
205 	int			whichfork);	/* data or attr fork */
206 #endif
207 
208 /*
209  * Called by xfs_bmapi to update extent list structure and the btree
210  * after removing space (or undoing a delayed allocation).
211  */
212 STATIC int				/* error */
213 xfs_bmap_del_extent(
214 	xfs_inode_t		*ip,	/* incore inode pointer */
215 	xfs_trans_t		*tp,	/* current trans pointer */
216 	xfs_extnum_t		idx,	/* extent number to update/insert */
217 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
218 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
219 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
220 	int			*logflagsp,/* inode logging flags */
221 	int			whichfork, /* data or attr fork */
222 	int			rsvd);	 /* OK to allocate reserved blocks */
223 
224 /*
225  * Remove the entry "free" from the free item list.  Prev points to the
226  * previous entry, unless "free" is the head of the list.
227  */
228 STATIC void
229 xfs_bmap_del_free(
230 	xfs_bmap_free_t		*flist,	/* free item list header */
231 	xfs_bmap_free_item_t	*prev,	/* previous item on list, if any */
232 	xfs_bmap_free_item_t	*free);	/* list item to be freed */
233 
234 /*
235  * Remove count entries from the extents array for inode "ip", starting
236  * at index "idx".  Copies the remaining items down over the deleted ones,
237  * and gives back the excess memory.
238  */
239 STATIC void
240 xfs_bmap_delete_exlist(
241 	xfs_inode_t	*ip,		/* incore inode pointer */
242 	xfs_extnum_t	idx,		/* starting delete index */
243 	xfs_extnum_t	count,		/* count of items to delete */
244 	int		whichfork);	/* data or attr fork */
245 
246 /*
247  * Convert an extents-format file into a btree-format file.
248  * The new file will have a root block (in the inode) and a single child block.
249  */
250 STATIC int					/* error */
251 xfs_bmap_extents_to_btree(
252 	xfs_trans_t		*tp,		/* transaction pointer */
253 	xfs_inode_t		*ip,		/* incore inode pointer */
254 	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
255 	xfs_bmap_free_t		*flist,		/* blocks freed in xaction */
256 	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
257 	int			wasdel,		/* converting a delayed alloc */
258 	int			*logflagsp,	/* inode logging flags */
259 	int			whichfork);	/* data or attr fork */
260 
261 /*
262  * Insert new item(s) in the extent list for inode "ip".
263  * "count" new items are inserted starting at index idx.
264  */
265 STATIC void
266 xfs_bmap_insert_exlist(
267 	xfs_inode_t	*ip,		/* incore inode pointer */
268 	xfs_extnum_t	idx,		/* starting index of new items */
269 	xfs_extnum_t	count,		/* number of inserted items */
270 	xfs_bmbt_irec_t	*new,		/* items to insert */
271 	int		whichfork);	/* data or attr fork */
272 
273 /*
274  * Convert a local file to an extents file.
275  * This code is sort of bogus, since the file data needs to get
276  * logged so it won't be lost.  The bmap-level manipulations are ok, though.
277  */
278 STATIC int				/* error */
279 xfs_bmap_local_to_extents(
280 	xfs_trans_t	*tp,		/* transaction pointer */
281 	xfs_inode_t	*ip,		/* incore inode pointer */
282 	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
283 	xfs_extlen_t	total,		/* total blocks needed by transaction */
284 	int		*logflagsp,	/* inode logging flags */
285 	int		whichfork);	/* data or attr fork */
286 
287 /*
288  * Search the extents list for the inode, for the extent containing bno.
289  * If bno lies in a hole, point to the next entry.  If bno lies past eof,
290  * *eofp will be set, and *prevp will contain the last entry (null if none).
291  * Else, *lastxp will be set to the index of the found
292  * entry; *gotp will contain the entry.
293  */
294 STATIC xfs_bmbt_rec_t *			/* pointer to found extent entry */
295 xfs_bmap_search_extents(
296 	xfs_inode_t	*ip,		/* incore inode pointer */
297 	xfs_fileoff_t	bno,		/* block number searched for */
298 	int		whichfork,	/* data or attr fork */
299 	int		*eofp,		/* out: end of file found */
300 	xfs_extnum_t	*lastxp,	/* out: last extent index */
301 	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
302 	xfs_bmbt_irec_t	*prevp);	/* out: previous extent entry found */
303 
304 #ifdef XFS_BMAP_TRACE
305 /*
306  * Add a bmap trace buffer entry.  Base routine for the others.
307  */
308 STATIC void
309 xfs_bmap_trace_addentry(
310 	int		opcode,		/* operation */
311 	char		*fname,		/* function name */
312 	char		*desc,		/* operation description */
313 	xfs_inode_t	*ip,		/* incore inode pointer */
314 	xfs_extnum_t	idx,		/* index of entry(ies) */
315 	xfs_extnum_t	cnt,		/* count of entries, 1 or 2 */
316 	xfs_bmbt_rec_t	*r1,		/* first record */
317 	xfs_bmbt_rec_t	*r2,		/* second record or null */
318 	int		whichfork);	/* data or attr fork */
319 
320 /*
321  * Add bmap trace entry prior to a call to xfs_bmap_delete_exlist.
322  */
323 STATIC void
324 xfs_bmap_trace_delete(
325 	char		*fname,		/* function name */
326 	char		*desc,		/* operation description */
327 	xfs_inode_t	*ip,		/* incore inode pointer */
328 	xfs_extnum_t	idx,		/* index of entry(entries) deleted */
329 	xfs_extnum_t	cnt,		/* count of entries deleted, 1 or 2 */
330 	int		whichfork);	/* data or attr fork */
331 
332 /*
333  * Add bmap trace entry prior to a call to xfs_bmap_insert_exlist, or
334  * reading in the extents list from the disk (in the btree).
335  */
336 STATIC void
337 xfs_bmap_trace_insert(
338 	char		*fname,		/* function name */
339 	char		*desc,		/* operation description */
340 	xfs_inode_t	*ip,		/* incore inode pointer */
341 	xfs_extnum_t	idx,		/* index of entry(entries) inserted */
342 	xfs_extnum_t	cnt,		/* count of entries inserted, 1 or 2 */
343 	xfs_bmbt_irec_t	*r1,		/* inserted record 1 */
344 	xfs_bmbt_irec_t	*r2,		/* inserted record 2 or null */
345 	int		whichfork);	/* data or attr fork */
346 
347 /*
348  * Add bmap trace entry after updating an extent list entry in place.
349  */
350 STATIC void
351 xfs_bmap_trace_post_update(
352 	char		*fname,		/* function name */
353 	char		*desc,		/* operation description */
354 	xfs_inode_t	*ip,		/* incore inode pointer */
355 	xfs_extnum_t	idx,		/* index of entry updated */
356 	int		whichfork);	/* data or attr fork */
357 
358 /*
359  * Add bmap trace entry prior to updating an extent list entry in place.
360  */
361 STATIC void
362 xfs_bmap_trace_pre_update(
363 	char		*fname,		/* function name */
364 	char		*desc,		/* operation description */
365 	xfs_inode_t	*ip,		/* incore inode pointer */
366 	xfs_extnum_t	idx,		/* index of entry to be updated */
367 	int		whichfork);	/* data or attr fork */
368 
369 #else
370 #define	xfs_bmap_trace_delete(f,d,ip,i,c,w)
371 #define	xfs_bmap_trace_insert(f,d,ip,i,c,r1,r2,w)
372 #define	xfs_bmap_trace_post_update(f,d,ip,i,w)
373 #define	xfs_bmap_trace_pre_update(f,d,ip,i,w)
374 #endif	/* XFS_BMAP_TRACE */
375 
376 /*
377  * Compute the worst-case number of indirect blocks that will be used
378  * for ip's delayed extent of length "len".
379  */
380 STATIC xfs_filblks_t
381 xfs_bmap_worst_indlen(
382 	xfs_inode_t		*ip,	/* incore inode pointer */
383 	xfs_filblks_t		len);	/* delayed extent length */
384 
385 #ifdef DEBUG
386 /*
387  * Perform various validation checks on the values being returned
388  * from xfs_bmapi().
389  */
390 STATIC void
391 xfs_bmap_validate_ret(
392 	xfs_fileoff_t		bno,
393 	xfs_filblks_t		len,
394 	int			flags,
395 	xfs_bmbt_irec_t		*mval,
396 	int			nmap,
397 	int			ret_nmap);
398 #else
399 #define	xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
400 #endif /* DEBUG */
401 
402 #if defined(XFS_RW_TRACE)
403 STATIC void
404 xfs_bunmap_trace(
405 	xfs_inode_t		*ip,
406 	xfs_fileoff_t		bno,
407 	xfs_filblks_t		len,
408 	int			flags,
409 	inst_t			*ra);
410 #else
411 #define	xfs_bunmap_trace(ip, bno, len, flags, ra)
412 #endif	/* XFS_RW_TRACE */
413 
414 STATIC int
415 xfs_bmap_count_tree(
416 	xfs_mount_t     *mp,
417 	xfs_trans_t     *tp,
418 	xfs_fsblock_t   blockno,
419 	int             levelin,
420 	int		*count);
421 
422 STATIC int
423 xfs_bmap_count_leaves(
424 	xfs_bmbt_rec_t		*frp,
425 	int			numrecs,
426 	int			*count);
427 
428 /*
429  * Bmap internal routines.
430  */
431 
432 /*
433  * Called from xfs_bmap_add_attrfork to handle btree format files.
434  */
435 STATIC int					/* error */
xfs_bmap_add_attrfork_btree(xfs_trans_t * tp,xfs_inode_t * ip,xfs_fsblock_t * firstblock,xfs_bmap_free_t * flist,int * flags)436 xfs_bmap_add_attrfork_btree(
437 	xfs_trans_t		*tp,		/* transaction pointer */
438 	xfs_inode_t		*ip,		/* incore inode pointer */
439 	xfs_fsblock_t		*firstblock,	/* first block allocated */
440 	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
441 	int			*flags)		/* inode logging flags */
442 {
443 	xfs_btree_cur_t		*cur;		/* btree cursor */
444 	int			error;		/* error return value */
445 	xfs_mount_t		*mp;		/* file system mount struct */
446 	int			stat;		/* newroot status */
447 
448 	mp = ip->i_mount;
449 	if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
450 		*flags |= XFS_ILOG_DBROOT;
451 	else {
452 		cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
453 			XFS_DATA_FORK);
454 		cur->bc_private.b.flist = flist;
455 		cur->bc_private.b.firstblock = *firstblock;
456 		if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
457 			goto error0;
458 		ASSERT(stat == 1);	/* must be at least one entry */
459 		if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
460 			goto error0;
461 		if (stat == 0) {
462 			xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
463 			return XFS_ERROR(ENOSPC);
464 		}
465 		*firstblock = cur->bc_private.b.firstblock;
466 		cur->bc_private.b.allocated = 0;
467 		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
468 	}
469 	return 0;
470 error0:
471 	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
472 	return error;
473 }
474 
475 /*
476  * Called from xfs_bmap_add_attrfork to handle extents format files.
477  */
478 STATIC int					/* error */
xfs_bmap_add_attrfork_extents(xfs_trans_t * tp,xfs_inode_t * ip,xfs_fsblock_t * firstblock,xfs_bmap_free_t * flist,int * flags)479 xfs_bmap_add_attrfork_extents(
480 	xfs_trans_t		*tp,		/* transaction pointer */
481 	xfs_inode_t		*ip,		/* incore inode pointer */
482 	xfs_fsblock_t		*firstblock,	/* first block allocated */
483 	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
484 	int			*flags)		/* inode logging flags */
485 {
486 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
487 	int			error;		/* error return value */
488 
489 	if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
490 		return 0;
491 	cur = NULL;
492 	error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist, &cur, 0,
493 		flags, XFS_DATA_FORK);
494 	if (cur) {
495 		cur->bc_private.b.allocated = 0;
496 		xfs_btree_del_cursor(cur,
497 			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
498 	}
499 	return error;
500 }
501 
502 /*
503  * Called from xfs_bmap_add_attrfork to handle local format files.
504  */
505 STATIC int					/* error */
xfs_bmap_add_attrfork_local(xfs_trans_t * tp,xfs_inode_t * ip,xfs_fsblock_t * firstblock,xfs_bmap_free_t * flist,int * flags)506 xfs_bmap_add_attrfork_local(
507 	xfs_trans_t		*tp,		/* transaction pointer */
508 	xfs_inode_t		*ip,		/* incore inode pointer */
509 	xfs_fsblock_t		*firstblock,	/* first block allocated */
510 	xfs_bmap_free_t		*flist,		/* blocks to free at commit */
511 	int			*flags)		/* inode logging flags */
512 {
513 	xfs_da_args_t		dargs;		/* args for dir/attr code */
514 	int			error;		/* error return value */
515 	xfs_mount_t		*mp;		/* mount structure pointer */
516 
517 	if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
518 		return 0;
519 	if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
520 		mp = ip->i_mount;
521 		memset(&dargs, 0, sizeof(dargs));
522 		dargs.dp = ip;
523 		dargs.firstblock = firstblock;
524 		dargs.flist = flist;
525 		dargs.total = mp->m_dirblkfsbs;
526 		dargs.whichfork = XFS_DATA_FORK;
527 		dargs.trans = tp;
528 		error = XFS_DIR_SHORTFORM_TO_SINGLE(mp, &dargs);
529 	} else
530 		error = xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags,
531 			XFS_DATA_FORK);
532 	return error;
533 }
534 
535 /*
536  * Called by xfs_bmapi to update extent list structure and the btree
537  * after allocating space (or doing a delayed allocation).
538  */
539 STATIC int				/* error */
xfs_bmap_add_extent(xfs_inode_t * ip,xfs_extnum_t idx,xfs_btree_cur_t ** curp,xfs_bmbt_irec_t * new,xfs_fsblock_t * first,xfs_bmap_free_t * flist,int * logflagsp,int whichfork,int rsvd)540 xfs_bmap_add_extent(
541 	xfs_inode_t		*ip,	/* incore inode pointer */
542 	xfs_extnum_t		idx,	/* extent number to update/insert */
543 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
544 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
545 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
546 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
547 	int			*logflagsp, /* inode logging flags */
548 	int			whichfork, /* data or attr fork */
549 	int			rsvd)	/* OK to use reserved data blocks */
550 {
551 	xfs_btree_cur_t		*cur;	/* btree cursor or null */
552 	xfs_filblks_t		da_new; /* new count del alloc blocks used */
553 	xfs_filblks_t		da_old; /* old count del alloc blocks used */
554 	int			error;	/* error return value */
555 #ifdef XFS_BMAP_TRACE
556 	static char		fname[] = "xfs_bmap_add_extent";
557 #endif
558 	xfs_ifork_t		*ifp;	/* inode fork ptr */
559 	int			logflags; /* returned value */
560 	xfs_extnum_t		nextents; /* number of extents in file now */
561 
562 	XFS_STATS_INC(xs_add_exlist);
563 	cur = *curp;
564 	ifp = XFS_IFORK_PTR(ip, whichfork);
565 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
566 	ASSERT(idx <= nextents);
567 	da_old = da_new = 0;
568 	error = 0;
569 	/*
570 	 * This is the first extent added to a new/empty file.
571 	 * Special case this one, so other routines get to assume there are
572 	 * already extents in the list.
573 	 */
574 	if (nextents == 0) {
575 		xfs_bmap_trace_insert(fname, "insert empty", ip, 0, 1, new,
576 			NULL, whichfork);
577 		xfs_bmap_insert_exlist(ip, 0, 1, new, whichfork);
578 		ASSERT(cur == NULL);
579 		ifp->if_lastex = 0;
580 		if (!ISNULLSTARTBLOCK(new->br_startblock)) {
581 			XFS_IFORK_NEXT_SET(ip, whichfork, 1);
582 			logflags = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
583 		} else
584 			logflags = 0;
585 	}
586 	/*
587 	 * Any kind of new delayed allocation goes here.
588 	 */
589 	else if (ISNULLSTARTBLOCK(new->br_startblock)) {
590 		if (cur)
591 			ASSERT((cur->bc_private.b.flags &
592 				XFS_BTCUR_BPRV_WASDEL) == 0);
593 		if ((error = xfs_bmap_add_extent_hole_delay(ip, idx, cur, new,
594 				&logflags, rsvd)))
595 			goto done;
596 	}
597 	/*
598 	 * Real allocation off the end of the file.
599 	 */
600 	else if (idx == nextents) {
601 		if (cur)
602 			ASSERT((cur->bc_private.b.flags &
603 				XFS_BTCUR_BPRV_WASDEL) == 0);
604 		if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
605 				&logflags, whichfork)))
606 			goto done;
607 	} else {
608 		xfs_bmbt_irec_t	prev;	/* old extent at offset idx */
609 
610 		/*
611 		 * Get the record referred to by idx.
612 		 */
613 		xfs_bmbt_get_all(&ifp->if_u1.if_extents[idx], &prev);
614 		/*
615 		 * If it's a real allocation record, and the new allocation ends
616 		 * after the start of the referred to record, then we're filling
617 		 * in a delayed or unwritten allocation with a real one, or
618 		 * converting real back to unwritten.
619 		 */
620 		if (!ISNULLSTARTBLOCK(new->br_startblock) &&
621 		    new->br_startoff + new->br_blockcount > prev.br_startoff) {
622 			if (prev.br_state != XFS_EXT_UNWRITTEN &&
623 			    ISNULLSTARTBLOCK(prev.br_startblock)) {
624 				da_old = STARTBLOCKVAL(prev.br_startblock);
625 				if (cur)
626 					ASSERT(cur->bc_private.b.flags &
627 						XFS_BTCUR_BPRV_WASDEL);
628 				if ((error = xfs_bmap_add_extent_delay_real(ip,
629 					idx, &cur, new, &da_new, first, flist,
630 					&logflags, rsvd)))
631 					goto done;
632 			} else if (new->br_state == XFS_EXT_NORM) {
633 				ASSERT(new->br_state == XFS_EXT_NORM);
634 				if ((error = xfs_bmap_add_extent_unwritten_real(
635 					ip, idx, &cur, new, &logflags)))
636 					goto done;
637 			} else {
638 				ASSERT(new->br_state == XFS_EXT_UNWRITTEN);
639 				if ((error = xfs_bmap_add_extent_unwritten_real(
640 					ip, idx, &cur, new, &logflags)))
641 					goto done;
642 			}
643 			ASSERT(*curp == cur || *curp == NULL);
644 		}
645 		/*
646 		 * Otherwise we're filling in a hole with an allocation.
647 		 */
648 		else {
649 			if (cur)
650 				ASSERT((cur->bc_private.b.flags &
651 					XFS_BTCUR_BPRV_WASDEL) == 0);
652 			if ((error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
653 					new, &logflags, whichfork)))
654 				goto done;
655 		}
656 	}
657 
658 	ASSERT(*curp == cur || *curp == NULL);
659 	/*
660 	 * Convert to a btree if necessary.
661 	 */
662 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
663 	    XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
664 		int	tmp_logflags;	/* partial log flag return val */
665 
666 		ASSERT(cur == NULL);
667 		error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first,
668 			flist, &cur, da_old > 0, &tmp_logflags, whichfork);
669 		logflags |= tmp_logflags;
670 		if (error)
671 			goto done;
672 	}
673 	/*
674 	 * Adjust for changes in reserved delayed indirect blocks.
675 	 * Nothing to do for disk quotas here.
676 	 */
677 	if (da_old || da_new) {
678 		xfs_filblks_t	nblks;
679 
680 		nblks = da_new;
681 		if (cur)
682 			nblks += cur->bc_private.b.allocated;
683 		ASSERT(nblks <= da_old);
684 		if (nblks < da_old)
685 			xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
686 				(int)(da_old - nblks), rsvd);
687 	}
688 	/*
689 	 * Clear out the allocated field, done with it now in any case.
690 	 */
691 	if (cur) {
692 		cur->bc_private.b.allocated = 0;
693 		*curp = cur;
694 	}
695 done:
696 #ifdef DEBUG
697 	if (!error)
698 		xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
699 #endif
700 	*logflagsp = logflags;
701 	return error;
702 }
703 
704 /*
705  * Called by xfs_bmap_add_extent to handle cases converting a delayed
706  * allocation to a real allocation.
707  */
708 STATIC int				/* error */
xfs_bmap_add_extent_delay_real(xfs_inode_t * ip,xfs_extnum_t idx,xfs_btree_cur_t ** curp,xfs_bmbt_irec_t * new,xfs_filblks_t * dnew,xfs_fsblock_t * first,xfs_bmap_free_t * flist,int * logflagsp,int rsvd)709 xfs_bmap_add_extent_delay_real(
710 	xfs_inode_t		*ip,	/* incore inode pointer */
711 	xfs_extnum_t		idx,	/* extent number to update/insert */
712 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
713 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
714 	xfs_filblks_t		*dnew,	/* new delayed-alloc indirect blocks */
715 	xfs_fsblock_t		*first,	/* pointer to firstblock variable */
716 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
717 	int			*logflagsp, /* inode logging flags */
718 	int			rsvd)	/* OK to use reserved data block allocation */
719 {
720 	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
721 	xfs_btree_cur_t		*cur;	/* btree cursor */
722 	int			diff;	/* temp value */
723 	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
724 	int			error;	/* error return value */
725 #ifdef XFS_BMAP_TRACE
726 	static char		fname[] = "xfs_bmap_add_extent_delay_real";
727 #endif
728 	int			i;	/* temp state */
729 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
730 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
731 					/* left is 0, right is 1, prev is 2 */
732 	int			rval=0;	/* return value (logging flags) */
733 	int			state = 0;/* state bits, accessed thru macros */
734 	xfs_filblks_t		temp;	/* value for dnew calculations */
735 	xfs_filblks_t		temp2;	/* value for dnew calculations */
736 	int			tmp_rval;	/* partial logging flags */
737 	enum {				/* bit number definitions for state */
738 		LEFT_CONTIG,	RIGHT_CONTIG,
739 		LEFT_FILLING,	RIGHT_FILLING,
740 		LEFT_DELAY,	RIGHT_DELAY,
741 		LEFT_VALID,	RIGHT_VALID
742 	};
743 
744 #define	LEFT		r[0]
745 #define	RIGHT		r[1]
746 #define	PREV		r[2]
747 #define	MASK(b)		(1 << (b))
748 #define	MASK2(a,b)	(MASK(a) | MASK(b))
749 #define	MASK3(a,b,c)	(MASK2(a,b) | MASK(c))
750 #define	MASK4(a,b,c,d)	(MASK3(a,b,c) | MASK(d))
751 #define	STATE_SET(b,v)	((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
752 #define	STATE_TEST(b)	(state & MASK(b))
753 #define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
754 				       ((state &= ~MASK(b)), 0))
755 #define	SWITCH_STATE		\
756 	(state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
757 
758 	/*
759 	 * Set up a bunch of variables to make the tests simpler.
760 	 */
761 	cur = *curp;
762 	base = ip->i_df.if_u1.if_extents;
763 	ep = &base[idx];
764 	xfs_bmbt_get_all(ep, &PREV);
765 	new_endoff = new->br_startoff + new->br_blockcount;
766 	ASSERT(PREV.br_startoff <= new->br_startoff);
767 	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
768 	/*
769 	 * Set flags determining what part of the previous delayed allocation
770 	 * extent is being replaced by a real allocation.
771 	 */
772 	STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff);
773 	STATE_SET(RIGHT_FILLING,
774 		PREV.br_startoff + PREV.br_blockcount == new_endoff);
775 	/*
776 	 * Check and set flags if this segment has a left neighbor.
777 	 * Don't set contiguous if the combined extent would be too large.
778 	 */
779 	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
780 		xfs_bmbt_get_all(ep - 1, &LEFT);
781 		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock));
782 	}
783 	STATE_SET(LEFT_CONTIG,
784 		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
785 		LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
786 		LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
787 		LEFT.br_state == new->br_state &&
788 		LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN);
789 	/*
790 	 * Check and set flags if this segment has a right neighbor.
791 	 * Don't set contiguous if the combined extent would be too large.
792 	 * Also check for all-three-contiguous being too large.
793 	 */
794 	if (STATE_SET_TEST(RIGHT_VALID,
795 			idx <
796 			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
797 		xfs_bmbt_get_all(ep + 1, &RIGHT);
798 		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock));
799 	}
800 	STATE_SET(RIGHT_CONTIG,
801 		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
802 		new_endoff == RIGHT.br_startoff &&
803 		new->br_startblock + new->br_blockcount ==
804 		    RIGHT.br_startblock &&
805 		new->br_state == RIGHT.br_state &&
806 		new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
807 		((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) !=
808 		  MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) ||
809 		 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
810 		     <= MAXEXTLEN));
811 	error = 0;
812 	/*
813 	 * Switch out based on the FILLING and CONTIG state bits.
814 	 */
815 	switch (SWITCH_STATE) {
816 
817 	case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
818 		/*
819 		 * Filling in all of a previously delayed allocation extent.
820 		 * The left and right neighbors are both contiguous with new.
821 		 */
822 		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
823 			XFS_DATA_FORK);
824 		xfs_bmbt_set_blockcount(ep - 1,
825 			LEFT.br_blockcount + PREV.br_blockcount +
826 			RIGHT.br_blockcount);
827 		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
828 			XFS_DATA_FORK);
829 		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
830 			XFS_DATA_FORK);
831 		xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK);
832 		ip->i_df.if_lastex = idx - 1;
833 		ip->i_d.di_nextents--;
834 		if (cur == NULL)
835 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
836 		else {
837 			rval = XFS_ILOG_CORE;
838 			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
839 					RIGHT.br_startblock,
840 					RIGHT.br_blockcount, &i)))
841 				goto done;
842 			ASSERT(i == 1);
843 			if ((error = xfs_bmbt_delete(cur, &i)))
844 				goto done;
845 			ASSERT(i == 1);
846 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
847 				goto done;
848 			ASSERT(i == 1);
849 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
850 					LEFT.br_startblock,
851 					LEFT.br_blockcount +
852 					PREV.br_blockcount +
853 					RIGHT.br_blockcount, LEFT.br_state)))
854 				goto done;
855 		}
856 		*dnew = 0;
857 		break;
858 
859 	case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
860 		/*
861 		 * Filling in all of a previously delayed allocation extent.
862 		 * The left neighbor is contiguous, the right is not.
863 		 */
864 		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
865 			XFS_DATA_FORK);
866 		xfs_bmbt_set_blockcount(ep - 1,
867 			LEFT.br_blockcount + PREV.br_blockcount);
868 		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
869 			XFS_DATA_FORK);
870 		ip->i_df.if_lastex = idx - 1;
871 		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
872 			XFS_DATA_FORK);
873 		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
874 		if (cur == NULL)
875 			rval = XFS_ILOG_DEXT;
876 		else {
877 			rval = 0;
878 			if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
879 					LEFT.br_startblock, LEFT.br_blockcount,
880 					&i)))
881 				goto done;
882 			ASSERT(i == 1);
883 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
884 					LEFT.br_startblock,
885 					LEFT.br_blockcount +
886 					PREV.br_blockcount, LEFT.br_state)))
887 				goto done;
888 		}
889 		*dnew = 0;
890 		break;
891 
892 	case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
893 		/*
894 		 * Filling in all of a previously delayed allocation extent.
895 		 * The right neighbor is contiguous, the left is not.
896 		 */
897 		xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
898 			XFS_DATA_FORK);
899 		xfs_bmbt_set_startblock(ep, new->br_startblock);
900 		xfs_bmbt_set_blockcount(ep,
901 			PREV.br_blockcount + RIGHT.br_blockcount);
902 		xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
903 			XFS_DATA_FORK);
904 		ip->i_df.if_lastex = idx;
905 		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
906 			XFS_DATA_FORK);
907 		xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK);
908 		if (cur == NULL)
909 			rval = XFS_ILOG_DEXT;
910 		else {
911 			rval = 0;
912 			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
913 					RIGHT.br_startblock,
914 					RIGHT.br_blockcount, &i)))
915 				goto done;
916 			ASSERT(i == 1);
917 			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
918 					new->br_startblock,
919 					PREV.br_blockcount +
920 					RIGHT.br_blockcount, PREV.br_state)))
921 				goto done;
922 		}
923 		*dnew = 0;
924 		break;
925 
926 	case MASK2(LEFT_FILLING, RIGHT_FILLING):
927 		/*
928 		 * Filling in all of a previously delayed allocation extent.
929 		 * Neither the left nor right neighbors are contiguous with
930 		 * the new one.
931 		 */
932 		xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
933 			XFS_DATA_FORK);
934 		xfs_bmbt_set_startblock(ep, new->br_startblock);
935 		xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
936 			XFS_DATA_FORK);
937 		ip->i_df.if_lastex = idx;
938 		ip->i_d.di_nextents++;
939 		if (cur == NULL)
940 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
941 		else {
942 			rval = XFS_ILOG_CORE;
943 			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
944 					new->br_startblock, new->br_blockcount,
945 					&i)))
946 				goto done;
947 			ASSERT(i == 0);
948 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
949 			if ((error = xfs_bmbt_insert(cur, &i)))
950 				goto done;
951 			ASSERT(i == 1);
952 		}
953 		*dnew = 0;
954 		break;
955 
956 	case MASK2(LEFT_FILLING, LEFT_CONTIG):
957 		/*
958 		 * Filling in the first part of a previous delayed allocation.
959 		 * The left neighbor is contiguous.
960 		 */
961 		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
962 			XFS_DATA_FORK);
963 		xfs_bmbt_set_blockcount(ep - 1,
964 			LEFT.br_blockcount + new->br_blockcount);
965 		xfs_bmbt_set_startoff(ep,
966 			PREV.br_startoff + new->br_blockcount);
967 		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
968 			XFS_DATA_FORK);
969 		temp = PREV.br_blockcount - new->br_blockcount;
970 		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
971 			XFS_DATA_FORK);
972 		xfs_bmbt_set_blockcount(ep, temp);
973 		ip->i_df.if_lastex = idx - 1;
974 		if (cur == NULL)
975 			rval = XFS_ILOG_DEXT;
976 		else {
977 			rval = 0;
978 			if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
979 					LEFT.br_startblock, LEFT.br_blockcount,
980 					&i)))
981 				goto done;
982 			ASSERT(i == 1);
983 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
984 					LEFT.br_startblock,
985 					LEFT.br_blockcount +
986 					new->br_blockcount,
987 					LEFT.br_state)))
988 				goto done;
989 		}
990 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
991 			STARTBLOCKVAL(PREV.br_startblock));
992 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
993 		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
994 			XFS_DATA_FORK);
995 		*dnew = temp;
996 		break;
997 
998 	case MASK(LEFT_FILLING):
999 		/*
1000 		 * Filling in the first part of a previous delayed allocation.
1001 		 * The left neighbor is not contiguous.
1002 		 */
1003 		xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
1004 		xfs_bmbt_set_startoff(ep, new_endoff);
1005 		temp = PREV.br_blockcount - new->br_blockcount;
1006 		xfs_bmbt_set_blockcount(ep, temp);
1007 		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
1008 			XFS_DATA_FORK);
1009 		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
1010 		ip->i_df.if_lastex = idx;
1011 		ip->i_d.di_nextents++;
1012 		if (cur == NULL)
1013 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1014 		else {
1015 			rval = XFS_ILOG_CORE;
1016 			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1017 					new->br_startblock, new->br_blockcount,
1018 					&i)))
1019 				goto done;
1020 			ASSERT(i == 0);
1021 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
1022 			if ((error = xfs_bmbt_insert(cur, &i)))
1023 				goto done;
1024 			ASSERT(i == 1);
1025 		}
1026 		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1027 		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
1028 			error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
1029 					first, flist, &cur, 1, &tmp_rval,
1030 					XFS_DATA_FORK);
1031 			rval |= tmp_rval;
1032 			if (error)
1033 				goto done;
1034 		}
1035 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
1036 			STARTBLOCKVAL(PREV.br_startblock) -
1037 			(cur ? cur->bc_private.b.allocated : 0));
1038 		base = ip->i_df.if_u1.if_extents;
1039 		ep = &base[idx + 1];
1040 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1041 		xfs_bmap_trace_post_update(fname, "LF", ip, idx + 1,
1042 			XFS_DATA_FORK);
1043 		*dnew = temp;
1044 		break;
1045 
1046 	case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
1047 		/*
1048 		 * Filling in the last part of a previous delayed allocation.
1049 		 * The right neighbor is contiguous with the new allocation.
1050 		 */
1051 		temp = PREV.br_blockcount - new->br_blockcount;
1052 		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
1053 			XFS_DATA_FORK);
1054 		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
1055 			XFS_DATA_FORK);
1056 		xfs_bmbt_set_blockcount(ep, temp);
1057 		xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock,
1058 			new->br_blockcount + RIGHT.br_blockcount,
1059 			RIGHT.br_state);
1060 		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
1061 			XFS_DATA_FORK);
1062 		ip->i_df.if_lastex = idx + 1;
1063 		if (cur == NULL)
1064 			rval = XFS_ILOG_DEXT;
1065 		else {
1066 			rval = 0;
1067 			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
1068 					RIGHT.br_startblock,
1069 					RIGHT.br_blockcount, &i)))
1070 				goto done;
1071 			ASSERT(i == 1);
1072 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
1073 					new->br_startblock,
1074 					new->br_blockcount +
1075 					RIGHT.br_blockcount,
1076 					RIGHT.br_state)))
1077 				goto done;
1078 		}
1079 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
1080 			STARTBLOCKVAL(PREV.br_startblock));
1081 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1082 		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
1083 			XFS_DATA_FORK);
1084 		*dnew = temp;
1085 		break;
1086 
1087 	case MASK(RIGHT_FILLING):
1088 		/*
1089 		 * Filling in the last part of a previous delayed allocation.
1090 		 * The right neighbor is not contiguous.
1091 		 */
1092 		temp = PREV.br_blockcount - new->br_blockcount;
1093 		xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
1094 		xfs_bmbt_set_blockcount(ep, temp);
1095 		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
1096 			new, NULL, XFS_DATA_FORK);
1097 		xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK);
1098 		ip->i_df.if_lastex = idx + 1;
1099 		ip->i_d.di_nextents++;
1100 		if (cur == NULL)
1101 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1102 		else {
1103 			rval = XFS_ILOG_CORE;
1104 			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1105 					new->br_startblock, new->br_blockcount,
1106 					&i)))
1107 				goto done;
1108 			ASSERT(i == 0);
1109 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
1110 			if ((error = xfs_bmbt_insert(cur, &i)))
1111 				goto done;
1112 			ASSERT(i == 1);
1113 		}
1114 		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1115 		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
1116 			error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
1117 				first, flist, &cur, 1, &tmp_rval,
1118 				XFS_DATA_FORK);
1119 			rval |= tmp_rval;
1120 			if (error)
1121 				goto done;
1122 		}
1123 		temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
1124 			STARTBLOCKVAL(PREV.br_startblock) -
1125 			(cur ? cur->bc_private.b.allocated : 0));
1126 		base = ip->i_df.if_u1.if_extents;
1127 		ep = &base[idx];
1128 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1129 		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
1130 		*dnew = temp;
1131 		break;
1132 
1133 	case 0:
1134 		/*
1135 		 * Filling in the middle part of a previous delayed allocation.
1136 		 * Contiguity is impossible here.
1137 		 * This case is avoided almost all the time.
1138 		 */
1139 		temp = new->br_startoff - PREV.br_startoff;
1140 		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
1141 		xfs_bmbt_set_blockcount(ep, temp);
1142 		r[0] = *new;
1143 		r[1].br_startoff = new_endoff;
1144 		temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
1145 		r[1].br_blockcount = temp2;
1146 		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
1147 			XFS_DATA_FORK);
1148 		xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK);
1149 		ip->i_df.if_lastex = idx + 1;
1150 		ip->i_d.di_nextents++;
1151 		if (cur == NULL)
1152 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1153 		else {
1154 			rval = XFS_ILOG_CORE;
1155 			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1156 					new->br_startblock, new->br_blockcount,
1157 					&i)))
1158 				goto done;
1159 			ASSERT(i == 0);
1160 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
1161 			if ((error = xfs_bmbt_insert(cur, &i)))
1162 				goto done;
1163 			ASSERT(i == 1);
1164 		}
1165 		if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1166 		    ip->i_d.di_nextents > ip->i_df.if_ext_max) {
1167 			error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
1168 					first, flist, &cur, 1, &tmp_rval,
1169 					XFS_DATA_FORK);
1170 			rval |= tmp_rval;
1171 			if (error)
1172 				goto done;
1173 		}
1174 		temp = xfs_bmap_worst_indlen(ip, temp);
1175 		temp2 = xfs_bmap_worst_indlen(ip, temp2);
1176 		diff = (int)(temp + temp2 - STARTBLOCKVAL(PREV.br_startblock) -
1177 			(cur ? cur->bc_private.b.allocated : 0));
1178 		if (diff > 0 &&
1179 		    xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -diff, rsvd)) {
1180 			/*
1181 			 * Ick gross gag me with a spoon.
1182 			 */
1183 			ASSERT(0);	/* want to see if this ever happens! */
1184 			while (diff > 0) {
1185 				if (temp) {
1186 					temp--;
1187 					diff--;
1188 					if (!diff ||
1189 					    !xfs_mod_incore_sb(ip->i_mount,
1190 						    XFS_SBS_FDBLOCKS, -diff, rsvd))
1191 						break;
1192 				}
1193 				if (temp2) {
1194 					temp2--;
1195 					diff--;
1196 					if (!diff ||
1197 					    !xfs_mod_incore_sb(ip->i_mount,
1198 						    XFS_SBS_FDBLOCKS, -diff, rsvd))
1199 						break;
1200 				}
1201 			}
1202 		}
1203 		base = ip->i_df.if_u1.if_extents;
1204 		ep = &base[idx];
1205 		xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
1206 		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
1207 		xfs_bmap_trace_pre_update(fname, "0", ip, idx + 2,
1208 			XFS_DATA_FORK);
1209 		xfs_bmbt_set_startblock(ep + 2, NULLSTARTBLOCK((int)temp2));
1210 		xfs_bmap_trace_post_update(fname, "0", ip, idx + 2,
1211 			XFS_DATA_FORK);
1212 		*dnew = temp + temp2;
1213 		break;
1214 
1215 	case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
1216 	case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
1217 	case MASK2(LEFT_FILLING, RIGHT_CONTIG):
1218 	case MASK2(RIGHT_FILLING, LEFT_CONTIG):
1219 	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
1220 	case MASK(LEFT_CONTIG):
1221 	case MASK(RIGHT_CONTIG):
1222 		/*
1223 		 * These cases are all impossible.
1224 		 */
1225 		ASSERT(0);
1226 	}
1227 	*curp = cur;
1228 done:
1229 	*logflagsp = rval;
1230 	return error;
1231 #undef	LEFT
1232 #undef	RIGHT
1233 #undef	PREV
1234 #undef	MASK
1235 #undef	MASK2
1236 #undef	MASK3
1237 #undef	MASK4
1238 #undef	STATE_SET
1239 #undef	STATE_TEST
1240 #undef	STATE_SET_TEST
1241 #undef	SWITCH_STATE
1242 }
1243 
1244 /*
1245  * Called by xfs_bmap_add_extent to handle cases converting an unwritten
1246  * allocation to a real allocation or vice versa.
1247  */
1248 STATIC int				/* error */
xfs_bmap_add_extent_unwritten_real(xfs_inode_t * ip,xfs_extnum_t idx,xfs_btree_cur_t ** curp,xfs_bmbt_irec_t * new,int * logflagsp)1249 xfs_bmap_add_extent_unwritten_real(
1250 	xfs_inode_t		*ip,	/* incore inode pointer */
1251 	xfs_extnum_t		idx,	/* extent number to update/insert */
1252 	xfs_btree_cur_t		**curp,	/* if *curp is null, not a btree */
1253 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
1254 	int			*logflagsp) /* inode logging flags */
1255 {
1256 	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
1257 	xfs_btree_cur_t		*cur;	/* btree cursor */
1258 	xfs_bmbt_rec_t		*ep;	/* extent entry for idx */
1259 	int			error;	/* error return value */
1260 #ifdef XFS_BMAP_TRACE
1261 	static char		fname[] = "xfs_bmap_add_extent_unwritten_real";
1262 #endif
1263 	int			i;	/* temp state */
1264 	xfs_fileoff_t		new_endoff;	/* end offset of new entry */
1265 	xfs_exntst_t		newext;	/* new extent state */
1266 	xfs_exntst_t		oldext;	/* old extent state */
1267 	xfs_bmbt_irec_t		r[3];	/* neighbor extent entries */
1268 					/* left is 0, right is 1, prev is 2 */
1269 	int			rval=0;	/* return value (logging flags) */
1270 	int			state = 0;/* state bits, accessed thru macros */
1271 	enum {				/* bit number definitions for state */
1272 		LEFT_CONTIG,	RIGHT_CONTIG,
1273 		LEFT_FILLING,	RIGHT_FILLING,
1274 		LEFT_DELAY,	RIGHT_DELAY,
1275 		LEFT_VALID,	RIGHT_VALID
1276 	};
1277 
1278 #define	LEFT		r[0]
1279 #define	RIGHT		r[1]
1280 #define	PREV		r[2]
1281 #define	MASK(b)		(1 << (b))
1282 #define	MASK2(a,b)	(MASK(a) | MASK(b))
1283 #define	MASK3(a,b,c)	(MASK2(a,b) | MASK(c))
1284 #define	MASK4(a,b,c,d)	(MASK3(a,b,c) | MASK(d))
1285 #define	STATE_SET(b,v)	((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
1286 #define	STATE_TEST(b)	(state & MASK(b))
1287 #define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
1288 				       ((state &= ~MASK(b)), 0))
1289 #define	SWITCH_STATE		\
1290 	(state & MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG))
1291 
1292 	/*
1293 	 * Set up a bunch of variables to make the tests simpler.
1294 	 */
1295 	error = 0;
1296 	cur = *curp;
1297 	base = ip->i_df.if_u1.if_extents;
1298 	ep = &base[idx];
1299 	xfs_bmbt_get_all(ep, &PREV);
1300 	newext = new->br_state;
1301 	oldext = (newext == XFS_EXT_UNWRITTEN) ?
1302 		XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
1303 	ASSERT(PREV.br_state == oldext);
1304 	new_endoff = new->br_startoff + new->br_blockcount;
1305 	ASSERT(PREV.br_startoff <= new->br_startoff);
1306 	ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1307 	/*
1308 	 * Set flags determining what part of the previous oldext allocation
1309 	 * extent is being replaced by a newext allocation.
1310 	 */
1311 	STATE_SET(LEFT_FILLING, PREV.br_startoff == new->br_startoff);
1312 	STATE_SET(RIGHT_FILLING,
1313 		PREV.br_startoff + PREV.br_blockcount == new_endoff);
1314 	/*
1315 	 * Check and set flags if this segment has a left neighbor.
1316 	 * Don't set contiguous if the combined extent would be too large.
1317 	 */
1318 	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
1319 		xfs_bmbt_get_all(ep - 1, &LEFT);
1320 		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(LEFT.br_startblock));
1321 	}
1322 	STATE_SET(LEFT_CONTIG,
1323 		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
1324 		LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1325 		LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1326 		LEFT.br_state == newext &&
1327 		LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN);
1328 	/*
1329 	 * Check and set flags if this segment has a right neighbor.
1330 	 * Don't set contiguous if the combined extent would be too large.
1331 	 * Also check for all-three-contiguous being too large.
1332 	 */
1333 	if (STATE_SET_TEST(RIGHT_VALID,
1334 			idx <
1335 			ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1)) {
1336 		xfs_bmbt_get_all(ep + 1, &RIGHT);
1337 		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(RIGHT.br_startblock));
1338 	}
1339 	STATE_SET(RIGHT_CONTIG,
1340 		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
1341 		new_endoff == RIGHT.br_startoff &&
1342 		new->br_startblock + new->br_blockcount ==
1343 		    RIGHT.br_startblock &&
1344 		newext == RIGHT.br_state &&
1345 		new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1346 		((state & MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING)) !=
1347 		  MASK3(LEFT_CONTIG, LEFT_FILLING, RIGHT_FILLING) ||
1348 		 LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1349 		     <= MAXEXTLEN));
1350 	/*
1351 	 * Switch out based on the FILLING and CONTIG state bits.
1352 	 */
1353 	switch (SWITCH_STATE) {
1354 
1355 	case MASK4(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
1356 		/*
1357 		 * Setting all of a previous oldext extent to newext.
1358 		 * The left and right neighbors are both contiguous with new.
1359 		 */
1360 		xfs_bmap_trace_pre_update(fname, "LF|RF|LC|RC", ip, idx - 1,
1361 			XFS_DATA_FORK);
1362 		xfs_bmbt_set_blockcount(ep - 1,
1363 			LEFT.br_blockcount + PREV.br_blockcount +
1364 			RIGHT.br_blockcount);
1365 		xfs_bmap_trace_post_update(fname, "LF|RF|LC|RC", ip, idx - 1,
1366 			XFS_DATA_FORK);
1367 		xfs_bmap_trace_delete(fname, "LF|RF|LC|RC", ip, idx, 2,
1368 			XFS_DATA_FORK);
1369 		xfs_bmap_delete_exlist(ip, idx, 2, XFS_DATA_FORK);
1370 		ip->i_df.if_lastex = idx - 1;
1371 		ip->i_d.di_nextents -= 2;
1372 		if (cur == NULL)
1373 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1374 		else {
1375 			rval = XFS_ILOG_CORE;
1376 			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
1377 					RIGHT.br_startblock,
1378 					RIGHT.br_blockcount, &i)))
1379 				goto done;
1380 			ASSERT(i == 1);
1381 			if ((error = xfs_bmbt_delete(cur, &i)))
1382 				goto done;
1383 			ASSERT(i == 1);
1384 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1385 				goto done;
1386 			ASSERT(i == 1);
1387 			if ((error = xfs_bmbt_delete(cur, &i)))
1388 				goto done;
1389 			ASSERT(i == 1);
1390 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1391 				goto done;
1392 			ASSERT(i == 1);
1393 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1394 				LEFT.br_startblock,
1395 				LEFT.br_blockcount + PREV.br_blockcount +
1396 				RIGHT.br_blockcount, LEFT.br_state)))
1397 				goto done;
1398 		}
1399 		break;
1400 
1401 	case MASK3(LEFT_FILLING, RIGHT_FILLING, LEFT_CONTIG):
1402 		/*
1403 		 * Setting all of a previous oldext extent to newext.
1404 		 * The left neighbor is contiguous, the right is not.
1405 		 */
1406 		xfs_bmap_trace_pre_update(fname, "LF|RF|LC", ip, idx - 1,
1407 			XFS_DATA_FORK);
1408 		xfs_bmbt_set_blockcount(ep - 1,
1409 			LEFT.br_blockcount + PREV.br_blockcount);
1410 		xfs_bmap_trace_post_update(fname, "LF|RF|LC", ip, idx - 1,
1411 			XFS_DATA_FORK);
1412 		ip->i_df.if_lastex = idx - 1;
1413 		xfs_bmap_trace_delete(fname, "LF|RF|LC", ip, idx, 1,
1414 			XFS_DATA_FORK);
1415 		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
1416 		ip->i_d.di_nextents--;
1417 		if (cur == NULL)
1418 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1419 		else {
1420 			rval = XFS_ILOG_CORE;
1421 			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1422 					PREV.br_startblock, PREV.br_blockcount,
1423 					&i)))
1424 				goto done;
1425 			ASSERT(i == 1);
1426 			if ((error = xfs_bmbt_delete(cur, &i)))
1427 				goto done;
1428 			ASSERT(i == 1);
1429 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1430 				goto done;
1431 			ASSERT(i == 1);
1432 			if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1433 				LEFT.br_startblock,
1434 				LEFT.br_blockcount + PREV.br_blockcount,
1435 				LEFT.br_state)))
1436 				goto done;
1437 		}
1438 		break;
1439 
1440 	case MASK3(LEFT_FILLING, RIGHT_FILLING, RIGHT_CONTIG):
1441 		/*
1442 		 * Setting all of a previous oldext extent to newext.
1443 		 * The right neighbor is contiguous, the left is not.
1444 		 */
1445 		xfs_bmap_trace_pre_update(fname, "LF|RF|RC", ip, idx,
1446 			XFS_DATA_FORK);
1447 		xfs_bmbt_set_blockcount(ep,
1448 			PREV.br_blockcount + RIGHT.br_blockcount);
1449 		xfs_bmbt_set_state(ep, newext);
1450 		xfs_bmap_trace_post_update(fname, "LF|RF|RC", ip, idx,
1451 			XFS_DATA_FORK);
1452 		ip->i_df.if_lastex = idx;
1453 		xfs_bmap_trace_delete(fname, "LF|RF|RC", ip, idx + 1, 1,
1454 			XFS_DATA_FORK);
1455 		xfs_bmap_delete_exlist(ip, idx + 1, 1, XFS_DATA_FORK);
1456 		ip->i_d.di_nextents--;
1457 		if (cur == NULL)
1458 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1459 		else {
1460 			rval = XFS_ILOG_CORE;
1461 			if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
1462 					RIGHT.br_startblock,
1463 					RIGHT.br_blockcount, &i)))
1464 				goto done;
1465 			ASSERT(i == 1);
1466 			if ((error = xfs_bmbt_delete(cur, &i)))
1467 				goto done;
1468 			ASSERT(i == 1);
1469 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1470 				goto done;
1471 			ASSERT(i == 1);
1472 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
1473 				new->br_startblock,
1474 				new->br_blockcount + RIGHT.br_blockcount,
1475 				newext)))
1476 				goto done;
1477 		}
1478 		break;
1479 
1480 	case MASK2(LEFT_FILLING, RIGHT_FILLING):
1481 		/*
1482 		 * Setting all of a previous oldext extent to newext.
1483 		 * Neither the left nor right neighbors are contiguous with
1484 		 * the new one.
1485 		 */
1486 		xfs_bmap_trace_pre_update(fname, "LF|RF", ip, idx,
1487 			XFS_DATA_FORK);
1488 		xfs_bmbt_set_state(ep, newext);
1489 		xfs_bmap_trace_post_update(fname, "LF|RF", ip, idx,
1490 			XFS_DATA_FORK);
1491 		ip->i_df.if_lastex = idx;
1492 		if (cur == NULL)
1493 			rval = XFS_ILOG_DEXT;
1494 		else {
1495 			rval = 0;
1496 			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1497 					new->br_startblock, new->br_blockcount,
1498 					&i)))
1499 				goto done;
1500 			ASSERT(i == 1);
1501 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
1502 				new->br_startblock, new->br_blockcount,
1503 				newext)))
1504 				goto done;
1505 		}
1506 		break;
1507 
1508 	case MASK2(LEFT_FILLING, LEFT_CONTIG):
1509 		/*
1510 		 * Setting the first part of a previous oldext extent to newext.
1511 		 * The left neighbor is contiguous.
1512 		 */
1513 		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx - 1,
1514 			XFS_DATA_FORK);
1515 		xfs_bmbt_set_blockcount(ep - 1,
1516 			LEFT.br_blockcount + new->br_blockcount);
1517 		xfs_bmbt_set_startoff(ep,
1518 			PREV.br_startoff + new->br_blockcount);
1519 		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx - 1,
1520 			XFS_DATA_FORK);
1521 		xfs_bmap_trace_pre_update(fname, "LF|LC", ip, idx,
1522 			XFS_DATA_FORK);
1523 		xfs_bmbt_set_startblock(ep,
1524 			new->br_startblock + new->br_blockcount);
1525 		xfs_bmbt_set_blockcount(ep,
1526 			PREV.br_blockcount - new->br_blockcount);
1527 		xfs_bmap_trace_post_update(fname, "LF|LC", ip, idx,
1528 			XFS_DATA_FORK);
1529 		ip->i_df.if_lastex = idx - 1;
1530 		if (cur == NULL)
1531 			rval = XFS_ILOG_DEXT;
1532 		else {
1533 			rval = 0;
1534 			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1535 					PREV.br_startblock, PREV.br_blockcount,
1536 					&i)))
1537 				goto done;
1538 			ASSERT(i == 1);
1539 			if ((error = xfs_bmbt_update(cur,
1540 				PREV.br_startoff + new->br_blockcount,
1541 				PREV.br_startblock + new->br_blockcount,
1542 				PREV.br_blockcount - new->br_blockcount,
1543 				oldext)))
1544 				goto done;
1545 			if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1546 				goto done;
1547 			if (xfs_bmbt_update(cur, LEFT.br_startoff,
1548 				LEFT.br_startblock,
1549 				LEFT.br_blockcount + new->br_blockcount,
1550 				LEFT.br_state))
1551 				goto done;
1552 		}
1553 		break;
1554 
1555 	case MASK(LEFT_FILLING):
1556 		/*
1557 		 * Setting the first part of a previous oldext extent to newext.
1558 		 * The left neighbor is not contiguous.
1559 		 */
1560 		xfs_bmap_trace_pre_update(fname, "LF", ip, idx, XFS_DATA_FORK);
1561 		ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
1562 		xfs_bmbt_set_startoff(ep, new_endoff);
1563 		xfs_bmbt_set_blockcount(ep,
1564 			PREV.br_blockcount - new->br_blockcount);
1565 		xfs_bmbt_set_startblock(ep,
1566 			new->br_startblock + new->br_blockcount);
1567 		xfs_bmap_trace_post_update(fname, "LF", ip, idx, XFS_DATA_FORK);
1568 		xfs_bmap_trace_insert(fname, "LF", ip, idx, 1, new, NULL,
1569 			XFS_DATA_FORK);
1570 		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
1571 		ip->i_df.if_lastex = idx;
1572 		ip->i_d.di_nextents++;
1573 		if (cur == NULL)
1574 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1575 		else {
1576 			rval = XFS_ILOG_CORE;
1577 			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1578 					PREV.br_startblock, PREV.br_blockcount,
1579 					&i)))
1580 				goto done;
1581 			ASSERT(i == 1);
1582 			if ((error = xfs_bmbt_update(cur,
1583 				PREV.br_startoff + new->br_blockcount,
1584 				PREV.br_startblock + new->br_blockcount,
1585 				PREV.br_blockcount - new->br_blockcount,
1586 				oldext)))
1587 				goto done;
1588 			cur->bc_rec.b = *new;
1589 			if ((error = xfs_bmbt_insert(cur, &i)))
1590 				goto done;
1591 			ASSERT(i == 1);
1592 		}
1593 		break;
1594 
1595 	case MASK2(RIGHT_FILLING, RIGHT_CONTIG):
1596 		/*
1597 		 * Setting the last part of a previous oldext extent to newext.
1598 		 * The right neighbor is contiguous with the new allocation.
1599 		 */
1600 		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx,
1601 			XFS_DATA_FORK);
1602 		xfs_bmap_trace_pre_update(fname, "RF|RC", ip, idx + 1,
1603 			XFS_DATA_FORK);
1604 		xfs_bmbt_set_blockcount(ep,
1605 			PREV.br_blockcount - new->br_blockcount);
1606 		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx,
1607 			XFS_DATA_FORK);
1608 		xfs_bmbt_set_allf(ep + 1, new->br_startoff, new->br_startblock,
1609 			new->br_blockcount + RIGHT.br_blockcount, newext);
1610 		xfs_bmap_trace_post_update(fname, "RF|RC", ip, idx + 1,
1611 			XFS_DATA_FORK);
1612 		ip->i_df.if_lastex = idx + 1;
1613 		if (cur == NULL)
1614 			rval = XFS_ILOG_DEXT;
1615 		else {
1616 			rval = 0;
1617 			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1618 					PREV.br_startblock,
1619 					PREV.br_blockcount, &i)))
1620 				goto done;
1621 			ASSERT(i == 1);
1622 			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1623 				PREV.br_startblock,
1624 				PREV.br_blockcount - new->br_blockcount,
1625 				oldext)))
1626 				goto done;
1627 			if ((error = xfs_bmbt_increment(cur, 0, &i)))
1628 				goto done;
1629 			if ((error = xfs_bmbt_update(cur, new->br_startoff,
1630 				new->br_startblock,
1631 				new->br_blockcount + RIGHT.br_blockcount,
1632 				newext)))
1633 				goto done;
1634 		}
1635 		break;
1636 
1637 	case MASK(RIGHT_FILLING):
1638 		/*
1639 		 * Setting the last part of a previous oldext extent to newext.
1640 		 * The right neighbor is not contiguous.
1641 		 */
1642 		xfs_bmap_trace_pre_update(fname, "RF", ip, idx, XFS_DATA_FORK);
1643 		xfs_bmbt_set_blockcount(ep,
1644 			PREV.br_blockcount - new->br_blockcount);
1645 		xfs_bmap_trace_post_update(fname, "RF", ip, idx, XFS_DATA_FORK);
1646 		xfs_bmap_trace_insert(fname, "RF", ip, idx + 1, 1,
1647 			new, NULL, XFS_DATA_FORK);
1648 		xfs_bmap_insert_exlist(ip, idx + 1, 1, new, XFS_DATA_FORK);
1649 		ip->i_df.if_lastex = idx + 1;
1650 		ip->i_d.di_nextents++;
1651 		if (cur == NULL)
1652 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1653 		else {
1654 			rval = XFS_ILOG_CORE;
1655 			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1656 					PREV.br_startblock, PREV.br_blockcount,
1657 					&i)))
1658 				goto done;
1659 			ASSERT(i == 1);
1660 			if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1661 				PREV.br_startblock,
1662 				PREV.br_blockcount - new->br_blockcount,
1663 				oldext)))
1664 				goto done;
1665 			if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1666 					new->br_startblock, new->br_blockcount,
1667 					&i)))
1668 				goto done;
1669 			ASSERT(i == 0);
1670 			cur->bc_rec.b.br_state = XFS_EXT_NORM;
1671 			if ((error = xfs_bmbt_insert(cur, &i)))
1672 				goto done;
1673 			ASSERT(i == 1);
1674 		}
1675 		break;
1676 
1677 	case 0:
1678 		/*
1679 		 * Setting the middle part of a previous oldext extent to
1680 		 * newext.  Contiguity is impossible here.
1681 		 * One extent becomes three extents.
1682 		 */
1683 		xfs_bmap_trace_pre_update(fname, "0", ip, idx, XFS_DATA_FORK);
1684 		xfs_bmbt_set_blockcount(ep,
1685 			new->br_startoff - PREV.br_startoff);
1686 		xfs_bmap_trace_post_update(fname, "0", ip, idx, XFS_DATA_FORK);
1687 		r[0] = *new;
1688 		r[1].br_startoff = new_endoff;
1689 		r[1].br_blockcount =
1690 			PREV.br_startoff + PREV.br_blockcount - new_endoff;
1691 		r[1].br_startblock = new->br_startblock + new->br_blockcount;
1692 		r[1].br_state = oldext;
1693 		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 2, &r[0], &r[1],
1694 			XFS_DATA_FORK);
1695 		xfs_bmap_insert_exlist(ip, idx + 1, 2, &r[0], XFS_DATA_FORK);
1696 		ip->i_df.if_lastex = idx + 1;
1697 		ip->i_d.di_nextents += 2;
1698 		if (cur == NULL)
1699 			rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1700 		else {
1701 			rval = XFS_ILOG_CORE;
1702 			if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
1703 					PREV.br_startblock, PREV.br_blockcount,
1704 					&i)))
1705 				goto done;
1706 			ASSERT(i == 1);
1707 			/* new right extent - oldext */
1708 			if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
1709 				r[1].br_startblock, r[1].br_blockcount,
1710 				r[1].br_state)))
1711 				goto done;
1712 			/* new left extent - oldext */
1713 			PREV.br_blockcount =
1714 				new->br_startoff - PREV.br_startoff;
1715 			cur->bc_rec.b = PREV;
1716 			if ((error = xfs_bmbt_insert(cur, &i)))
1717 				goto done;
1718 			ASSERT(i == 1);
1719 			if ((error = xfs_bmbt_increment(cur, 0, &i)))
1720 				goto done;
1721 			ASSERT(i == 1);
1722 			/* new middle extent - newext */
1723 			cur->bc_rec.b = *new;
1724 			if ((error = xfs_bmbt_insert(cur, &i)))
1725 				goto done;
1726 			ASSERT(i == 1);
1727 		}
1728 		break;
1729 
1730 	case MASK3(LEFT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
1731 	case MASK3(RIGHT_FILLING, LEFT_CONTIG, RIGHT_CONTIG):
1732 	case MASK2(LEFT_FILLING, RIGHT_CONTIG):
1733 	case MASK2(RIGHT_FILLING, LEFT_CONTIG):
1734 	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
1735 	case MASK(LEFT_CONTIG):
1736 	case MASK(RIGHT_CONTIG):
1737 		/*
1738 		 * These cases are all impossible.
1739 		 */
1740 		ASSERT(0);
1741 	}
1742 	*curp = cur;
1743 done:
1744 	*logflagsp = rval;
1745 	return error;
1746 #undef	LEFT
1747 #undef	RIGHT
1748 #undef	PREV
1749 #undef	MASK
1750 #undef	MASK2
1751 #undef	MASK3
1752 #undef	MASK4
1753 #undef	STATE_SET
1754 #undef	STATE_TEST
1755 #undef	STATE_SET_TEST
1756 #undef	SWITCH_STATE
1757 }
1758 
1759 /*
1760  * Called by xfs_bmap_add_extent to handle cases converting a hole
1761  * to a delayed allocation.
1762  */
1763 /*ARGSUSED*/
1764 STATIC int				/* error */
xfs_bmap_add_extent_hole_delay(xfs_inode_t * ip,xfs_extnum_t idx,xfs_btree_cur_t * cur,xfs_bmbt_irec_t * new,int * logflagsp,int rsvd)1765 xfs_bmap_add_extent_hole_delay(
1766 	xfs_inode_t		*ip,	/* incore inode pointer */
1767 	xfs_extnum_t		idx,	/* extent number to update/insert */
1768 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
1769 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
1770 	int			*logflagsp, /* inode logging flags */
1771 	int			rsvd)		/* OK to allocate reserved blocks */
1772 {
1773 	xfs_bmbt_rec_t		*base;	/* base of extent entry list */
1774 	xfs_bmbt_rec_t		*ep;	/* extent list entry for idx */
1775 #ifdef XFS_BMAP_TRACE
1776 	static char		fname[] = "xfs_bmap_add_extent_hole_delay";
1777 #endif
1778 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
1779 	xfs_filblks_t		newlen=0;	/* new indirect size */
1780 	xfs_filblks_t		oldlen=0;	/* old indirect size */
1781 	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
1782 	int			state;  /* state bits, accessed thru macros */
1783 	xfs_filblks_t		temp;	/* temp for indirect calculations */
1784 	enum {				/* bit number definitions for state */
1785 		LEFT_CONTIG,	RIGHT_CONTIG,
1786 		LEFT_DELAY,	RIGHT_DELAY,
1787 		LEFT_VALID,	RIGHT_VALID
1788 	};
1789 
1790 #define	MASK(b)			(1 << (b))
1791 #define	MASK2(a,b)		(MASK(a) | MASK(b))
1792 #define	STATE_SET(b,v)		((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
1793 #define	STATE_TEST(b)		(state & MASK(b))
1794 #define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
1795 				       ((state &= ~MASK(b)), 0))
1796 #define	SWITCH_STATE		(state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
1797 
1798 	base = ip->i_df.if_u1.if_extents;
1799 	ep = &base[idx];
1800 	state = 0;
1801 	ASSERT(ISNULLSTARTBLOCK(new->br_startblock));
1802 	/*
1803 	 * Check and set flags if this segment has a left neighbor
1804 	 */
1805 	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
1806 		xfs_bmbt_get_all(ep - 1, &left);
1807 		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
1808 	}
1809 	/*
1810 	 * Check and set flags if the current (right) segment exists.
1811 	 * If it doesn't exist, we're converting the hole at end-of-file.
1812 	 */
1813 	if (STATE_SET_TEST(RIGHT_VALID,
1814 			   idx <
1815 			   ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1816 		xfs_bmbt_get_all(ep, &right);
1817 		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(right.br_startblock));
1818 	}
1819 	/*
1820 	 * Set contiguity flags on the left and right neighbors.
1821 	 * Don't let extents get too large, even if the pieces are contiguous.
1822 	 */
1823 	STATE_SET(LEFT_CONTIG,
1824 		STATE_TEST(LEFT_VALID) && STATE_TEST(LEFT_DELAY) &&
1825 		left.br_startoff + left.br_blockcount == new->br_startoff &&
1826 		left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
1827 	STATE_SET(RIGHT_CONTIG,
1828 		STATE_TEST(RIGHT_VALID) && STATE_TEST(RIGHT_DELAY) &&
1829 		new->br_startoff + new->br_blockcount == right.br_startoff &&
1830 		new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
1831 		(!STATE_TEST(LEFT_CONTIG) ||
1832 		 (left.br_blockcount + new->br_blockcount +
1833 		     right.br_blockcount <= MAXEXTLEN)));
1834 	/*
1835 	 * Switch out based on the contiguity flags.
1836 	 */
1837 	switch (SWITCH_STATE) {
1838 
1839 	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
1840 		/*
1841 		 * New allocation is contiguous with delayed allocations
1842 		 * on the left and on the right.
1843 		 * Merge all three into a single extent list entry.
1844 		 */
1845 		temp = left.br_blockcount + new->br_blockcount +
1846 			right.br_blockcount;
1847 		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
1848 			XFS_DATA_FORK);
1849 		xfs_bmbt_set_blockcount(ep - 1, temp);
1850 		oldlen = STARTBLOCKVAL(left.br_startblock) +
1851 			STARTBLOCKVAL(new->br_startblock) +
1852 			STARTBLOCKVAL(right.br_startblock);
1853 		newlen = xfs_bmap_worst_indlen(ip, temp);
1854 		xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
1855 		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
1856 			XFS_DATA_FORK);
1857 		xfs_bmap_trace_delete(fname, "LC|RC", ip, idx, 1,
1858 			XFS_DATA_FORK);
1859 		xfs_bmap_delete_exlist(ip, idx, 1, XFS_DATA_FORK);
1860 		ip->i_df.if_lastex = idx - 1;
1861 		break;
1862 
1863 	case MASK(LEFT_CONTIG):
1864 		/*
1865 		 * New allocation is contiguous with a delayed allocation
1866 		 * on the left.
1867 		 * Merge the new allocation with the left neighbor.
1868 		 */
1869 		temp = left.br_blockcount + new->br_blockcount;
1870 		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1,
1871 			XFS_DATA_FORK);
1872 		xfs_bmbt_set_blockcount(ep - 1, temp);
1873 		oldlen = STARTBLOCKVAL(left.br_startblock) +
1874 			STARTBLOCKVAL(new->br_startblock);
1875 		newlen = xfs_bmap_worst_indlen(ip, temp);
1876 		xfs_bmbt_set_startblock(ep - 1, NULLSTARTBLOCK((int)newlen));
1877 		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1,
1878 			XFS_DATA_FORK);
1879 		ip->i_df.if_lastex = idx - 1;
1880 		break;
1881 
1882 	case MASK(RIGHT_CONTIG):
1883 		/*
1884 		 * New allocation is contiguous with a delayed allocation
1885 		 * on the right.
1886 		 * Merge the new allocation with the right neighbor.
1887 		 */
1888 		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, XFS_DATA_FORK);
1889 		temp = new->br_blockcount + right.br_blockcount;
1890 		oldlen = STARTBLOCKVAL(new->br_startblock) +
1891 			STARTBLOCKVAL(right.br_startblock);
1892 		newlen = xfs_bmap_worst_indlen(ip, temp);
1893 		xfs_bmbt_set_allf(ep, new->br_startoff,
1894 			NULLSTARTBLOCK((int)newlen), temp, right.br_state);
1895 		xfs_bmap_trace_post_update(fname, "RC", ip, idx, XFS_DATA_FORK);
1896 		ip->i_df.if_lastex = idx;
1897 		break;
1898 
1899 	case 0:
1900 		/*
1901 		 * New allocation is not contiguous with another
1902 		 * delayed allocation.
1903 		 * Insert a new entry.
1904 		 */
1905 		oldlen = newlen = 0;
1906 		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
1907 			XFS_DATA_FORK);
1908 		xfs_bmap_insert_exlist(ip, idx, 1, new, XFS_DATA_FORK);
1909 		ip->i_df.if_lastex = idx;
1910 		break;
1911 	}
1912 	if (oldlen != newlen) {
1913 		ASSERT(oldlen > newlen);
1914 		xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
1915 			(int)(oldlen - newlen), rsvd);
1916 		/*
1917 		 * Nothing to do for disk quota accounting here.
1918 		 */
1919 	}
1920 	*logflagsp = 0;
1921 	return 0;
1922 #undef	MASK
1923 #undef	MASK2
1924 #undef	STATE_SET
1925 #undef	STATE_TEST
1926 #undef	STATE_SET_TEST
1927 #undef	SWITCH_STATE
1928 }
1929 
1930 /*
1931  * Called by xfs_bmap_add_extent to handle cases converting a hole
1932  * to a real allocation.
1933  */
1934 STATIC int				/* error */
xfs_bmap_add_extent_hole_real(xfs_inode_t * ip,xfs_extnum_t idx,xfs_btree_cur_t * cur,xfs_bmbt_irec_t * new,int * logflagsp,int whichfork)1935 xfs_bmap_add_extent_hole_real(
1936 	xfs_inode_t		*ip,	/* incore inode pointer */
1937 	xfs_extnum_t		idx,	/* extent number to update/insert */
1938 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
1939 	xfs_bmbt_irec_t		*new,	/* new data to put in extent list */
1940 	int			*logflagsp, /* inode logging flags */
1941 	int			whichfork) /* data or attr fork */
1942 {
1943 	xfs_bmbt_rec_t		*ep;	/* pointer to extent entry ins. point */
1944 	int			error;	/* error return value */
1945 #ifdef XFS_BMAP_TRACE
1946 	static char		fname[] = "xfs_bmap_add_extent_hole_real";
1947 #endif
1948 	int			i;	/* temp state */
1949 	xfs_ifork_t		*ifp;	/* inode fork pointer */
1950 	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
1951 	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
1952 	int			state;	/* state bits, accessed thru macros */
1953 	enum {				/* bit number definitions for state */
1954 		LEFT_CONTIG,	RIGHT_CONTIG,
1955 		LEFT_DELAY,	RIGHT_DELAY,
1956 		LEFT_VALID,	RIGHT_VALID
1957 	};
1958 
1959 #define	MASK(b)			(1 << (b))
1960 #define	MASK2(a,b)		(MASK(a) | MASK(b))
1961 #define	STATE_SET(b,v)		((v) ? (state |= MASK(b)) : (state &= ~MASK(b)))
1962 #define	STATE_TEST(b)		(state & MASK(b))
1963 #define	STATE_SET_TEST(b,v)	((v) ? ((state |= MASK(b)), 1) : \
1964 				       ((state &= ~MASK(b)), 0))
1965 #define	SWITCH_STATE		(state & MASK2(LEFT_CONTIG, RIGHT_CONTIG))
1966 
1967 	ifp = XFS_IFORK_PTR(ip, whichfork);
1968 	ASSERT(idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
1969 	ep = &ifp->if_u1.if_extents[idx];
1970 	state = 0;
1971 	/*
1972 	 * Check and set flags if this segment has a left neighbor.
1973 	 */
1974 	if (STATE_SET_TEST(LEFT_VALID, idx > 0)) {
1975 		xfs_bmbt_get_all(ep - 1, &left);
1976 		STATE_SET(LEFT_DELAY, ISNULLSTARTBLOCK(left.br_startblock));
1977 	}
1978 	/*
1979 	 * Check and set flags if this segment has a current value.
1980 	 * Not true if we're inserting into the "hole" at eof.
1981 	 */
1982 	if (STATE_SET_TEST(RIGHT_VALID,
1983 			   idx <
1984 			   ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1985 		xfs_bmbt_get_all(ep, &right);
1986 		STATE_SET(RIGHT_DELAY, ISNULLSTARTBLOCK(right.br_startblock));
1987 	}
1988 	/*
1989 	 * We're inserting a real allocation between "left" and "right".
1990 	 * Set the contiguity flags.  Don't let extents get too large.
1991 	 */
1992 	STATE_SET(LEFT_CONTIG,
1993 		STATE_TEST(LEFT_VALID) && !STATE_TEST(LEFT_DELAY) &&
1994 		left.br_startoff + left.br_blockcount == new->br_startoff &&
1995 		left.br_startblock + left.br_blockcount == new->br_startblock &&
1996 		left.br_state == new->br_state &&
1997 		left.br_blockcount + new->br_blockcount <= MAXEXTLEN);
1998 	STATE_SET(RIGHT_CONTIG,
1999 		STATE_TEST(RIGHT_VALID) && !STATE_TEST(RIGHT_DELAY) &&
2000 		new->br_startoff + new->br_blockcount == right.br_startoff &&
2001 		new->br_startblock + new->br_blockcount ==
2002 		    right.br_startblock &&
2003 		new->br_state == right.br_state &&
2004 		new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
2005 		(!STATE_TEST(LEFT_CONTIG) ||
2006 		 left.br_blockcount + new->br_blockcount +
2007 		     right.br_blockcount <= MAXEXTLEN));
2008 
2009 	/*
2010 	 * Select which case we're in here, and implement it.
2011 	 */
2012 	switch (SWITCH_STATE) {
2013 
2014 	case MASK2(LEFT_CONTIG, RIGHT_CONTIG):
2015 		/*
2016 		 * New allocation is contiguous with real allocations on the
2017 		 * left and on the right.
2018 		 * Merge all three into a single extent list entry.
2019 		 */
2020 		xfs_bmap_trace_pre_update(fname, "LC|RC", ip, idx - 1,
2021 			whichfork);
2022 		xfs_bmbt_set_blockcount(ep - 1,
2023 			left.br_blockcount + new->br_blockcount +
2024 			right.br_blockcount);
2025 		xfs_bmap_trace_post_update(fname, "LC|RC", ip, idx - 1,
2026 			whichfork);
2027 		xfs_bmap_trace_delete(fname, "LC|RC", ip,
2028 			idx, 1, whichfork);
2029 		xfs_bmap_delete_exlist(ip, idx, 1, whichfork);
2030 		ifp->if_lastex = idx - 1;
2031 		XFS_IFORK_NEXT_SET(ip, whichfork,
2032 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2033 		if (cur == NULL) {
2034 			*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
2035 			return 0;
2036 		}
2037 		*logflagsp = XFS_ILOG_CORE;
2038 		if ((error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
2039 				right.br_startblock, right.br_blockcount, &i)))
2040 			return error;
2041 		ASSERT(i == 1);
2042 		if ((error = xfs_bmbt_delete(cur, &i)))
2043 			return error;
2044 		ASSERT(i == 1);
2045 		if ((error = xfs_bmbt_decrement(cur, 0, &i)))
2046 			return error;
2047 		ASSERT(i == 1);
2048 		error = xfs_bmbt_update(cur, left.br_startoff,
2049 				left.br_startblock,
2050 				left.br_blockcount + new->br_blockcount +
2051 				right.br_blockcount, left.br_state);
2052 		return error;
2053 
2054 	case MASK(LEFT_CONTIG):
2055 		/*
2056 		 * New allocation is contiguous with a real allocation
2057 		 * on the left.
2058 		 * Merge the new allocation with the left neighbor.
2059 		 */
2060 		xfs_bmap_trace_pre_update(fname, "LC", ip, idx - 1, whichfork);
2061 		xfs_bmbt_set_blockcount(ep - 1,
2062 			left.br_blockcount + new->br_blockcount);
2063 		xfs_bmap_trace_post_update(fname, "LC", ip, idx - 1, whichfork);
2064 		ifp->if_lastex = idx - 1;
2065 		if (cur == NULL) {
2066 			*logflagsp = XFS_ILOG_FEXT(whichfork);
2067 			return 0;
2068 		}
2069 		*logflagsp = 0;
2070 		if ((error = xfs_bmbt_lookup_eq(cur, left.br_startoff,
2071 				left.br_startblock, left.br_blockcount, &i)))
2072 			return error;
2073 		ASSERT(i == 1);
2074 		error = xfs_bmbt_update(cur, left.br_startoff,
2075 				left.br_startblock,
2076 				left.br_blockcount + new->br_blockcount,
2077 				left.br_state);
2078 		return error;
2079 
2080 	case MASK(RIGHT_CONTIG):
2081 		/*
2082 		 * New allocation is contiguous with a real allocation
2083 		 * on the right.
2084 		 * Merge the new allocation with the right neighbor.
2085 		 */
2086 		xfs_bmap_trace_pre_update(fname, "RC", ip, idx, whichfork);
2087 		xfs_bmbt_set_allf(ep, new->br_startoff, new->br_startblock,
2088 			new->br_blockcount + right.br_blockcount,
2089 			right.br_state);
2090 		xfs_bmap_trace_post_update(fname, "RC", ip, idx, whichfork);
2091 		ifp->if_lastex = idx;
2092 		if (cur == NULL) {
2093 			*logflagsp = XFS_ILOG_FEXT(whichfork);
2094 			return 0;
2095 		}
2096 		*logflagsp = 0;
2097 		if ((error = xfs_bmbt_lookup_eq(cur, right.br_startoff,
2098 				right.br_startblock, right.br_blockcount, &i)))
2099 			return error;
2100 		ASSERT(i == 1);
2101 		error = xfs_bmbt_update(cur, new->br_startoff,
2102 				new->br_startblock,
2103 				new->br_blockcount + right.br_blockcount,
2104 				right.br_state);
2105 		return error;
2106 
2107 	case 0:
2108 		/*
2109 		 * New allocation is not contiguous with another
2110 		 * real allocation.
2111 		 * Insert a new entry.
2112 		 */
2113 		xfs_bmap_trace_insert(fname, "0", ip, idx, 1, new, NULL,
2114 			whichfork);
2115 		xfs_bmap_insert_exlist(ip, idx, 1, new, whichfork);
2116 		ifp->if_lastex = idx;
2117 		XFS_IFORK_NEXT_SET(ip, whichfork,
2118 			XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2119 		if (cur == NULL) {
2120 			*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
2121 			return 0;
2122 		}
2123 		*logflagsp = XFS_ILOG_CORE;
2124 		if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2125 				new->br_startblock, new->br_blockcount, &i)))
2126 			return error;
2127 		ASSERT(i == 0);
2128 		cur->bc_rec.b.br_state = new->br_state;
2129 		if ((error = xfs_bmbt_insert(cur, &i)))
2130 			return error;
2131 		ASSERT(i == 1);
2132 		return 0;
2133 	}
2134 #undef	MASK
2135 #undef	MASK2
2136 #undef	STATE_SET
2137 #undef	STATE_TEST
2138 #undef	STATE_SET_TEST
2139 #undef	SWITCH_STATE
2140 	/* NOTREACHED */
2141 	ASSERT(0);
2142 	return 0; /* keep gcc quite */
2143 }
2144 
/*
 * Gap threshold for the start-block heuristics in xfs_bmap_alloc: the
 * file-offset gap to a neighboring extent is applied to the requested
 * start block only when it is at most XFS_ALLOC_GAP_UNITS times the
 * allocation length (ap->alen).
 */
2145 #define XFS_ALLOC_GAP_UNITS	4
2146 
2147 /*
2148  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
2149  * It figures out where to ask the underlying allocator to put the new extent.
2150  */
2151 STATIC int				/* error */
xfs_bmap_alloc(xfs_bmalloca_t * ap)2152 xfs_bmap_alloc(
2153 	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
2154 {
2155 	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
2156 	xfs_alloctype_t	atype=0;	/* type for allocation routines */
2157 	int		error;		/* error return value */
2158 	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
2159 	xfs_mount_t	*mp;		/* mount point structure */
2160 	int		nullfb;		/* true if ap->firstblock isn't set */
2161 	int		rt;		/* true if inode is realtime */
2162 #ifdef __KERNEL__
2163 	xfs_extlen_t	prod=0;		/* product factor for allocators */
2164 	xfs_extlen_t	ralen=0;	/* realtime allocation length */
2165 #endif
2166 
2167 #define	ISVALID(x,y)	\
2168 	(rt ? \
2169 		(x) < mp->m_sb.sb_rblocks : \
2170 		XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
2171 		XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
2172 		XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
2173 
2174 	/*
2175 	 * Set up variables.
2176 	 */
2177 	mp = ap->ip->i_mount;
2178 	nullfb = ap->firstblock == NULLFSBLOCK;
2179 	rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
2180 	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
2181 #ifdef __KERNEL__
2182 	if (rt) {
2183 		xfs_extlen_t	extsz;		/* file extent size for rt */
2184 		xfs_fileoff_t	nexto;		/* next file offset */
2185 		xfs_extlen_t	orig_alen;	/* original ap->alen */
2186 		xfs_fileoff_t	orig_end;	/* original off+len */
2187 		xfs_fileoff_t	orig_off;	/* original ap->off */
2188 		xfs_extlen_t	mod_off;	/* modulus calculations */
2189 		xfs_fileoff_t	prevo;		/* previous file offset */
2190 		xfs_rtblock_t	rtx;		/* realtime extent number */
2191 		xfs_extlen_t	temp;		/* temp for rt calculations */
2192 
2193 		/*
2194 		 * Set prod to match the realtime extent size.
2195 		 */
2196 		if (!(extsz = ap->ip->i_d.di_extsize))
2197 			extsz = mp->m_sb.sb_rextsize;
2198 		prod = extsz / mp->m_sb.sb_rextsize;
2199 		orig_off = ap->off;
2200 		orig_alen = ap->alen;
2201 		orig_end = orig_off + orig_alen;
2202 		/*
2203 		 * If the file offset is unaligned vs. the extent size
2204 		 * we need to align it.  This will be possible unless
2205 		 * the file was previously written with a kernel that didn't
2206 		 * perform this alignment.
2207 		 */
2208 		mod_off = do_mod(orig_off, extsz);
2209 		if (mod_off) {
2210 			ap->alen += mod_off;
2211 			ap->off -= mod_off;
2212 		}
2213 		/*
2214 		 * Same adjustment for the end of the requested area.
2215 		 */
2216 		if ((temp = (ap->alen % extsz)))
2217 			ap->alen += extsz - temp;
2218 		/*
2219 		 * If the previous block overlaps with this proposed allocation
2220 		 * then move the start forward without adjusting the length.
2221 		 */
2222 		prevo =
2223 			ap->prevp->br_startoff == NULLFILEOFF ?
2224 				0 :
2225 				(ap->prevp->br_startoff +
2226 				 ap->prevp->br_blockcount);
2227 		if (ap->off != orig_off && ap->off < prevo)
2228 			ap->off = prevo;
2229 		/*
2230 		 * If the next block overlaps with this proposed allocation
2231 		 * then move the start back without adjusting the length,
2232 		 * but not before offset 0.
2233 		 * This may of course make the start overlap previous block,
2234 		 * and if we hit the offset 0 limit then the next block
2235 		 * can still overlap too.
2236 		 */
2237 		nexto = (ap->eof || ap->gotp->br_startoff == NULLFILEOFF) ?
2238 			NULLFILEOFF : ap->gotp->br_startoff;
2239 		if (!ap->eof &&
2240 		    ap->off + ap->alen != orig_end &&
2241 		    ap->off + ap->alen > nexto)
2242 			ap->off = nexto > ap->alen ? nexto - ap->alen : 0;
2243 		/*
2244 		 * If we're now overlapping the next or previous extent that
2245 		 * means we can't fit an extsz piece in this hole.  Just move
2246 		 * the start forward to the first valid spot and set
2247 		 * the length so we hit the end.
2248 		 */
2249 		if ((ap->off != orig_off && ap->off < prevo) ||
2250 		    (ap->off + ap->alen != orig_end &&
2251 		     ap->off + ap->alen > nexto)) {
2252 			ap->off = prevo;
2253 			ap->alen = nexto - prevo;
2254 		}
2255 		/*
2256 		 * If the result isn't a multiple of rtextents we need to
2257 		 * remove blocks until it is.
2258 		 */
2259 		if ((temp = (ap->alen % mp->m_sb.sb_rextsize))) {
2260 			/*
2261 			 * We're not covering the original request, or
2262 			 * we won't be able to once we fix the length.
2263 			 */
2264 			if (orig_off < ap->off ||
2265 			    orig_end > ap->off + ap->alen ||
2266 			    ap->alen - temp < orig_alen)
2267 				return XFS_ERROR(EINVAL);
2268 			/*
2269 			 * Try to fix it by moving the start up.
2270 			 */
2271 			if (ap->off + temp <= orig_off) {
2272 				ap->alen -= temp;
2273 				ap->off += temp;
2274 			}
2275 			/*
2276 			 * Try to fix it by moving the end in.
2277 			 */
2278 			else if (ap->off + ap->alen - temp >= orig_end)
2279 				ap->alen -= temp;
2280 			/*
2281 			 * Set the start to the minimum then trim the length.
2282 			 */
2283 			else {
2284 				ap->alen -= orig_off - ap->off;
2285 				ap->off = orig_off;
2286 				ap->alen -= ap->alen % mp->m_sb.sb_rextsize;
2287 			}
2288 			/*
2289 			 * Result doesn't cover the request, fail it.
2290 			 */
2291 			if (orig_off < ap->off || orig_end > ap->off + ap->alen)
2292 				return XFS_ERROR(EINVAL);
2293 		}
2294 		ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
2295 		/*
2296 		 * If the offset & length are not perfectly aligned
2297 		 * then kill prod, it will just get us in trouble.
2298 		 */
2299 		if (do_mod(ap->off, extsz) || ap->alen % extsz)
2300 			prod = 1;
2301 		/*
2302 		 * Set ralen to be the actual requested length in rtextents.
2303 		 */
2304 		ralen = ap->alen / mp->m_sb.sb_rextsize;
2305 		/*
2306 		 * If the old value was close enough to MAXEXTLEN that
2307 		 * we rounded up to it, cut it back so it's valid again.
2308 		 * Note that if it's a really large request (bigger than
2309 		 * MAXEXTLEN), we don't hear about that number, and can't
2310 		 * adjust the starting point to match it.
2311 		 */
2312 		if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
2313 			ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
2314 		/*
2315 		 * If it's an allocation to an empty file at offset 0,
2316 		 * pick an extent that will space things out in the rt area.
2317 		 */
2318 		if (ap->eof && ap->off == 0) {
2319 			error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
2320 			if (error)
2321 				return error;
2322 			ap->rval = rtx * mp->m_sb.sb_rextsize;
2323 		} else
2324 			ap->rval = 0;
2325 	}
2326 #else
2327 	if (rt)
2328 		ap->rval = 0;
2329 #endif	/* __KERNEL__ */
2330 	else if (nullfb)
2331 		ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
2332 	else
2333 		ap->rval = ap->firstblock;
2334 	/*
2335 	 * If allocating at eof, and there's a previous real block,
2336 	 * try to use it's last block as our starting point.
2337 	 */
2338 	if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF &&
2339 	    !ISNULLSTARTBLOCK(ap->prevp->br_startblock) &&
2340 	    ISVALID(ap->prevp->br_startblock + ap->prevp->br_blockcount,
2341 		    ap->prevp->br_startblock)) {
2342 		ap->rval = ap->prevp->br_startblock + ap->prevp->br_blockcount;
2343 		/*
2344 		 * Adjust for the gap between prevp and us.
2345 		 */
2346 		adjust = ap->off -
2347 			(ap->prevp->br_startoff + ap->prevp->br_blockcount);
2348 		if (adjust &&
2349 		    ISVALID(ap->rval + adjust, ap->prevp->br_startblock))
2350 			ap->rval += adjust;
2351 	}
2352 	/*
2353 	 * If not at eof, then compare the two neighbor blocks.
2354 	 * Figure out whether either one gives us a good starting point,
2355 	 * and pick the better one.
2356 	 */
2357 	else if (!ap->eof) {
2358 		xfs_fsblock_t	gotbno;		/* right side block number */
2359 		xfs_fsblock_t	gotdiff=0;	/* right side difference */
2360 		xfs_fsblock_t	prevbno;	/* left side block number */
2361 		xfs_fsblock_t	prevdiff=0;	/* left side difference */
2362 
2363 		/*
2364 		 * If there's a previous (left) block, select a requested
2365 		 * start block based on it.
2366 		 */
2367 		if (ap->prevp->br_startoff != NULLFILEOFF &&
2368 		    !ISNULLSTARTBLOCK(ap->prevp->br_startblock) &&
2369 		    (prevbno = ap->prevp->br_startblock +
2370 			       ap->prevp->br_blockcount) &&
2371 		    ISVALID(prevbno, ap->prevp->br_startblock)) {
2372 			/*
2373 			 * Calculate gap to end of previous block.
2374 			 */
2375 			adjust = prevdiff = ap->off -
2376 				(ap->prevp->br_startoff +
2377 				 ap->prevp->br_blockcount);
2378 			/*
2379 			 * Figure the startblock based on the previous block's
2380 			 * end and the gap size.
2381 			 * Heuristic!
2382 			 * If the gap is large relative to the piece we're
2383 			 * allocating, or using it gives us an invalid block
2384 			 * number, then just use the end of the previous block.
2385 			 */
2386 			if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
2387 			    ISVALID(prevbno + prevdiff,
2388 				    ap->prevp->br_startblock))
2389 				prevbno += adjust;
2390 			else
2391 				prevdiff += adjust;
2392 			/*
2393 			 * If the firstblock forbids it, can't use it,
2394 			 * must use default.
2395 			 */
2396 			if (!rt && !nullfb &&
2397 			    XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
2398 				prevbno = NULLFSBLOCK;
2399 		}
2400 		/*
2401 		 * No previous block or can't follow it, just default.
2402 		 */
2403 		else
2404 			prevbno = NULLFSBLOCK;
2405 		/*
2406 		 * If there's a following (right) block, select a requested
2407 		 * start block based on it.
2408 		 */
2409 		if (!ISNULLSTARTBLOCK(ap->gotp->br_startblock)) {
2410 			/*
2411 			 * Calculate gap to start of next block.
2412 			 */
2413 			adjust = gotdiff = ap->gotp->br_startoff - ap->off;
2414 			/*
2415 			 * Figure the startblock based on the next block's
2416 			 * start and the gap size.
2417 			 */
2418 			gotbno = ap->gotp->br_startblock;
2419 			/*
2420 			 * Heuristic!
2421 			 * If the gap is large relative to the piece we're
2422 			 * allocating, or using it gives us an invalid block
2423 			 * number, then just use the start of the next block
2424 			 * offset by our length.
2425 			 */
2426 			if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
2427 			    ISVALID(gotbno - gotdiff, gotbno))
2428 				gotbno -= adjust;
2429 			else if (ISVALID(gotbno - ap->alen, gotbno)) {
2430 				gotbno -= ap->alen;
2431 				gotdiff += adjust - ap->alen;
2432 			} else
2433 				gotdiff += adjust;
2434 			/*
2435 			 * If the firstblock forbids it, can't use it,
2436 			 * must use default.
2437 			 */
2438 			if (!rt && !nullfb &&
2439 			    XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
2440 				gotbno = NULLFSBLOCK;
2441 		}
2442 		/*
2443 		 * No next block, just default.
2444 		 */
2445 		else
2446 			gotbno = NULLFSBLOCK;
2447 		/*
2448 		 * If both valid, pick the better one, else the only good
2449 		 * one, else ap->rval is already set (to 0 or the inode block).
2450 		 */
2451 		if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
2452 			ap->rval = prevdiff <= gotdiff ? prevbno : gotbno;
2453 		else if (prevbno != NULLFSBLOCK)
2454 			ap->rval = prevbno;
2455 		else if (gotbno != NULLFSBLOCK)
2456 			ap->rval = gotbno;
2457 	}
2458 	/*
2459 	 * If allowed, use ap->rval; otherwise must use firstblock since
2460 	 * it's in the right allocation group.
2461 	 */
2462 	if (nullfb || rt || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
2463 		;
2464 	else
2465 		ap->rval = ap->firstblock;
2466 	/*
2467 	 * Realtime allocation, done through xfs_rtallocate_extent.
2468 	 */
2469 	if (rt) {
2470 #ifndef __KERNEL__
2471 		ASSERT(0);
2472 #else
2473 		xfs_rtblock_t	rtb;
2474 
2475 		atype = ap->rval == 0 ?
2476 			XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
2477 		do_div(ap->rval, mp->m_sb.sb_rextsize);
2478 		rtb = ap->rval;
2479 		ap->alen = ralen;
2480 		if ((error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen,
2481 				&ralen, atype, ap->wasdel, prod, &rtb)))
2482 			return error;
2483 		if (rtb == NULLFSBLOCK && prod > 1 &&
2484 		    (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1,
2485 						   ap->alen, &ralen, atype,
2486 						   ap->wasdel, 1, &rtb)))
2487 			return error;
2488 		ap->rval = rtb;
2489 		if (ap->rval != NULLFSBLOCK) {
2490 			ap->rval *= mp->m_sb.sb_rextsize;
2491 			ralen *= mp->m_sb.sb_rextsize;
2492 			ap->alen = ralen;
2493 			ap->ip->i_d.di_nblocks += ralen;
2494 			xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
2495 			if (ap->wasdel)
2496 				ap->ip->i_delayed_blks -= ralen;
2497 			/*
2498 			 * Adjust the disk quota also. This was reserved
2499 			 * earlier.
2500 			 */
2501 			XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
2502 				ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
2503 						XFS_TRANS_DQ_RTBCOUNT,
2504 				(long) ralen);
2505 		} else
2506 			ap->alen = 0;
2507 #endif	/* __KERNEL__ */
2508 	}
2509 	/*
2510 	 * Normal allocation, done through xfs_alloc_vextent.
2511 	 */
2512 	else {
2513 		xfs_agnumber_t	ag;
2514 		xfs_alloc_arg_t	args;
2515 		xfs_extlen_t	blen;
2516 		xfs_extlen_t	delta;
2517 		int		isaligned;
2518 		xfs_extlen_t	longest;
2519 		xfs_extlen_t	need;
2520 		xfs_extlen_t	nextminlen=0;
2521 		int		notinit;
2522 		xfs_perag_t	*pag;
2523 		xfs_agnumber_t	startag;
2524 		int		tryagain;
2525 
2526 		tryagain = isaligned = 0;
2527 		args.tp = ap->tp;
2528 		args.mp = mp;
2529 		args.fsbno = ap->rval;
2530 		args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
2531 		blen = 0;
2532 		if (nullfb) {
2533 			args.type = XFS_ALLOCTYPE_START_BNO;
2534 			args.total = ap->total;
2535 			/*
2536 			 * Find the longest available space.
2537 			 * We're going to try for the whole allocation at once.
2538 			 */
2539 			startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
2540 			notinit = 0;
2541 			down_read(&mp->m_peraglock);
2542 			while (blen < ap->alen) {
2543 				pag = &mp->m_perag[ag];
2544 				if (!pag->pagf_init &&
2545 				    (error = xfs_alloc_pagf_init(mp, args.tp,
2546 					    ag, XFS_ALLOC_FLAG_TRYLOCK))) {
2547 					up_read(&mp->m_peraglock);
2548 					return error;
2549 				}
2550 				/*
2551 				 * See xfs_alloc_fix_freelist...
2552 				 */
2553 				if (pag->pagf_init) {
2554 					need = XFS_MIN_FREELIST_PAG(pag, mp);
2555 					delta = need > pag->pagf_flcount ?
2556 						need - pag->pagf_flcount : 0;
2557 					longest = (pag->pagf_longest > delta) ?
2558 						(pag->pagf_longest - delta) :
2559 						(pag->pagf_flcount > 0 ||
2560 						 pag->pagf_longest > 0);
2561 					if (blen < longest)
2562 						blen = longest;
2563 				} else
2564 					notinit = 1;
2565 				if (++ag == mp->m_sb.sb_agcount)
2566 					ag = 0;
2567 				if (ag == startag)
2568 					break;
2569 			}
2570 			up_read(&mp->m_peraglock);
2571 			/*
2572 			 * Since the above loop did a BUF_TRYLOCK, it is
2573 			 * possible that there is space for this request.
2574 			 */
2575 			if (notinit || blen < ap->minlen)
2576 				args.minlen = ap->minlen;
2577 			/*
2578 			 * If the best seen length is less than the request
2579 			 * length, use the best as the minimum.
2580 			 */
2581 			else if (blen < ap->alen)
2582 				args.minlen = blen;
2583 			/*
2584 			 * Otherwise we've seen an extent as big as alen,
2585 			 * use that as the minimum.
2586 			 */
2587 			else
2588 				args.minlen = ap->alen;
2589 		} else if (ap->low) {
2590 			args.type = XFS_ALLOCTYPE_FIRST_AG;
2591 			args.total = args.minlen = ap->minlen;
2592 		} else {
2593 			args.type = XFS_ALLOCTYPE_NEAR_BNO;
2594 			args.total = ap->total;
2595 			args.minlen = ap->minlen;
2596 		}
2597 		if (ap->ip->i_d.di_extsize) {
2598 			args.prod = ap->ip->i_d.di_extsize;
2599 			if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
2600 				args.mod = (xfs_extlen_t)(args.prod - args.mod);
2601 		} else if (mp->m_sb.sb_blocksize >= NBPP) {
2602 			args.prod = 1;
2603 			args.mod = 0;
2604 		} else {
2605 			args.prod = NBPP >> mp->m_sb.sb_blocklog;
2606 			if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod))))
2607 				args.mod = (xfs_extlen_t)(args.prod - args.mod);
2608 		}
2609 		/*
2610 		 * If we are not low on available data blocks, and the
2611 		 * underlying logical volume manager is a stripe, and
2612 		 * the file offset is zero then try to allocate data
2613 		 * blocks on stripe unit boundary.
2614 		 * NOTE: ap->aeof is only set if the allocation length
2615 		 * is >= the stripe unit and the allocation offset is
2616 		 * at the end of file.
2617 		 */
2618 		if (!ap->low && ap->aeof) {
2619 			if (!ap->off) {
2620 				args.alignment = mp->m_dalign;
2621 				atype = args.type;
2622 				isaligned = 1;
2623 				/*
2624 				 * Adjust for alignment
2625 				 */
2626 				if (blen > args.alignment && blen <= ap->alen)
2627 					args.minlen = blen - args.alignment;
2628 				args.minalignslop = 0;
2629 			} else {
2630 				/*
2631 				 * First try an exact bno allocation.
2632 				 * If it fails then do a near or start bno
2633 				 * allocation with alignment turned on.
2634 				 */
2635 				atype = args.type;
2636 				tryagain = 1;
2637 				args.type = XFS_ALLOCTYPE_THIS_BNO;
2638 				args.alignment = 1;
2639 				/*
2640 				 * Compute the minlen+alignment for the
2641 				 * next case.  Set slop so that the value
2642 				 * of minlen+alignment+slop doesn't go up
2643 				 * between the calls.
2644 				 */
2645 				if (blen > mp->m_dalign && blen <= ap->alen)
2646 					nextminlen = blen - mp->m_dalign;
2647 				else
2648 					nextminlen = args.minlen;
2649 				if (nextminlen + mp->m_dalign > args.minlen + 1)
2650 					args.minalignslop =
2651 						nextminlen + mp->m_dalign -
2652 						args.minlen - 1;
2653 				else
2654 					args.minalignslop = 0;
2655 			}
2656 		} else {
2657 			args.alignment = 1;
2658 			args.minalignslop = 0;
2659 		}
2660 		args.minleft = ap->minleft;
2661 		args.wasdel = ap->wasdel;
2662 		args.isfl = 0;
2663 		args.userdata = ap->userdata;
2664 		if ((error = xfs_alloc_vextent(&args)))
2665 			return error;
2666 		if (tryagain && args.fsbno == NULLFSBLOCK) {
2667 			/*
2668 			 * Exact allocation failed. Now try with alignment
2669 			 * turned on.
2670 			 */
2671 			args.type = atype;
2672 			args.fsbno = ap->rval;
2673 			args.alignment = mp->m_dalign;
2674 			args.minlen = nextminlen;
2675 			args.minalignslop = 0;
2676 			isaligned = 1;
2677 			if ((error = xfs_alloc_vextent(&args)))
2678 				return error;
2679 		}
2680 		if (isaligned && args.fsbno == NULLFSBLOCK) {
2681 			/*
2682 			 * allocation failed, so turn off alignment and
2683 			 * try again.
2684 			 */
2685 			args.type = atype;
2686 			args.fsbno = ap->rval;
2687 			args.alignment = 0;
2688 			if ((error = xfs_alloc_vextent(&args)))
2689 				return error;
2690 		}
2691 		if (args.fsbno == NULLFSBLOCK && nullfb &&
2692 		    args.minlen > ap->minlen) {
2693 			args.minlen = ap->minlen;
2694 			args.type = XFS_ALLOCTYPE_START_BNO;
2695 			args.fsbno = ap->rval;
2696 			if ((error = xfs_alloc_vextent(&args)))
2697 				return error;
2698 		}
2699 		if (args.fsbno == NULLFSBLOCK && nullfb) {
2700 			args.fsbno = 0;
2701 			args.type = XFS_ALLOCTYPE_FIRST_AG;
2702 			args.total = ap->minlen;
2703 			args.minleft = 0;
2704 			if ((error = xfs_alloc_vextent(&args)))
2705 				return error;
2706 			ap->low = 1;
2707 		}
2708 		if (args.fsbno != NULLFSBLOCK) {
2709 			ap->firstblock = ap->rval = args.fsbno;
2710 			ASSERT(nullfb || fb_agno == args.agno ||
2711 			       (ap->low && fb_agno < args.agno));
2712 			ap->alen = args.len;
2713 			ap->ip->i_d.di_nblocks += args.len;
2714 			xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
2715 			if (ap->wasdel)
2716 				ap->ip->i_delayed_blks -= args.len;
2717 			/*
2718 			 * Adjust the disk quota also. This was reserved
2719 			 * earlier.
2720 			 */
2721 			XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
2722 				ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
2723 						XFS_TRANS_DQ_BCOUNT,
2724 				(long) args.len);
2725 		} else {
2726 			ap->rval = NULLFSBLOCK;
2727 			ap->alen = 0;
2728 		}
2729 	}
2730 	return 0;
2731 #undef	ISVALID
2732 }
2733 
2734 /*
2735  * Transform a btree format file with only one leaf node, where the
2736  * extents list will fit in the inode, into an extents format file.
2737  * Since the extent list is already in-core, all we have to do is
2738  * give up the space for the btree root and pitch the leaf block.
2739  */
2740 STATIC int				/* error */
xfs_bmap_btree_to_extents(xfs_trans_t * tp,xfs_inode_t * ip,xfs_btree_cur_t * cur,int * logflagsp,int whichfork)2741 xfs_bmap_btree_to_extents(
2742 	xfs_trans_t		*tp,	/* transaction pointer */
2743 	xfs_inode_t		*ip,	/* incore inode pointer */
2744 	xfs_btree_cur_t		*cur,	/* btree cursor */
2745 	int			*logflagsp, /* inode logging flags */
2746 	int			whichfork)  /* data or attr fork */
2747 {
2748 	/* REFERENCED */
2749 	xfs_bmbt_block_t	*cblock;/* child btree block */
2750 	xfs_fsblock_t		cbno;	/* child block number */
2751 	xfs_buf_t		*cbp;	/* child block's buffer */
2752 	int			error;	/* error return value */
2753 	xfs_ifork_t		*ifp;	/* inode fork data */
2754 	xfs_mount_t		*mp;	/* mount point structure */
2755 	xfs_bmbt_ptr_t		*pp;	/* ptr to block address */
2756 	xfs_bmbt_block_t	*rblock;/* root btree block */
2757 
2758 	ifp = XFS_IFORK_PTR(ip, whichfork);
2759 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
2760 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
2761 	rblock = ifp->if_broot;
2762 	ASSERT(INT_GET(rblock->bb_level, ARCH_CONVERT) == 1);
2763 	ASSERT(INT_GET(rblock->bb_numrecs, ARCH_CONVERT) == 1);
2764 	ASSERT(XFS_BMAP_BROOT_MAXRECS(ifp->if_broot_bytes) == 1);
2765 	mp = ip->i_mount;
2766 	pp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, ifp->if_broot_bytes);
2767 	*logflagsp = 0;
2768 #ifdef DEBUG
2769 	if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), 1)))
2770 		return error;
2771 #endif
2772 	cbno = INT_GET(*pp, ARCH_CONVERT);
2773 	if ((error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp,
2774 			XFS_BMAP_BTREE_REF)))
2775 		return error;
2776 	cblock = XFS_BUF_TO_BMBT_BLOCK(cbp);
2777 	if ((error = xfs_btree_check_lblock(cur, cblock, 0, cbp)))
2778 		return error;
2779 	xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
2780 	ip->i_d.di_nblocks--;
2781 	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
2782 	xfs_trans_binval(tp, cbp);
2783 	if (cur->bc_bufs[0] == cbp)
2784 		cur->bc_bufs[0] = NULL;
2785 	xfs_iroot_realloc(ip, -1, whichfork);
2786 	ASSERT(ifp->if_broot == NULL);
2787 	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
2788 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
2789 	*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FEXT(whichfork);
2790 	return 0;
2791 }
2792 
2793 /*
2794  * Called by xfs_bmapi to update extent list structure and the btree
2795  * after removing space (or undoing a delayed allocation).
2796  */
2797 STATIC int				/* error */
xfs_bmap_del_extent(xfs_inode_t * ip,xfs_trans_t * tp,xfs_extnum_t idx,xfs_bmap_free_t * flist,xfs_btree_cur_t * cur,xfs_bmbt_irec_t * del,int * logflagsp,int whichfork,int rsvd)2798 xfs_bmap_del_extent(
2799 	xfs_inode_t		*ip,	/* incore inode pointer */
2800 	xfs_trans_t		*tp,	/* current transaction pointer */
2801 	xfs_extnum_t		idx,	/* extent number to update/delete */
2802 	xfs_bmap_free_t		*flist,	/* list of extents to be freed */
2803 	xfs_btree_cur_t		*cur,	/* if null, not a btree */
2804 	xfs_bmbt_irec_t		*del,	/* data to remove from extent list */
2805 	int			*logflagsp, /* inode logging flags */
2806 	int			whichfork, /* data or attr fork */
2807 	int			rsvd)	/* OK to allocate reserved blocks */
2808 {
2809 	xfs_filblks_t		da_new;	/* new delay-alloc indirect blocks */
2810 	xfs_filblks_t		da_old;	/* old delay-alloc indirect blocks */
2811 	xfs_fsblock_t		del_endblock=0;	/* first block past del */
2812 	xfs_fileoff_t		del_endoff;	/* first offset past del */
2813 	int			delay;	/* current block is delayed allocated */
2814 	int			do_fx;	/* free extent at end of routine */
2815 	xfs_bmbt_rec_t		*ep;	/* current extent entry pointer */
2816 	int			error;	/* error return value */
2817 	int			flags;	/* inode logging flags */
2818 #ifdef XFS_BMAP_TRACE
2819 	static char		fname[] = "xfs_bmap_del_extent";
2820 #endif
2821 	xfs_bmbt_irec_t		got;	/* current extent entry */
2822 	xfs_fileoff_t		got_endoff;	/* first offset past got */
2823 	int			i;	/* temp state */
2824 	xfs_ifork_t		*ifp;	/* inode fork pointer */
2825 	xfs_mount_t		*mp;	/* mount structure */
2826 	xfs_filblks_t		nblks;	/* quota/sb block count */
2827 	xfs_bmbt_irec_t		new;	/* new record to be inserted */
2828 	/* REFERENCED */
2829 	xfs_extnum_t		nextents;	/* number of extents in list */
2830 	uint			qfield;	/* quota field to update */
2831 	xfs_filblks_t		temp;	/* for indirect length calculations */
2832 	xfs_filblks_t		temp2;	/* for indirect length calculations */
2833 
2834 	XFS_STATS_INC(xs_del_exlist);
2835 	mp = ip->i_mount;
2836 	ifp = XFS_IFORK_PTR(ip, whichfork);
2837 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
2838 	ASSERT(idx >= 0 && idx < nextents);
2839 	ASSERT(del->br_blockcount > 0);
2840 	ep = &ifp->if_u1.if_extents[idx];
2841 	xfs_bmbt_get_all(ep, &got);
2842 	ASSERT(got.br_startoff <= del->br_startoff);
2843 	del_endoff = del->br_startoff + del->br_blockcount;
2844 	got_endoff = got.br_startoff + got.br_blockcount;
2845 	ASSERT(got_endoff >= del_endoff);
2846 	delay = ISNULLSTARTBLOCK(got.br_startblock);
2847 	ASSERT(ISNULLSTARTBLOCK(del->br_startblock) == delay);
2848 	flags = 0;
2849 	qfield = 0;
2850 	error = 0;
2851 	/*
2852 	 * If deleting a real allocation, must free up the disk space.
2853 	 */
2854 	if (!delay) {
2855 		flags = XFS_ILOG_CORE;
2856 		/*
2857 		 * Realtime allocation.  Free it and record di_nblocks update.
2858 		 */
2859 		if (whichfork == XFS_DATA_FORK &&
2860 		    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
2861 			xfs_fsblock_t	bno;
2862 			xfs_filblks_t	len;
2863 
2864 			ASSERT(do_mod(del->br_blockcount,
2865 				      mp->m_sb.sb_rextsize) == 0);
2866 			ASSERT(do_mod(del->br_startblock,
2867 				      mp->m_sb.sb_rextsize) == 0);
2868 			bno = del->br_startblock;
2869 			len = del->br_blockcount;
2870 			do_div(bno, mp->m_sb.sb_rextsize);
2871 			do_div(len, mp->m_sb.sb_rextsize);
2872 			if ((error = xfs_rtfree_extent(ip->i_transp, bno,
2873 					(xfs_extlen_t)len)))
2874 				goto done;
2875 			do_fx = 0;
2876 			nblks = len * mp->m_sb.sb_rextsize;
2877 			qfield = XFS_TRANS_DQ_RTBCOUNT;
2878 		}
2879 		/*
2880 		 * Ordinary allocation.
2881 		 */
2882 		else {
2883 			do_fx = 1;
2884 			nblks = del->br_blockcount;
2885 			qfield = XFS_TRANS_DQ_BCOUNT;
2886 		}
2887 		/*
2888 		 * Set up del_endblock and cur for later.
2889 		 */
2890 		del_endblock = del->br_startblock + del->br_blockcount;
2891 		if (cur) {
2892 			if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
2893 					got.br_startblock, got.br_blockcount,
2894 					&i)))
2895 				goto done;
2896 			ASSERT(i == 1);
2897 		}
2898 		da_old = da_new = 0;
2899 	} else {
2900 		da_old = STARTBLOCKVAL(got.br_startblock);
2901 		da_new = 0;
2902 		nblks = 0;
2903 		do_fx = 0;
2904 	}
2905 	/*
2906 	 * Set flag value to use in switch statement.
2907 	 * Left-contig is 2, right-contig is 1.
2908 	 */
2909 	switch (((got.br_startoff == del->br_startoff) << 1) |
2910 		(got_endoff == del_endoff)) {
2911 	case 3:
2912 		/*
2913 		 * Matches the whole extent.  Delete the entry.
2914 		 */
2915 		xfs_bmap_trace_delete(fname, "3", ip, idx, 1, whichfork);
2916 		xfs_bmap_delete_exlist(ip, idx, 1, whichfork);
2917 		ifp->if_lastex = idx;
2918 		if (delay)
2919 			break;
2920 		XFS_IFORK_NEXT_SET(ip, whichfork,
2921 			XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2922 		flags |= XFS_ILOG_CORE;
2923 		if (!cur) {
2924 			flags |= XFS_ILOG_FEXT(whichfork);
2925 			break;
2926 		}
2927 		if ((error = xfs_bmbt_delete(cur, &i)))
2928 			goto done;
2929 		ASSERT(i == 1);
2930 		break;
2931 
2932 	case 2:
2933 		/*
2934 		 * Deleting the first part of the extent.
2935 		 */
2936 		xfs_bmap_trace_pre_update(fname, "2", ip, idx, whichfork);
2937 		xfs_bmbt_set_startoff(ep, del_endoff);
2938 		temp = got.br_blockcount - del->br_blockcount;
2939 		xfs_bmbt_set_blockcount(ep, temp);
2940 		ifp->if_lastex = idx;
2941 		if (delay) {
2942 			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2943 				da_old);
2944 			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
2945 			xfs_bmap_trace_post_update(fname, "2", ip, idx,
2946 				whichfork);
2947 			da_new = temp;
2948 			break;
2949 		}
2950 		xfs_bmbt_set_startblock(ep, del_endblock);
2951 		xfs_bmap_trace_post_update(fname, "2", ip, idx, whichfork);
2952 		if (!cur) {
2953 			flags |= XFS_ILOG_FEXT(whichfork);
2954 			break;
2955 		}
2956 		if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
2957 				got.br_blockcount - del->br_blockcount,
2958 				got.br_state)))
2959 			goto done;
2960 		break;
2961 
2962 	case 1:
2963 		/*
2964 		 * Deleting the last part of the extent.
2965 		 */
2966 		temp = got.br_blockcount - del->br_blockcount;
2967 		xfs_bmap_trace_pre_update(fname, "1", ip, idx, whichfork);
2968 		xfs_bmbt_set_blockcount(ep, temp);
2969 		ifp->if_lastex = idx;
2970 		if (delay) {
2971 			temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
2972 				da_old);
2973 			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
2974 			xfs_bmap_trace_post_update(fname, "1", ip, idx,
2975 				whichfork);
2976 			da_new = temp;
2977 			break;
2978 		}
2979 		xfs_bmap_trace_post_update(fname, "1", ip, idx, whichfork);
2980 		if (!cur) {
2981 			flags |= XFS_ILOG_FEXT(whichfork);
2982 			break;
2983 		}
2984 		if ((error = xfs_bmbt_update(cur, got.br_startoff,
2985 				got.br_startblock,
2986 				got.br_blockcount - del->br_blockcount,
2987 				got.br_state)))
2988 			goto done;
2989 		break;
2990 
2991 	case 0:
2992 		/*
2993 		 * Deleting the middle of the extent.
2994 		 */
2995 		temp = del->br_startoff - got.br_startoff;
2996 		xfs_bmap_trace_pre_update(fname, "0", ip, idx, whichfork);
2997 		xfs_bmbt_set_blockcount(ep, temp);
2998 		new.br_startoff = del_endoff;
2999 		temp2 = got_endoff - del_endoff;
3000 		new.br_blockcount = temp2;
3001 		new.br_state = got.br_state;
3002 		if (!delay) {
3003 			new.br_startblock = del_endblock;
3004 			flags |= XFS_ILOG_CORE;
3005 			if (cur) {
3006 				if ((error = xfs_bmbt_update(cur,
3007 						got.br_startoff,
3008 						got.br_startblock, temp,
3009 						got.br_state)))
3010 					goto done;
3011 				if ((error = xfs_bmbt_increment(cur, 0, &i)))
3012 					goto done;
3013 				cur->bc_rec.b = new;
3014 				error = xfs_bmbt_insert(cur, &i);
3015 				if (error && error != ENOSPC)
3016 					goto done;
3017 				/*
3018 				 * If get no-space back from btree insert,
3019 				 * it tried a split, and we have a zero
3020 				 * block reservation.
3021 				 * Fix up our state and return the error.
3022 				 */
3023 				if (error == ENOSPC) {
3024 					/*
3025 					 * Reset the cursor, don't trust
3026 					 * it after any insert operation.
3027 					 */
3028 					if ((error = xfs_bmbt_lookup_eq(cur,
3029 							got.br_startoff,
3030 							got.br_startblock,
3031 							temp, &i)))
3032 						goto done;
3033 					ASSERT(i == 1);
3034 					/*
3035 					 * Update the btree record back
3036 					 * to the original value.
3037 					 */
3038 					if ((error = xfs_bmbt_update(cur,
3039 							got.br_startoff,
3040 							got.br_startblock,
3041 							got.br_blockcount,
3042 							got.br_state)))
3043 						goto done;
3044 					/*
3045 					 * Reset the extent record back
3046 					 * to the original value.
3047 					 */
3048 					xfs_bmbt_set_blockcount(ep,
3049 						got.br_blockcount);
3050 					flags = 0;
3051 					error = XFS_ERROR(ENOSPC);
3052 					goto done;
3053 				}
3054 				ASSERT(i == 1);
3055 			} else
3056 				flags |= XFS_ILOG_FEXT(whichfork);
3057 			XFS_IFORK_NEXT_SET(ip, whichfork,
3058 				XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
3059 		} else {
3060 			ASSERT(whichfork == XFS_DATA_FORK);
3061 			temp = xfs_bmap_worst_indlen(ip, temp);
3062 			xfs_bmbt_set_startblock(ep, NULLSTARTBLOCK((int)temp));
3063 			temp2 = xfs_bmap_worst_indlen(ip, temp2);
3064 			new.br_startblock = NULLSTARTBLOCK((int)temp2);
3065 			da_new = temp + temp2;
3066 			while (da_new > da_old) {
3067 				if (temp) {
3068 					temp--;
3069 					da_new--;
3070 					xfs_bmbt_set_startblock(ep,
3071 						NULLSTARTBLOCK((int)temp));
3072 				}
3073 				if (da_new == da_old)
3074 					break;
3075 				if (temp2) {
3076 					temp2--;
3077 					da_new--;
3078 					new.br_startblock =
3079 						NULLSTARTBLOCK((int)temp2);
3080 				}
3081 			}
3082 		}
3083 		xfs_bmap_trace_post_update(fname, "0", ip, idx, whichfork);
3084 		xfs_bmap_trace_insert(fname, "0", ip, idx + 1, 1, &new, NULL,
3085 			whichfork);
3086 		xfs_bmap_insert_exlist(ip, idx + 1, 1, &new, whichfork);
3087 		ifp->if_lastex = idx + 1;
3088 		break;
3089 	}
3090 	/*
3091 	 * If we need to, add to list of extents to delete.
3092 	 */
3093 	if (do_fx)
3094 		xfs_bmap_add_free(del->br_startblock, del->br_blockcount, flist,
3095 			mp);
3096 	/*
3097 	 * Adjust inode # blocks in the file.
3098 	 */
3099 	if (nblks)
3100 		ip->i_d.di_nblocks -= nblks;
3101 	/*
3102 	 * Adjust quota data.
3103 	 */
3104 	if (qfield)
3105 		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks);
3106 
3107 	/*
3108 	 * Account for change in delayed indirect blocks.
3109 	 * Nothing to do for disk quota accounting here.
3110 	 */
3111 	ASSERT(da_old >= da_new);
3112 	if (da_old > da_new)
3113 		xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new),
3114 			rsvd);
3115 done:
3116 	*logflagsp = flags;
3117 	return error;
3118 }
3119 
3120 /*
3121  * Remove the entry "free" from the free item list.  Prev points to the
3122  * previous entry, unless "free" is the head of the list.
3123  */
3124 STATIC void
xfs_bmap_del_free(xfs_bmap_free_t * flist,xfs_bmap_free_item_t * prev,xfs_bmap_free_item_t * free)3125 xfs_bmap_del_free(
3126 	xfs_bmap_free_t		*flist,	/* free item list header */
3127 	xfs_bmap_free_item_t	*prev,	/* previous item on list, if any */
3128 	xfs_bmap_free_item_t	*free)	/* list item to be freed */
3129 {
3130 	if (prev)
3131 		prev->xbfi_next = free->xbfi_next;
3132 	else
3133 		flist->xbf_first = free->xbfi_next;
3134 	flist->xbf_count--;
3135 	kmem_zone_free(xfs_bmap_free_item_zone, free);
3136 }
3137 
3138 /*
3139  * Remove count entries from the extents array for inode "ip", starting
3140  * at index "idx".  Copies the remaining items down over the deleted ones,
3141  * and gives back the excess memory.
3142  */
3143 STATIC void
xfs_bmap_delete_exlist(xfs_inode_t * ip,xfs_extnum_t idx,xfs_extnum_t count,int whichfork)3144 xfs_bmap_delete_exlist(
3145 	xfs_inode_t	*ip,		/* incore inode pointer */
3146 	xfs_extnum_t	idx,		/* starting delete index */
3147 	xfs_extnum_t	count,		/* count of items to delete */
3148 	int		whichfork)	/* data or attr fork */
3149 {
3150 	xfs_bmbt_rec_t	*base;		/* base of extent list */
3151 	xfs_ifork_t	*ifp;		/* inode fork pointer */
3152 	xfs_extnum_t	nextents;	/* number of extents in list after */
3153 
3154 	ifp = XFS_IFORK_PTR(ip, whichfork);
3155 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3156 	base = ifp->if_u1.if_extents;
3157 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - count;
3158 	memmove(&base[idx], &base[idx + count],
3159 		(nextents - idx) * sizeof(*base));
3160 	xfs_iext_realloc(ip, -count, whichfork);
3161 }
3162 
3163 /*
3164  * Convert an extents-format file into a btree-format file.
3165  * The new file will have a root block (in the inode) and a single child block.
3166  */
3167 STATIC int					/* error */
xfs_bmap_extents_to_btree(xfs_trans_t * tp,xfs_inode_t * ip,xfs_fsblock_t * firstblock,xfs_bmap_free_t * flist,xfs_btree_cur_t ** curp,int wasdel,int * logflagsp,int whichfork)3168 xfs_bmap_extents_to_btree(
3169 	xfs_trans_t		*tp,		/* transaction pointer */
3170 	xfs_inode_t		*ip,		/* incore inode pointer */
3171 	xfs_fsblock_t		*firstblock,	/* first-block-allocated */
3172 	xfs_bmap_free_t		*flist,		/* blocks freed in xaction */
3173 	xfs_btree_cur_t		**curp,		/* cursor returned to caller */
3174 	int			wasdel,		/* converting a delayed alloc */
3175 	int			*logflagsp,	/* inode logging flags */
3176 	int			whichfork)	/* data or attr fork */
3177 {
3178 	xfs_bmbt_block_t	*ablock;	/* allocated (child) bt block */
3179 	xfs_buf_t		*abp;		/* buffer for ablock */
3180 	xfs_alloc_arg_t		args;		/* allocation arguments */
3181 	xfs_bmbt_rec_t		*arp;		/* child record pointer */
3182 	xfs_bmbt_block_t	*block;		/* btree root block */
3183 	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
3184 	xfs_bmbt_rec_t		*ep;		/* extent list pointer */
3185 	int			error;		/* error return value */
3186 	xfs_extnum_t		i, cnt;		/* extent list index */
3187 	xfs_ifork_t		*ifp;		/* inode fork pointer */
3188 	xfs_bmbt_key_t		*kp;		/* root block key pointer */
3189 	xfs_mount_t		*mp;		/* mount structure */
3190 	xfs_extnum_t		nextents;	/* extent list size */
3191 	xfs_bmbt_ptr_t		*pp;		/* root block address pointer */
3192 
3193 	ifp = XFS_IFORK_PTR(ip, whichfork);
3194 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
3195 	ASSERT(ifp->if_ext_max ==
3196 	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
3197 	/*
3198 	 * Make space in the inode incore.
3199 	 */
3200 	xfs_iroot_realloc(ip, 1, whichfork);
3201 	ifp->if_flags |= XFS_IFBROOT;
3202 	/*
3203 	 * Fill in the root.
3204 	 */
3205 	block = ifp->if_broot;
3206 	INT_SET(block->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
3207 	INT_SET(block->bb_level, ARCH_CONVERT, 1);
3208 	INT_SET(block->bb_numrecs, ARCH_CONVERT, 1);
3209 	INT_SET(block->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
3210 	INT_SET(block->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
3211 	/*
3212 	 * Need a cursor.  Can't allocate until bb_level is filled in.
3213 	 */
3214 	mp = ip->i_mount;
3215 	cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
3216 		whichfork);
3217 	cur->bc_private.b.firstblock = *firstblock;
3218 	cur->bc_private.b.flist = flist;
3219 	cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
3220 	/*
3221 	 * Convert to a btree with two levels, one record in root.
3222 	 */
3223 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
3224 	args.tp = tp;
3225 	args.mp = mp;
3226 	if (*firstblock == NULLFSBLOCK) {
3227 		args.type = XFS_ALLOCTYPE_START_BNO;
3228 		args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
3229 	} else if (flist->xbf_low) {
3230 		args.type = XFS_ALLOCTYPE_START_BNO;
3231 		args.fsbno = *firstblock;
3232 	} else {
3233 		args.type = XFS_ALLOCTYPE_NEAR_BNO;
3234 		args.fsbno = *firstblock;
3235 	}
3236 	args.minlen = args.maxlen = args.prod = 1;
3237 	args.total = args.minleft = args.alignment = args.mod = args.isfl =
3238 		args.minalignslop = 0;
3239 	args.wasdel = wasdel;
3240 	*logflagsp = 0;
3241 	if ((error = xfs_alloc_vextent(&args))) {
3242 		xfs_iroot_realloc(ip, -1, whichfork);
3243 		xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
3244 		return error;
3245 	}
3246 	/*
3247 	 * Allocation can't fail, the space was reserved.
3248 	 */
3249 	ASSERT(args.fsbno != NULLFSBLOCK);
3250 	ASSERT(*firstblock == NULLFSBLOCK ||
3251 	       args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
3252 	       (flist->xbf_low &&
3253 		args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
3254 	*firstblock = cur->bc_private.b.firstblock = args.fsbno;
3255 	cur->bc_private.b.allocated++;
3256 	ip->i_d.di_nblocks++;
3257 	XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
3258 	abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
3259 	/*
3260 	 * Fill in the child block.
3261 	 */
3262 	ablock = XFS_BUF_TO_BMBT_BLOCK(abp);
3263 	INT_SET(ablock->bb_magic, ARCH_CONVERT, XFS_BMAP_MAGIC);
3264 	INT_ZERO(ablock->bb_level, ARCH_CONVERT);
3265 	INT_SET(ablock->bb_leftsib, ARCH_CONVERT, NULLDFSBNO);
3266 	INT_SET(ablock->bb_rightsib, ARCH_CONVERT, NULLDFSBNO);
3267 	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
3268 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3269 	for (ep = ifp->if_u1.if_extents, cnt = i = 0; i < nextents; i++, ep++) {
3270 		if (!ISNULLSTARTBLOCK(xfs_bmbt_get_startblock(ep))) {
3271 			arp->l0 = INT_GET(ep->l0, ARCH_CONVERT);
3272 			arp->l1 = INT_GET(ep->l1, ARCH_CONVERT);
3273 			arp++; cnt++;
3274 		}
3275 	}
3276 	INT_SET(ablock->bb_numrecs, ARCH_CONVERT, cnt);
3277 	ASSERT(INT_GET(ablock->bb_numrecs, ARCH_CONVERT) == XFS_IFORK_NEXTENTS(ip, whichfork));
3278 	/*
3279 	 * Fill in the root key and pointer.
3280 	 */
3281 	kp = XFS_BMAP_KEY_IADDR(block, 1, cur);
3282 	arp = XFS_BMAP_REC_IADDR(ablock, 1, cur);
3283 	INT_SET(kp->br_startoff, ARCH_CONVERT, xfs_bmbt_disk_get_startoff(arp));
3284 	pp = XFS_BMAP_PTR_IADDR(block, 1, cur);
3285 	INT_SET(*pp, ARCH_CONVERT, args.fsbno);
3286 	/*
3287 	 * Do all this logging at the end so that
3288 	 * the root is at the right level.
3289 	 */
3290 	xfs_bmbt_log_block(cur, abp, XFS_BB_ALL_BITS);
3291 	xfs_bmbt_log_recs(cur, abp, 1, INT_GET(ablock->bb_numrecs, ARCH_CONVERT));
3292 	ASSERT(*curp == NULL);
3293 	*curp = cur;
3294 	*logflagsp = XFS_ILOG_CORE | XFS_ILOG_FBROOT(whichfork);
3295 	return 0;
3296 }
3297 
3298 /*
3299  * Insert new item(s) in the extent list for inode "ip".
3300  * Count new items are inserted at offset idx.
3301  */
3302 STATIC void
xfs_bmap_insert_exlist(xfs_inode_t * ip,xfs_extnum_t idx,xfs_extnum_t count,xfs_bmbt_irec_t * new,int whichfork)3303 xfs_bmap_insert_exlist(
3304 	xfs_inode_t	*ip,		/* incore inode pointer */
3305 	xfs_extnum_t	idx,		/* starting index of new items */
3306 	xfs_extnum_t	count,		/* number of inserted items */
3307 	xfs_bmbt_irec_t	*new,		/* items to insert */
3308 	int		whichfork)	/* data or attr fork */
3309 {
3310 	xfs_bmbt_rec_t	*base;		/* extent list base */
3311 	xfs_ifork_t	*ifp;		/* inode fork pointer */
3312 	xfs_extnum_t	nextents;	/* extent list size */
3313 	xfs_extnum_t	to;		/* extent list index */
3314 
3315 	ifp = XFS_IFORK_PTR(ip, whichfork);
3316 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
3317 	xfs_iext_realloc(ip, count, whichfork);
3318 	base = ifp->if_u1.if_extents;
3319 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3320 	memmove(&base[idx + count], &base[idx],
3321 		(nextents - (idx + count)) * sizeof(*base));
3322 	for (to = idx; to < idx + count; to++, new++)
3323 		xfs_bmbt_set_all(&base[to], new);
3324 }
3325 
3326 /*
3327  * Convert a local file to an extents file.
3328  * This code is out of bounds for data forks of regular files,
3329  * since the file data needs to get logged so things will stay consistent.
3330  * (The bmap-level manipulations are ok, though).
3331  */
3332 STATIC int				/* error */
xfs_bmap_local_to_extents(xfs_trans_t * tp,xfs_inode_t * ip,xfs_fsblock_t * firstblock,xfs_extlen_t total,int * logflagsp,int whichfork)3333 xfs_bmap_local_to_extents(
3334 	xfs_trans_t	*tp,		/* transaction pointer */
3335 	xfs_inode_t	*ip,		/* incore inode pointer */
3336 	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
3337 	xfs_extlen_t	total,		/* total blocks needed by transaction */
3338 	int		*logflagsp,	/* inode logging flags */
3339 	int		whichfork)	/* data or attr fork */
3340 {
3341 	int		error;		/* error return value */
3342 	int		flags;		/* logging flags returned */
3343 #ifdef XFS_BMAP_TRACE
3344 	static char	fname[] = "xfs_bmap_local_to_extents";
3345 #endif
3346 	xfs_ifork_t	*ifp;		/* inode fork pointer */
3347 
3348 	/*
3349 	 * We don't want to deal with the case of keeping inode data inline yet.
3350 	 * So sending the data fork of a regular inode is invalid.
3351 	 */
3352 	ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG &&
3353 		 whichfork == XFS_DATA_FORK));
3354 	ifp = XFS_IFORK_PTR(ip, whichfork);
3355 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
3356 	flags = 0;
3357 	error = 0;
3358 	if (ifp->if_bytes) {
3359 		xfs_alloc_arg_t	args;	/* allocation arguments */
3360 		xfs_buf_t	*bp;	/* buffer for extent list block */
3361 		xfs_bmbt_rec_t	*ep;	/* extent list pointer */
3362 
3363 		args.tp = tp;
3364 		args.mp = ip->i_mount;
3365 		ASSERT(ifp->if_flags & XFS_IFINLINE);
3366 		/*
3367 		 * Allocate a block.  We know we need only one, since the
3368 		 * file currently fits in an inode.
3369 		 */
3370 		if (*firstblock == NULLFSBLOCK) {
3371 			args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
3372 			args.type = XFS_ALLOCTYPE_START_BNO;
3373 		} else {
3374 			args.fsbno = *firstblock;
3375 			args.type = XFS_ALLOCTYPE_NEAR_BNO;
3376 		}
3377 		args.total = total;
3378 		args.mod = args.minleft = args.alignment = args.wasdel =
3379 			args.isfl = args.minalignslop = 0;
3380 		args.minlen = args.maxlen = args.prod = 1;
3381 		if ((error = xfs_alloc_vextent(&args)))
3382 			goto done;
3383 		/*
3384 		 * Can't fail, the space was reserved.
3385 		 */
3386 		ASSERT(args.fsbno != NULLFSBLOCK);
3387 		ASSERT(args.len == 1);
3388 		*firstblock = args.fsbno;
3389 		bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
3390 		memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data,
3391 			ifp->if_bytes);
3392 		xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
3393 		xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
3394 		xfs_iext_realloc(ip, 1, whichfork);
3395 		ep = ifp->if_u1.if_extents;
3396 		xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
3397 		xfs_bmap_trace_post_update(fname, "new", ip, 0, whichfork);
3398 		XFS_IFORK_NEXT_SET(ip, whichfork, 1);
3399 		ip->i_d.di_nblocks = 1;
3400 		XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
3401 			XFS_TRANS_DQ_BCOUNT, 1L);
3402 		flags |= XFS_ILOG_FEXT(whichfork);
3403 	} else
3404 		ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
3405 	ifp->if_flags &= ~XFS_IFINLINE;
3406 	ifp->if_flags |= XFS_IFEXTENTS;
3407 	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
3408 	flags |= XFS_ILOG_CORE;
3409 done:
3410 	*logflagsp = flags;
3411 	return error;
3412 }
3413 
3414 xfs_bmbt_rec_t *			/* pointer to found extent entry */
xfs_bmap_do_search_extents(xfs_bmbt_rec_t * base,xfs_extnum_t lastx,xfs_extnum_t nextents,xfs_fileoff_t bno,int * eofp,xfs_extnum_t * lastxp,xfs_bmbt_irec_t * gotp,xfs_bmbt_irec_t * prevp)3415 xfs_bmap_do_search_extents(
3416 	xfs_bmbt_rec_t	*base,		/* base of extent list */
3417 	xfs_extnum_t	lastx,		/* last extent index used */
3418 	xfs_extnum_t	nextents,	/* extent list size */
3419 	xfs_fileoff_t	bno,		/* block number searched for */
3420 	int		*eofp,		/* out: end of file found */
3421 	xfs_extnum_t	*lastxp,	/* out: last extent index */
3422 	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
3423 	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
3424 {
3425 	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
3426 	xfs_bmbt_irec_t	got;		/* extent list entry, decoded */
3427 	int		high;		/* high index of binary search */
3428 	int		low;		/* low index of binary search */
3429 
3430 	/*
3431 	 * Initialize the extent entry structure to catch access to
3432 	 * uninitialized br_startblock field.
3433 	 */
3434 	got.br_startoff = 0xffa5a5a5a5a5a5a5LL;
3435 	got.br_blockcount = 0xa55a5a5a5a5a5a5aLL;
3436 	got.br_state = XFS_EXT_INVALID;
3437 
3438 #if XFS_BIG_BLKNOS
3439 	got.br_startblock = 0xffffa5a5a5a5a5a5LL;
3440 #else
3441 	got.br_startblock = 0xffffa5a5;
3442 #endif
3443 
3444 	if (lastx != NULLEXTNUM && lastx < nextents)
3445 		ep = base + lastx;
3446 	else
3447 		ep = NULL;
3448 	prevp->br_startoff = NULLFILEOFF;
3449 	if (ep && bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep)) &&
3450 	    bno < got.br_startoff +
3451 		  (got.br_blockcount = xfs_bmbt_get_blockcount(ep)))
3452 		*eofp = 0;
3453 	else if (ep && lastx < nextents - 1 &&
3454 		 bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep + 1)) &&
3455 		 bno < got.br_startoff +
3456 		       (got.br_blockcount = xfs_bmbt_get_blockcount(ep + 1))) {
3457 		lastx++;
3458 		ep++;
3459 		*eofp = 0;
3460 	} else if (nextents == 0)
3461 		*eofp = 1;
3462 	else if (bno == 0 &&
3463 		 (got.br_startoff = xfs_bmbt_get_startoff(base)) == 0) {
3464 		ep = base;
3465 		lastx = 0;
3466 		got.br_blockcount = xfs_bmbt_get_blockcount(ep);
3467 		*eofp = 0;
3468 	} else {
3469 		/* binary search the extents array */
3470 		low = 0;
3471 		high = nextents - 1;
3472 		while (low <= high) {
3473 			XFS_STATS_INC(xs_cmp_exlist);
3474 			lastx = (low + high) >> 1;
3475 			ep = base + lastx;
3476 			got.br_startoff = xfs_bmbt_get_startoff(ep);
3477 			got.br_blockcount = xfs_bmbt_get_blockcount(ep);
3478 			if (bno < got.br_startoff)
3479 				high = lastx - 1;
3480 			else if (bno >= got.br_startoff + got.br_blockcount)
3481 				low = lastx + 1;
3482 			else {
3483 				got.br_startblock = xfs_bmbt_get_startblock(ep);
3484 				got.br_state = xfs_bmbt_get_state(ep);
3485 				*eofp = 0;
3486 				*lastxp = lastx;
3487 				*gotp = got;
3488 				return ep;
3489 			}
3490 		}
3491 		if (bno >= got.br_startoff + got.br_blockcount) {
3492 			lastx++;
3493 			if (lastx == nextents) {
3494 				*eofp = 1;
3495 				got.br_startblock = xfs_bmbt_get_startblock(ep);
3496 				got.br_state = xfs_bmbt_get_state(ep);
3497 				*prevp = got;
3498 				ep = NULL;
3499 			} else {
3500 				*eofp = 0;
3501 				xfs_bmbt_get_all(ep, prevp);
3502 				ep++;
3503 				got.br_startoff = xfs_bmbt_get_startoff(ep);
3504 				got.br_blockcount = xfs_bmbt_get_blockcount(ep);
3505 			}
3506 		} else {
3507 			*eofp = 0;
3508 			if (ep > base)
3509 				xfs_bmbt_get_all(ep - 1, prevp);
3510 		}
3511 	}
3512 	if (ep) {
3513 		got.br_startblock = xfs_bmbt_get_startblock(ep);
3514 		got.br_state = xfs_bmbt_get_state(ep);
3515 	}
3516 	*lastxp = lastx;
3517 	*gotp = got;
3518 	return ep;
3519 }
3520 
3521 /*
3522  * Search the extents list for the inode, for the extent containing bno.
3523  * If bno lies in a hole, point to the next entry.  If bno lies past eof,
3524  * *eofp will be set, and *prevp will contain the last entry (null if none).
3525  * Else, *lastxp will be set to the index of the found
3526  * entry; *gotp will contain the entry.
3527  */
3528 STATIC xfs_bmbt_rec_t *                 /* pointer to found extent entry */
xfs_bmap_search_extents(xfs_inode_t * ip,xfs_fileoff_t bno,int whichfork,int * eofp,xfs_extnum_t * lastxp,xfs_bmbt_irec_t * gotp,xfs_bmbt_irec_t * prevp)3529 xfs_bmap_search_extents(
3530 	xfs_inode_t     *ip,            /* incore inode pointer */
3531 	xfs_fileoff_t   bno,            /* block number searched for */
3532 	int             whichfork,      /* data or attr fork */
3533 	int             *eofp,          /* out: end of file found */
3534 	xfs_extnum_t    *lastxp,        /* out: last extent index */
3535 	xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
3536 	xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
3537 {
3538 	xfs_ifork_t	*ifp;		/* inode fork pointer */
3539 	xfs_bmbt_rec_t  *base;          /* base of extent list */
3540 	xfs_extnum_t    lastx;          /* last extent index used */
3541 	xfs_extnum_t    nextents;       /* extent list size */
3542 	xfs_bmbt_rec_t  *ep;            /* extent list entry pointer */
3543 	int		rt;		/* realtime flag    */
3544 
3545 	XFS_STATS_INC(xs_look_exlist);
3546 	ifp = XFS_IFORK_PTR(ip, whichfork);
3547 	lastx = ifp->if_lastex;
3548 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
3549 	base = &ifp->if_u1.if_extents[0];
3550 
3551 	ep = xfs_bmap_do_search_extents(base, lastx, nextents, bno, eofp,
3552 					  lastxp, gotp, prevp);
3553 	rt = ip->i_d.di_flags & XFS_DIFLAG_REALTIME;
3554 	if(!rt && !gotp->br_startblock && (*lastxp != NULLEXTNUM)) {
3555                 cmn_err(CE_PANIC,"Access to block zero: fs: <%s> inode: %lld "
3556 			"start_block : %llx start_off : %llx blkcnt : %llx "
3557 			"extent-state : %x \n",
3558 			(ip->i_mount)->m_fsname,(long long)ip->i_ino,
3559 			gotp->br_startblock, gotp->br_startoff,
3560 			gotp->br_blockcount,gotp->br_state);
3561         }
3562         return ep;
3563 }
3564 
3565 
3566 #ifdef XFS_BMAP_TRACE
3567 ktrace_t	*xfs_bmap_trace_buf;
3568 
3569 /*
3570  * Add a bmap trace buffer entry.  Base routine for the others.
3571  */
3572 STATIC void
xfs_bmap_trace_addentry(int opcode,char * fname,char * desc,xfs_inode_t * ip,xfs_extnum_t idx,xfs_extnum_t cnt,xfs_bmbt_rec_t * r1,xfs_bmbt_rec_t * r2,int whichfork)3573 xfs_bmap_trace_addentry(
3574 	int		opcode,		/* operation */
3575 	char		*fname,		/* function name */
3576 	char		*desc,		/* operation description */
3577 	xfs_inode_t	*ip,		/* incore inode pointer */
3578 	xfs_extnum_t	idx,		/* index of entry(ies) */
3579 	xfs_extnum_t	cnt,		/* count of entries, 1 or 2 */
3580 	xfs_bmbt_rec_t	*r1,		/* first record */
3581 	xfs_bmbt_rec_t	*r2,		/* second record or null */
3582 	int		whichfork)	/* data or attr fork */
3583 {
3584 	xfs_bmbt_rec_t	tr2;
3585 
3586 	ASSERT(cnt == 1 || cnt == 2);
3587 	ASSERT(r1 != NULL);
3588 	if (cnt == 1) {
3589 		ASSERT(r2 == NULL);
3590 		r2 = &tr2;
3591 		memset(&tr2, 0, sizeof(tr2));
3592 	} else
3593 		ASSERT(r2 != NULL);
3594 	ktrace_enter(xfs_bmap_trace_buf,
3595 		(void *)(__psint_t)(opcode | (whichfork << 16)),
3596 		(void *)fname, (void *)desc, (void *)ip,
3597 		(void *)(__psint_t)idx,
3598 		(void *)(__psint_t)cnt,
3599 		(void *)(__psunsigned_t)(ip->i_ino >> 32),
3600 		(void *)(__psunsigned_t)(unsigned)ip->i_ino,
3601 		(void *)(__psunsigned_t)(r1->l0 >> 32),
3602 		(void *)(__psunsigned_t)(unsigned)(r1->l0),
3603 		(void *)(__psunsigned_t)(r1->l1 >> 32),
3604 		(void *)(__psunsigned_t)(unsigned)(r1->l1),
3605 		(void *)(__psunsigned_t)(r2->l0 >> 32),
3606 		(void *)(__psunsigned_t)(unsigned)(r2->l0),
3607 		(void *)(__psunsigned_t)(r2->l1 >> 32),
3608 		(void *)(__psunsigned_t)(unsigned)(r2->l1)
3609 		);
3610 	ASSERT(ip->i_xtrace);
3611 	ktrace_enter(ip->i_xtrace,
3612 		(void *)(__psint_t)(opcode | (whichfork << 16)),
3613 		(void *)fname, (void *)desc, (void *)ip,
3614 		(void *)(__psint_t)idx,
3615 		(void *)(__psint_t)cnt,
3616 		(void *)(__psunsigned_t)(ip->i_ino >> 32),
3617 		(void *)(__psunsigned_t)(unsigned)ip->i_ino,
3618 		(void *)(__psunsigned_t)(r1->l0 >> 32),
3619 		(void *)(__psunsigned_t)(unsigned)(r1->l0),
3620 		(void *)(__psunsigned_t)(r1->l1 >> 32),
3621 		(void *)(__psunsigned_t)(unsigned)(r1->l1),
3622 		(void *)(__psunsigned_t)(r2->l0 >> 32),
3623 		(void *)(__psunsigned_t)(unsigned)(r2->l0),
3624 		(void *)(__psunsigned_t)(r2->l1 >> 32),
3625 		(void *)(__psunsigned_t)(unsigned)(r2->l1)
3626 		);
3627 }
3628 
3629 /*
3630  * Add bmap trace entry prior to a call to xfs_bmap_delete_exlist.
3631  */
3632 STATIC void
xfs_bmap_trace_delete(char * fname,char * desc,xfs_inode_t * ip,xfs_extnum_t idx,xfs_extnum_t cnt,int whichfork)3633 xfs_bmap_trace_delete(
3634 	char		*fname,		/* function name */
3635 	char		*desc,		/* operation description */
3636 	xfs_inode_t	*ip,		/* incore inode pointer */
3637 	xfs_extnum_t	idx,		/* index of entry(entries) deleted */
3638 	xfs_extnum_t	cnt,		/* count of entries deleted, 1 or 2 */
3639 	int		whichfork)	/* data or attr fork */
3640 {
3641 	xfs_ifork_t	*ifp;		/* inode fork pointer */
3642 
3643 	ifp = XFS_IFORK_PTR(ip, whichfork);
3644 	xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_DELETE, fname, desc, ip, idx,
3645 		cnt, &ifp->if_u1.if_extents[idx],
3646 		cnt == 2 ? &ifp->if_u1.if_extents[idx + 1] : NULL,
3647 		whichfork);
3648 }
3649 
3650 /*
3651  * Add bmap trace entry prior to a call to xfs_bmap_insert_exlist, or
3652  * reading in the extents list from the disk (in the btree).
3653  */
3654 STATIC void
xfs_bmap_trace_insert(char * fname,char * desc,xfs_inode_t * ip,xfs_extnum_t idx,xfs_extnum_t cnt,xfs_bmbt_irec_t * r1,xfs_bmbt_irec_t * r2,int whichfork)3655 xfs_bmap_trace_insert(
3656 	char		*fname,		/* function name */
3657 	char		*desc,		/* operation description */
3658 	xfs_inode_t	*ip,		/* incore inode pointer */
3659 	xfs_extnum_t	idx,		/* index of entry(entries) inserted */
3660 	xfs_extnum_t	cnt,		/* count of entries inserted, 1 or 2 */
3661 	xfs_bmbt_irec_t	*r1,		/* inserted record 1 */
3662 	xfs_bmbt_irec_t	*r2,		/* inserted record 2 or null */
3663 	int		whichfork)	/* data or attr fork */
3664 {
3665 	xfs_bmbt_rec_t	tr1;		/* compressed record 1 */
3666 	xfs_bmbt_rec_t	tr2;		/* compressed record 2 if needed */
3667 
3668 	xfs_bmbt_set_all(&tr1, r1);
3669 	if (cnt == 2) {
3670 		ASSERT(r2 != NULL);
3671 		xfs_bmbt_set_all(&tr2, r2);
3672 	} else {
3673 		ASSERT(cnt == 1);
3674 		ASSERT(r2 == NULL);
3675 	}
3676 	xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_INSERT, fname, desc, ip, idx,
3677 		cnt, &tr1, cnt == 2 ? &tr2 : NULL, whichfork);
3678 }
3679 
3680 /*
3681  * Add bmap trace entry after updating an extent list entry in place.
3682  */
3683 STATIC void
xfs_bmap_trace_post_update(char * fname,char * desc,xfs_inode_t * ip,xfs_extnum_t idx,int whichfork)3684 xfs_bmap_trace_post_update(
3685 	char		*fname,		/* function name */
3686 	char		*desc,		/* operation description */
3687 	xfs_inode_t	*ip,		/* incore inode pointer */
3688 	xfs_extnum_t	idx,		/* index of entry updated */
3689 	int		whichfork)	/* data or attr fork */
3690 {
3691 	xfs_ifork_t	*ifp;		/* inode fork pointer */
3692 
3693 	ifp = XFS_IFORK_PTR(ip, whichfork);
3694 	xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_POST_UP, fname, desc, ip, idx,
3695 		1, &ifp->if_u1.if_extents[idx], NULL, whichfork);
3696 }
3697 
3698 /*
3699  * Add bmap trace entry prior to updating an extent list entry in place.
3700  */
3701 STATIC void
xfs_bmap_trace_pre_update(char * fname,char * desc,xfs_inode_t * ip,xfs_extnum_t idx,int whichfork)3702 xfs_bmap_trace_pre_update(
3703 	char		*fname,		/* function name */
3704 	char		*desc,		/* operation description */
3705 	xfs_inode_t	*ip,		/* incore inode pointer */
3706 	xfs_extnum_t	idx,		/* index of entry to be updated */
3707 	int		whichfork)	/* data or attr fork */
3708 {
3709 	xfs_ifork_t	*ifp;		/* inode fork pointer */
3710 
3711 	ifp = XFS_IFORK_PTR(ip, whichfork);
3712 	xfs_bmap_trace_addentry(XFS_BMAP_KTRACE_PRE_UP, fname, desc, ip, idx, 1,
3713 		&ifp->if_u1.if_extents[idx], NULL, whichfork);
3714 }
3715 #endif	/* XFS_BMAP_TRACE */
3716 
3717 /*
3718  * Compute the worst-case number of indirect blocks that will be used
3719  * for ip's delayed extent of length "len".
3720  */
3721 STATIC xfs_filblks_t
xfs_bmap_worst_indlen(xfs_inode_t * ip,xfs_filblks_t len)3722 xfs_bmap_worst_indlen(
3723 	xfs_inode_t	*ip,		/* incore inode pointer */
3724 	xfs_filblks_t	len)		/* delayed extent length */
3725 {
3726 	int		level;		/* btree level number */
3727 	int		maxrecs;	/* maximum record count at this level */
3728 	xfs_mount_t	*mp;		/* mount structure */
3729 	xfs_filblks_t	rval;		/* return value */
3730 
3731 	mp = ip->i_mount;
3732 	maxrecs = mp->m_bmap_dmxr[0];
3733 	for (level = 0, rval = 0;
3734 	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
3735 	     level++) {
3736 		len += maxrecs - 1;
3737 		do_div(len, maxrecs);
3738 		rval += len;
3739 		if (len == 1)
3740 			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
3741 				level - 1;
3742 		if (level == 0)
3743 			maxrecs = mp->m_bmap_dmxr[1];
3744 	}
3745 	return rval;
3746 }
3747 
3748 #if defined(XFS_RW_TRACE)
3749 STATIC void
xfs_bunmap_trace(xfs_inode_t * ip,xfs_fileoff_t bno,xfs_filblks_t len,int flags,inst_t * ra)3750 xfs_bunmap_trace(
3751 	xfs_inode_t		*ip,
3752 	xfs_fileoff_t		bno,
3753 	xfs_filblks_t		len,
3754 	int			flags,
3755 	inst_t			*ra)
3756 {
3757 	if (ip->i_rwtrace == NULL)
3758 		return;
3759 	ktrace_enter(ip->i_rwtrace,
3760 		(void *)(__psint_t)XFS_BUNMAPI,
3761 		(void *)ip,
3762 		(void *)(__psint_t)((ip->i_d.di_size >> 32) & 0xffffffff),
3763 		(void *)(__psint_t)(ip->i_d.di_size & 0xffffffff),
3764 		(void *)(__psint_t)(((xfs_dfiloff_t)bno >> 32) & 0xffffffff),
3765 		(void *)(__psint_t)((xfs_dfiloff_t)bno & 0xffffffff),
3766 		(void *)(__psint_t)len,
3767 		(void *)(__psint_t)flags,
3768 		(void *)(unsigned long)current_cpu(),
3769 		(void *)ra,
3770 		(void *)0,
3771 		(void *)0,
3772 		(void *)0,
3773 		(void *)0,
3774 		(void *)0,
3775 		(void *)0);
3776 }
3777 #endif
3778 
3779 /*
3780  * Convert inode from non-attributed to attributed.
3781  * Must not be in a transaction, ip must not be locked.
3782  */
3783 int						/* error code */
xfs_bmap_add_attrfork(xfs_inode_t * ip,int rsvd)3784 xfs_bmap_add_attrfork(
3785 	xfs_inode_t		*ip,		/* incore inode pointer */
3786 	int			rsvd)		/* OK to allocated reserved blocks in trans */
3787 {
3788 	int			blks;		/* space reservation */
3789 	int			committed;	/* xaction was committed */
3790 	int			error;		/* error return value */
3791 	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
3792 	xfs_bmap_free_t		flist;		/* freed extent list */
3793 	int			logflags;	/* logging flags */
3794 	xfs_mount_t		*mp;		/* mount structure */
3795 	unsigned long		s;		/* spinlock spl value */
3796 	xfs_trans_t		*tp;		/* transaction pointer */
3797 
3798 	ASSERT(ip->i_df.if_ext_max ==
3799 	       XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3800 	if (XFS_IFORK_Q(ip))
3801 		return 0;
3802 	mp = ip->i_mount;
3803 	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
3804 	tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
3805 	blks = XFS_ADDAFORK_SPACE_RES(mp);
3806 	if (rsvd)
3807 		tp->t_flags |= XFS_TRANS_RESERVE;
3808 	if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0,
3809 			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
3810 		goto error0;
3811 	xfs_ilock(ip, XFS_ILOCK_EXCL);
3812 	error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ?
3813 			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
3814 			XFS_QMOPT_RES_REGBLKS);
3815 	if (error) {
3816 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
3817 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES);
3818 		return error;
3819 	}
3820 	if (XFS_IFORK_Q(ip))
3821 		goto error1;
3822 	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
3823 		/*
3824 		 * For inodes coming from pre-6.2 filesystems.
3825 		 */
3826 		ASSERT(ip->i_d.di_aformat == 0);
3827 		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
3828 	}
3829 	ASSERT(ip->i_d.di_anextents == 0);
3830 	VN_HOLD(XFS_ITOV(ip));
3831 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
3832 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3833 	switch (ip->i_d.di_format) {
3834 	case XFS_DINODE_FMT_DEV:
3835 		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
3836 		break;
3837 	case XFS_DINODE_FMT_UUID:
3838 		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
3839 		break;
3840 	case XFS_DINODE_FMT_LOCAL:
3841 	case XFS_DINODE_FMT_EXTENTS:
3842 	case XFS_DINODE_FMT_BTREE:
3843 		ip->i_d.di_forkoff = mp->m_attroffset >> 3;
3844 		break;
3845 	default:
3846 		ASSERT(0);
3847 		error = XFS_ERROR(EINVAL);
3848 		goto error1;
3849 	}
3850 	ip->i_df.if_ext_max =
3851 		XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
3852 	ASSERT(ip->i_afp == NULL);
3853 	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
3854 	ip->i_afp->if_ext_max =
3855 		XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
3856 	ip->i_afp->if_flags = XFS_IFEXTENTS;
3857 	logflags = 0;
3858 	XFS_BMAP_INIT(&flist, &firstblock);
3859 	switch (ip->i_d.di_format) {
3860 	case XFS_DINODE_FMT_LOCAL:
3861 		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &flist,
3862 			&logflags);
3863 		break;
3864 	case XFS_DINODE_FMT_EXTENTS:
3865 		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
3866 			&flist, &logflags);
3867 		break;
3868 	case XFS_DINODE_FMT_BTREE:
3869 		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &flist,
3870 			&logflags);
3871 		break;
3872 	default:
3873 		error = 0;
3874 		break;
3875 	}
3876 	if (logflags)
3877 		xfs_trans_log_inode(tp, ip, logflags);
3878 	if (error)
3879 		goto error2;
3880 	if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) {
3881 		s = XFS_SB_LOCK(mp);
3882 		if (!XFS_SB_VERSION_HASATTR(&mp->m_sb)) {
3883 			XFS_SB_VERSION_ADDATTR(&mp->m_sb);
3884 			XFS_SB_UNLOCK(mp, s);
3885 			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
3886 		} else
3887 			XFS_SB_UNLOCK(mp, s);
3888 	}
3889 	if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed)))
3890 		goto error2;
3891 	error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES, NULL);
3892 	ASSERT(ip->i_df.if_ext_max ==
3893 	       XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3894 	return error;
3895 error2:
3896 	xfs_bmap_cancel(&flist);
3897 error1:
3898 	ASSERT(ismrlocked(&ip->i_lock,MR_UPDATE));
3899 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
3900 error0:
3901 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
3902 	ASSERT(ip->i_df.if_ext_max ==
3903 	       XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
3904 	return error;
3905 }
3906 
3907 /*
3908  * Add the extent to the list of extents to be free at transaction end.
3909  * The list is maintained sorted (by block number).
3910  */
3911 /* ARGSUSED */
3912 void
xfs_bmap_add_free(xfs_fsblock_t bno,xfs_filblks_t len,xfs_bmap_free_t * flist,xfs_mount_t * mp)3913 xfs_bmap_add_free(
3914 	xfs_fsblock_t		bno,		/* fs block number of extent */
3915 	xfs_filblks_t		len,		/* length of extent */
3916 	xfs_bmap_free_t		*flist,		/* list of extents */
3917 	xfs_mount_t		*mp)		/* mount point structure */
3918 {
3919 	xfs_bmap_free_item_t	*cur;		/* current (next) element */
3920 	xfs_bmap_free_item_t	*new;		/* new element */
3921 	xfs_bmap_free_item_t	*prev;		/* previous element */
3922 #ifdef DEBUG
3923 	xfs_agnumber_t		agno;
3924 	xfs_agblock_t		agbno;
3925 
3926 	ASSERT(bno != NULLFSBLOCK);
3927 	ASSERT(len > 0);
3928 	ASSERT(len <= MAXEXTLEN);
3929 	ASSERT(!ISNULLSTARTBLOCK(bno));
3930 	agno = XFS_FSB_TO_AGNO(mp, bno);
3931 	agbno = XFS_FSB_TO_AGBNO(mp, bno);
3932 	ASSERT(agno < mp->m_sb.sb_agcount);
3933 	ASSERT(agbno < mp->m_sb.sb_agblocks);
3934 	ASSERT(len < mp->m_sb.sb_agblocks);
3935 	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
3936 #endif
3937 	ASSERT(xfs_bmap_free_item_zone != NULL);
3938 	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
3939 	new->xbfi_startblock = bno;
3940 	new->xbfi_blockcount = (xfs_extlen_t)len;
3941 	for (prev = NULL, cur = flist->xbf_first;
3942 	     cur != NULL;
3943 	     prev = cur, cur = cur->xbfi_next) {
3944 		if (cur->xbfi_startblock >= bno)
3945 			break;
3946 	}
3947 	if (prev)
3948 		prev->xbfi_next = new;
3949 	else
3950 		flist->xbf_first = new;
3951 	new->xbfi_next = cur;
3952 	flist->xbf_count++;
3953 }
3954 
3955 /*
3956  * Compute and fill in the value of the maximum depth of a bmap btree
3957  * in this filesystem.  Done once, during mount.
3958  */
3959 void
xfs_bmap_compute_maxlevels(xfs_mount_t * mp,int whichfork)3960 xfs_bmap_compute_maxlevels(
3961 	xfs_mount_t	*mp,		/* file system mount structure */
3962 	int		whichfork)	/* data or attr fork */
3963 {
3964 	int		level;		/* btree level */
3965 	uint		maxblocks;	/* max blocks at this level */
3966 	uint		maxleafents;	/* max leaf entries possible */
3967 	int		maxrootrecs;	/* max records in root block */
3968 	int		minleafrecs;	/* min records in leaf block */
3969 	int		minnoderecs;	/* min records in node block */
3970 	int		sz;		/* root block size */
3971 
3972 	/*
3973 	 * The maximum number of extents in a file, hence the maximum
3974 	 * number of leaf entries, is controlled by the type of di_nextents
3975 	 * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
3976 	 * (a signed 16-bit number, xfs_aextnum_t).
3977 	 */
3978 	maxleafents = (whichfork == XFS_DATA_FORK) ? MAXEXTNUM : MAXAEXTNUM;
3979 	minleafrecs = mp->m_bmap_dmnr[0];
3980 	minnoderecs = mp->m_bmap_dmnr[1];
3981 	sz = (whichfork == XFS_DATA_FORK) ?
3982 		mp->m_attroffset :
3983 		mp->m_sb.sb_inodesize - mp->m_attroffset;
3984 	maxrootrecs = (int)XFS_BTREE_BLOCK_MAXRECS(sz, xfs_bmdr, 0);
3985 	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
3986 	for (level = 1; maxblocks > 1; level++) {
3987 		if (maxblocks <= maxrootrecs)
3988 			maxblocks = 1;
3989 		else
3990 			maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
3991 	}
3992 	mp->m_bm_maxlevels[whichfork] = level;
3993 }
3994 
3995 /*
3996  * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
3997  * caller.  Frees all the extents that need freeing, which must be done
3998  * last due to locking considerations.  We never free any extents in
3999  * the first transaction.  This is to allow the caller to make the first
4000  * transaction a synchronous one so that the pointers to the data being
4001  * broken in this transaction will be permanent before the data is actually
4002  * freed.  This is necessary to prevent blocks from being reallocated
4003  * and written to before the free and reallocation are actually permanent.
4004  * We do not just make the first transaction synchronous here, because
4005  * there are more efficient ways to gain the same protection in some cases
4006  * (see the file truncation code).
4007  *
4008  * Return 1 if the given transaction was committed and a new one
4009  * started, and 0 otherwise in the committed parameter.
4010  */
4011 /*ARGSUSED*/
4012 int						/* error */
xfs_bmap_finish(xfs_trans_t ** tp,xfs_bmap_free_t * flist,xfs_fsblock_t firstblock,int * committed)4013 xfs_bmap_finish(
4014 	xfs_trans_t		**tp,		/* transaction pointer addr */
4015 	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
4016 	xfs_fsblock_t		firstblock,	/* controlled ag for allocs */
4017 	int			*committed)	/* xact committed or not */
4018 {
4019 	xfs_efd_log_item_t	*efd;		/* extent free data */
4020 	xfs_efi_log_item_t	*efi;		/* extent free intention */
4021 	int			error;		/* error return value */
4022 	xfs_bmap_free_item_t	*free;		/* free extent list item */
4023 	unsigned int		logres;		/* new log reservation */
4024 	unsigned int		logcount;	/* new log count */
4025 	xfs_mount_t		*mp;		/* filesystem mount structure */
4026 	xfs_bmap_free_item_t	*next;		/* next item on free list */
4027 	xfs_trans_t		*ntp;		/* new transaction pointer */
4028 
4029 	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
4030 	if (flist->xbf_count == 0) {
4031 		*committed = 0;
4032 		return 0;
4033 	}
4034 	ntp = *tp;
4035 	efi = xfs_trans_get_efi(ntp, flist->xbf_count);
4036 	for (free = flist->xbf_first; free; free = free->xbfi_next)
4037 		xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
4038 			free->xbfi_blockcount);
4039 	logres = ntp->t_log_res;
4040 	logcount = ntp->t_log_count;
4041 	ntp = xfs_trans_dup(*tp);
4042 	error = xfs_trans_commit(*tp, 0, NULL);
4043 	*tp = ntp;
4044 	*committed = 1;
4045 	/*
4046 	 * We have a new transaction, so we should return committed=1,
4047 	 * even though we're returning an error.
4048 	 */
4049 	if (error) {
4050 		return error;
4051 	}
4052 	if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
4053 			logcount)))
4054 		return error;
4055 	efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
4056 	for (free = flist->xbf_first; free != NULL; free = next) {
4057 		next = free->xbfi_next;
4058 		if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
4059 				free->xbfi_blockcount))) {
4060 			/*
4061 			 * The bmap free list will be cleaned up at a
4062 			 * higher level.  The EFI will be canceled when
4063 			 * this transaction is aborted.
4064 			 * Need to force shutdown here to make sure it
4065 			 * happens, since this transaction may not be
4066 			 * dirty yet.
4067 			 */
4068 			mp = ntp->t_mountp;
4069 			if (!XFS_FORCED_SHUTDOWN(mp))
4070 				xfs_force_shutdown(mp,
4071 						   (error == EFSCORRUPTED) ?
4072 						   XFS_CORRUPT_INCORE :
4073 						   XFS_METADATA_IO_ERROR);
4074 			return error;
4075 		}
4076 		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
4077 			free->xbfi_blockcount);
4078 		xfs_bmap_del_free(flist, NULL, free);
4079 	}
4080 	return 0;
4081 }
4082 
4083 /*
4084  * Free up any items left in the list.
4085  */
4086 void
xfs_bmap_cancel(xfs_bmap_free_t * flist)4087 xfs_bmap_cancel(
4088 	xfs_bmap_free_t		*flist)	/* list of bmap_free_items */
4089 {
4090 	xfs_bmap_free_item_t	*free;	/* free list item */
4091 	xfs_bmap_free_item_t	*next;
4092 
4093 	if (flist->xbf_count == 0)
4094 		return;
4095 	ASSERT(flist->xbf_first != NULL);
4096 	for (free = flist->xbf_first; free; free = next) {
4097 		next = free->xbfi_next;
4098 		xfs_bmap_del_free(flist, NULL, free);
4099 	}
4100 	ASSERT(flist->xbf_count == 0);
4101 }
4102 
4103 /*
4104  * Returns the file-relative block number of the first unused block(s)
4105  * in the file with at least "len" logically contiguous blocks free.
4106  * This is the lowest-address hole if the file has holes, else the first block
4107  * past the end of file.
4108  * Return 0 if the file is currently local (in-inode).
4109  */
4110 int						/* error */
xfs_bmap_first_unused(xfs_trans_t * tp,xfs_inode_t * ip,xfs_extlen_t len,xfs_fileoff_t * first_unused,int whichfork)4111 xfs_bmap_first_unused(
4112 	xfs_trans_t	*tp,			/* transaction pointer */
4113 	xfs_inode_t	*ip,			/* incore inode */
4114 	xfs_extlen_t	len,			/* size of hole to find */
4115 	xfs_fileoff_t	*first_unused,		/* unused block */
4116 	int		whichfork)		/* data or attr fork */
4117 {
4118 	xfs_bmbt_rec_t	*base;			/* base of extent array */
4119 	xfs_bmbt_rec_t	*ep;			/* pointer to an extent entry */
4120 	int		error;			/* error return value */
4121 	xfs_ifork_t	*ifp;			/* inode fork pointer */
4122 	xfs_fileoff_t	lastaddr;		/* last block number seen */
4123 	xfs_fileoff_t	lowest;			/* lowest useful block */
4124 	xfs_fileoff_t	max;			/* starting useful block */
4125 	xfs_fileoff_t	off;			/* offset for this block */
4126 	xfs_extnum_t	nextents;		/* number of extent entries */
4127 
4128 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
4129 	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
4130 	       XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
4131 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4132 		*first_unused = 0;
4133 		return 0;
4134 	}
4135 	ifp = XFS_IFORK_PTR(ip, whichfork);
4136 	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
4137 	    (error = xfs_iread_extents(tp, ip, whichfork)))
4138 		return error;
4139 	lowest = *first_unused;
4140 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4141 	base = &ifp->if_u1.if_extents[0];
4142 	for (lastaddr = 0, max = lowest, ep = base;
4143 	     ep < &base[nextents];
4144 	     ep++) {
4145 		off = xfs_bmbt_get_startoff(ep);
4146 		/*
4147 		 * See if the hole before this extent will work.
4148 		 */
4149 		if (off >= lowest + len && off - max >= len) {
4150 			*first_unused = max;
4151 			return 0;
4152 		}
4153 		lastaddr = off + xfs_bmbt_get_blockcount(ep);
4154 		max = XFS_FILEOFF_MAX(lastaddr, lowest);
4155 	}
4156 	*first_unused = max;
4157 	return 0;
4158 }
4159 
4160 /*
4161  * Returns the file-relative block number of the last block + 1 before
4162  * last_block (input value) in the file.
4163  * This is not based on i_size, it is based on the extent list.
4164  * Returns 0 for local files, as they do not have an extent list.
4165  */
4166 int						/* error */
xfs_bmap_last_before(xfs_trans_t * tp,xfs_inode_t * ip,xfs_fileoff_t * last_block,int whichfork)4167 xfs_bmap_last_before(
4168 	xfs_trans_t	*tp,			/* transaction pointer */
4169 	xfs_inode_t	*ip,			/* incore inode */
4170 	xfs_fileoff_t	*last_block,		/* last block */
4171 	int		whichfork)		/* data or attr fork */
4172 {
4173 	xfs_fileoff_t	bno;			/* input file offset */
4174 	int		eof;			/* hit end of file */
4175 	xfs_bmbt_rec_t	*ep;			/* pointer to last extent */
4176 	int		error;			/* error return value */
4177 	xfs_bmbt_irec_t	got;			/* current extent value */
4178 	xfs_ifork_t	*ifp;			/* inode fork pointer */
4179 	xfs_extnum_t	lastx;			/* last extent used */
4180 	xfs_bmbt_irec_t	prev;			/* previous extent value */
4181 
4182 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
4183 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4184 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
4185 	       return XFS_ERROR(EIO);
4186 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4187 		*last_block = 0;
4188 		return 0;
4189 	}
4190 	ifp = XFS_IFORK_PTR(ip, whichfork);
4191 	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
4192 	    (error = xfs_iread_extents(tp, ip, whichfork)))
4193 		return error;
4194 	bno = *last_block - 1;
4195 	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
4196 		&prev);
4197 	if (eof || xfs_bmbt_get_startoff(ep) > bno) {
4198 		if (prev.br_startoff == NULLFILEOFF)
4199 			*last_block = 0;
4200 		else
4201 			*last_block = prev.br_startoff + prev.br_blockcount;
4202 	}
4203 	/*
4204 	 * Otherwise *last_block is already the right answer.
4205 	 */
4206 	return 0;
4207 }
4208 
4209 /*
4210  * Returns the file-relative block number of the first block past eof in
4211  * the file.  This is not based on i_size, it is based on the extent list.
4212  * Returns 0 for local files, as they do not have an extent list.
4213  */
4214 int						/* error */
xfs_bmap_last_offset(xfs_trans_t * tp,xfs_inode_t * ip,xfs_fileoff_t * last_block,int whichfork)4215 xfs_bmap_last_offset(
4216 	xfs_trans_t	*tp,			/* transaction pointer */
4217 	xfs_inode_t	*ip,			/* incore inode */
4218 	xfs_fileoff_t	*last_block,		/* last block */
4219 	int		whichfork)		/* data or attr fork */
4220 {
4221 	xfs_bmbt_rec_t	*base;			/* base of extent array */
4222 	xfs_bmbt_rec_t	*ep;			/* pointer to last extent */
4223 	int		error;			/* error return value */
4224 	xfs_ifork_t	*ifp;			/* inode fork pointer */
4225 	xfs_extnum_t	nextents;		/* number of extent entries */
4226 
4227 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
4228 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4229 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
4230 	       return XFS_ERROR(EIO);
4231 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4232 		*last_block = 0;
4233 		return 0;
4234 	}
4235 	ifp = XFS_IFORK_PTR(ip, whichfork);
4236 	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
4237 	    (error = xfs_iread_extents(tp, ip, whichfork)))
4238 		return error;
4239 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4240 	if (!nextents) {
4241 		*last_block = 0;
4242 		return 0;
4243 	}
4244 	base = &ifp->if_u1.if_extents[0];
4245 	ASSERT(base != NULL);
4246 	ep = &base[nextents - 1];
4247 	*last_block = xfs_bmbt_get_startoff(ep) + xfs_bmbt_get_blockcount(ep);
4248 	return 0;
4249 }
4250 
4251 /*
4252  * Returns whether the selected fork of the inode has exactly one
4253  * block or not.  For the data fork we check this matches di_size,
4254  * implying the file's range is 0..bsize-1.
4255  */
4256 int					/* 1=>1 block, 0=>otherwise */
xfs_bmap_one_block(xfs_inode_t * ip,int whichfork)4257 xfs_bmap_one_block(
4258 	xfs_inode_t	*ip,		/* incore inode */
4259 	int		whichfork)	/* data or attr fork */
4260 {
4261 	xfs_bmbt_rec_t	*ep;		/* ptr to fork's extent */
4262 	xfs_ifork_t	*ifp;		/* inode fork pointer */
4263 	int		rval;		/* return value */
4264 	xfs_bmbt_irec_t	s;		/* internal version of extent */
4265 
4266 #ifndef DEBUG
4267 	if (whichfork == XFS_DATA_FORK)
4268 		return ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize;
4269 #endif	/* !DEBUG */
4270 	if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
4271 		return 0;
4272 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
4273 		return 0;
4274 	ifp = XFS_IFORK_PTR(ip, whichfork);
4275 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
4276 	ep = ifp->if_u1.if_extents;
4277 	xfs_bmbt_get_all(ep, &s);
4278 	rval = s.br_startoff == 0 && s.br_blockcount == 1;
4279 	if (rval && whichfork == XFS_DATA_FORK)
4280 		ASSERT(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
4281 	return rval;
4282 }
4283 
4284 /*
4285  * Read in the extents to if_extents.
4286  * All inode fields are set up by caller, we just traverse the btree
4287  * and copy the records in. If the file system cannot contain unwritten
4288  * extents, the records are checked for no "state" flags.
4289  */
4290 int					/* error */
xfs_bmap_read_extents(xfs_trans_t * tp,xfs_inode_t * ip,int whichfork)4291 xfs_bmap_read_extents(
4292 	xfs_trans_t		*tp,	/* transaction pointer */
4293 	xfs_inode_t		*ip,	/* incore inode */
4294 	int			whichfork) /* data or attr fork */
4295 {
4296 	xfs_bmbt_block_t	*block;	/* current btree block */
4297 	xfs_fsblock_t		bno;	/* block # of "block" */
4298 	xfs_buf_t		*bp;	/* buffer for "block" */
4299 	int			error;	/* error return value */
4300 	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
4301 #ifdef XFS_BMAP_TRACE
4302 	static char		fname[] = "xfs_bmap_read_extents";
4303 #endif
4304 	xfs_extnum_t		i, j;	/* index into the extents list */
4305 	xfs_ifork_t		*ifp;	/* fork structure */
4306 	int			level;	/* btree level, for checking */
4307 	xfs_mount_t		*mp;	/* file system mount structure */
4308 	xfs_bmbt_ptr_t		*pp;	/* pointer to block address */
4309 	/* REFERENCED */
4310 	xfs_extnum_t		room;	/* number of entries there's room for */
4311 	xfs_bmbt_rec_t		*trp;	/* target record pointer */
4312 
4313 	bno = NULLFSBLOCK;
4314 	mp = ip->i_mount;
4315 	ifp = XFS_IFORK_PTR(ip, whichfork);
4316 	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
4317 					XFS_EXTFMT_INODE(ip);
4318 	block = ifp->if_broot;
4319 	/*
4320 	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
4321 	 */
4322 	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
4323 	level = INT_GET(block->bb_level, ARCH_CONVERT);
4324 	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
4325 	ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
4326 	ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
4327 	ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
4328 	bno = INT_GET(*pp, ARCH_CONVERT);
4329 	/*
4330 	 * Go down the tree until leaf level is reached, following the first
4331 	 * pointer (leftmost) at each level.
4332 	 */
4333 	while (level-- > 0) {
4334 		if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
4335 				XFS_BMAP_BTREE_REF)))
4336 			return error;
4337 		block = XFS_BUF_TO_BMBT_BLOCK(bp);
4338 		XFS_WANT_CORRUPTED_GOTO(
4339 			XFS_BMAP_SANITY_CHECK(mp, block, level),
4340 			error0);
4341 		if (level == 0)
4342 			break;
4343 		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
4344 			1, mp->m_bmap_dmxr[1]);
4345 		XFS_WANT_CORRUPTED_GOTO(
4346 			XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)),
4347 			error0);
4348 		bno = INT_GET(*pp, ARCH_CONVERT);
4349 		xfs_trans_brelse(tp, bp);
4350 	}
4351 	/*
4352 	 * Here with bp and block set to the leftmost leaf node in the tree.
4353 	 */
4354 	room = ifp->if_bytes / (uint)sizeof(*trp);
4355 	trp = ifp->if_u1.if_extents;
4356 	i = 0;
4357 	/*
4358 	 * Loop over all leaf nodes.  Copy information to the extent list.
4359 	 */
4360 	for (;;) {
4361 		xfs_bmbt_rec_t	*frp, *temp;
4362 		xfs_fsblock_t	nextbno;
4363 		xfs_extnum_t	num_recs;
4364 
4365 
4366 		num_recs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
4367 		if (unlikely(i + num_recs > room)) {
4368 			ASSERT(i + num_recs <= room);
4369 			xfs_fs_cmn_err(CE_WARN, ip->i_mount,
4370 				"corrupt dinode %Lu, (btree extents).  Unmount and run xfs_repair.",
4371 				(unsigned long long) ip->i_ino);
4372 			XFS_ERROR_REPORT("xfs_bmap_read_extents(1)",
4373 					 XFS_ERRLEVEL_LOW,
4374 					ip->i_mount);
4375 			goto error0;
4376 		}
4377 		XFS_WANT_CORRUPTED_GOTO(
4378 			XFS_BMAP_SANITY_CHECK(mp, block, 0),
4379 			error0);
4380 		/*
4381 		 * Read-ahead the next leaf block, if any.
4382 		 */
4383 		nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
4384 		if (nextbno != NULLFSBLOCK)
4385 			xfs_btree_reada_bufl(mp, nextbno, 1);
4386 		/*
4387 		 * Copy records into the extent list.
4388 		 */
4389 		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
4390 			block, 1, mp->m_bmap_dmxr[0]);
4391 		temp = trp;
4392 		for (j = 0; j < num_recs; j++, frp++, trp++) {
4393 			trp->l0 = INT_GET(frp->l0, ARCH_CONVERT);
4394 			trp->l1 = INT_GET(frp->l1, ARCH_CONVERT);
4395 		}
4396 		if (exntf == XFS_EXTFMT_NOSTATE) {
4397 			/*
4398 			 * Check all attribute bmap btree records and
4399 			 * any "older" data bmap btree records for a
4400 			 * set bit in the "extent flag" position.
4401 			 */
4402 			if (unlikely(xfs_check_nostate_extents(temp, num_recs))) {
4403 				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
4404 						 XFS_ERRLEVEL_LOW,
4405 						 ip->i_mount);
4406 				goto error0;
4407 			}
4408 		}
4409 		i += num_recs;
4410 		xfs_trans_brelse(tp, bp);
4411 		bno = nextbno;
4412 		/*
4413 		 * If we've reached the end, stop.
4414 		 */
4415 		if (bno == NULLFSBLOCK)
4416 			break;
4417 		if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
4418 				XFS_BMAP_BTREE_REF)))
4419 			return error;
4420 		block = XFS_BUF_TO_BMBT_BLOCK(bp);
4421 	}
4422 	ASSERT(i == ifp->if_bytes / (uint)sizeof(*trp));
4423 	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
4424 	xfs_bmap_trace_exlist(fname, ip, i, whichfork);
4425 	return 0;
4426 error0:
4427 	xfs_trans_brelse(tp, bp);
4428 	return XFS_ERROR(EFSCORRUPTED);
4429 }
4430 
#ifdef XFS_BMAP_TRACE
/*
 * Emit one bmap "insert" trace entry, tagged "exlist", for each record
 * in the in-core extent list of the given fork.
 */
void
xfs_bmap_trace_exlist(
	char		*fname,		/* function name */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_extnum_t	cnt,		/* count of entries in the list */
	int		whichfork)	/* data or attr fork */
{
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_bmbt_rec_t	*recs;		/* start of the extent list */
	xfs_bmbt_irec_t	irec;		/* unpacked copy of one record */
	xfs_extnum_t	n;		/* index of the record being traced */

	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* The caller's count must cover the entire in-core list. */
	ASSERT(cnt == ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
	recs = ifp->if_u1.if_extents;
	n = 0;
	while (n < cnt) {
		xfs_bmbt_get_all(&recs[n], &irec);
		xfs_bmap_trace_insert(fname, "exlist", ip, n, 1, &irec, NULL,
			whichfork);
		n++;
	}
}
#endif
4458 
#ifdef DEBUG
/*
 * Sanity-check the mappings xfs_bmapi is about to hand back against the
 * caller's original request.  Unless XFS_BMAPI_ENTIRE was given, every
 * returned mapping must lie inside [bno, bno + len); with it, a mapping
 * may extend beyond that range but must still overlap it.  Mappings must
 * also be contiguous in file offset, and a non-delayed write request may
 * return neither holes nor delayed-allocation extents.
 */
STATIC void
xfs_bmap_validate_ret(
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	int			flags,
	xfs_bmbt_irec_t		*mval,
	int			nmap,
	int			ret_nmap)
{
	xfs_bmbt_irec_t		*m;		/* mapping being checked */
	int			i;		/* index to map values */

	ASSERT(ret_nmap <= nmap);

	for (i = 0, m = mval; i < ret_nmap; i++, m++) {
		ASSERT(m->br_blockcount > 0);
		if (flags & XFS_BMAPI_ENTIRE) {
			/* Whole extents: only overlap is required. */
			ASSERT(m->br_startoff < bno + len);
			ASSERT(m->br_startoff + m->br_blockcount > bno);
		} else {
			/* Trimmed: must fit inside the request. */
			ASSERT(m->br_startoff >= bno);
			ASSERT(m->br_blockcount <= len);
			ASSERT(m->br_startoff + m->br_blockcount <=
			       bno + len);
		}
		/* Each mapping starts where the previous one ended. */
		ASSERT(i == 0 ||
		       m[-1].br_startoff + m[-1].br_blockcount ==
		       m->br_startoff);
		if ((flags & XFS_BMAPI_WRITE) && !(flags & XFS_BMAPI_DELAY)) {
			ASSERT(m->br_startblock != DELAYSTARTBLOCK &&
			       m->br_startblock != HOLESTARTBLOCK);
		}
		ASSERT(m->br_state == XFS_EXT_NORM ||
		       m->br_state == XFS_EXT_UNWRITTEN);
	}
}
#endif /* DEBUG */
4502 
4503 
4504 /*
4505  * Map file blocks to filesystem blocks.
4506  * File range is given by the bno/len pair.
4507  * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
4508  * into a hole or past eof.
4509  * Only allocates blocks from a single allocation group,
4510  * to avoid locking problems.
4511  * The returned value in "firstblock" from the first call in a transaction
4512  * must be remembered and presented to subsequent calls in "firstblock".
4513  * An upper bound for the number of blocks to be allocated is supplied to
4514  * the first call in "total"; if no allocation group has that many free
4515  * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
4516  */
4517 int					/* error */
xfs_bmapi(xfs_trans_t * tp,xfs_inode_t * ip,xfs_fileoff_t bno,xfs_filblks_t len,int flags,xfs_fsblock_t * firstblock,xfs_extlen_t total,xfs_bmbt_irec_t * mval,int * nmap,xfs_bmap_free_t * flist)4518 xfs_bmapi(
4519 	xfs_trans_t	*tp,		/* transaction pointer */
4520 	xfs_inode_t	*ip,		/* incore inode */
4521 	xfs_fileoff_t	bno,		/* starting file offs. mapped */
4522 	xfs_filblks_t	len,		/* length to map in file */
4523 	int		flags,		/* XFS_BMAPI_... */
4524 	xfs_fsblock_t	*firstblock,	/* first allocated block
4525 					   controls a.g. for allocs */
4526 	xfs_extlen_t	total,		/* total blocks needed */
4527 	xfs_bmbt_irec_t	*mval,		/* output: map values */
4528 	int		*nmap,		/* i/o: mval size/count */
4529 	xfs_bmap_free_t	*flist)		/* i/o: list extents to free */
4530 {
4531 	xfs_fsblock_t	abno;		/* allocated block number */
4532 	xfs_extlen_t	alen;		/* allocated extent length */
4533 	xfs_fileoff_t	aoff;		/* allocated file offset */
4534 	xfs_bmalloca_t	bma;		/* args for xfs_bmap_alloc */
4535 	char		contig;		/* allocation must be one extent */
4536 	xfs_btree_cur_t	*cur;		/* bmap btree cursor */
4537 	char		delay;		/* this request is for delayed alloc */
4538 	xfs_fileoff_t	end;		/* end of mapped file region */
4539 	int		eof;		/* we've hit the end of extent list */
4540 	xfs_bmbt_rec_t	*ep;		/* extent list entry pointer */
4541 	int		error;		/* error return */
4542 	char		exact;		/* don't do all of wasdelayed extent */
4543 	xfs_bmbt_irec_t	got;		/* current extent list record */
4544 	xfs_ifork_t	*ifp;		/* inode fork pointer */
4545 	xfs_extlen_t	indlen;		/* indirect blocks length */
4546 	char		inhole;		/* current location is hole in file */
4547 	xfs_extnum_t	lastx;		/* last useful extent number */
4548 	int		logflags;	/* flags for transaction logging */
4549 	xfs_extlen_t	minleft;	/* min blocks left after allocation */
4550 	xfs_extlen_t	minlen;		/* min allocation size */
4551 	xfs_mount_t	*mp;		/* xfs mount structure */
4552 	int		n;		/* current extent index */
4553 	int		nallocs;	/* number of extents alloc\'d */
4554 	xfs_extnum_t	nextents;	/* number of extents in file */
4555 	xfs_fileoff_t	obno;		/* old block number (offset) */
4556 	xfs_bmbt_irec_t	prev;		/* previous extent list record */
4557 	char		stateless;	/* ignore state flag set */
4558 	int		tmp_logflags;	/* temp flags holder */
4559 	char		trim;		/* output trimmed to match range */
4560 	char		userdata;	/* allocating non-metadata */
4561 	char		wasdelay;	/* old extent was delayed */
4562 	int		whichfork;	/* data or attr fork */
4563 	char		wr;		/* this is a write request */
4564 	char		rsvd;		/* OK to allocate reserved blocks */
4565 #ifdef DEBUG
4566 	xfs_fileoff_t	orig_bno;	/* original block number value */
4567 	int		orig_flags;	/* original flags arg value */
4568 	xfs_filblks_t	orig_len;	/* original value of len arg */
4569 	xfs_bmbt_irec_t	*orig_mval;	/* original value of mval */
4570 	int		orig_nmap;	/* original value of *nmap */
4571 
4572 	orig_bno = bno;
4573 	orig_len = len;
4574 	orig_flags = flags;
4575 	orig_mval = mval;
4576 	orig_nmap = *nmap;
4577 #endif
4578 	ASSERT(*nmap >= 1);
4579 	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP || !(flags & XFS_BMAPI_WRITE));
4580 	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
4581 		XFS_ATTR_FORK : XFS_DATA_FORK;
4582 	mp = ip->i_mount;
4583 	if (unlikely(XFS_TEST_ERROR(
4584 	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4585 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
4586 	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL),
4587 	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
4588 		XFS_ERROR_REPORT("xfs_bmapi", XFS_ERRLEVEL_LOW, mp);
4589 		return XFS_ERROR(EFSCORRUPTED);
4590 	}
4591 	if (XFS_FORCED_SHUTDOWN(mp))
4592 		return XFS_ERROR(EIO);
4593 	ifp = XFS_IFORK_PTR(ip, whichfork);
4594 	ASSERT(ifp->if_ext_max ==
4595 	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
4596 	if ((wr = (flags & XFS_BMAPI_WRITE)) != 0)
4597 		XFS_STATS_INC(xs_blk_mapw);
4598 	else
4599 		XFS_STATS_INC(xs_blk_mapr);
4600 	delay = (flags & XFS_BMAPI_DELAY) != 0;
4601 	trim = (flags & XFS_BMAPI_ENTIRE) == 0;
4602 	userdata = (flags & XFS_BMAPI_METADATA) == 0;
4603 	exact = (flags & XFS_BMAPI_EXACT) != 0;
4604 	rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
4605 	contig = (flags & XFS_BMAPI_CONTIG) != 0;
4606 	/*
4607 	 * stateless is used to combine extents which
4608 	 * differ only due to the state of the extents.
4609 	 * This technique is used from xfs_getbmap()
4610 	 * when the caller does not wish to see the
4611 	 * separation (which is the default).
4612 	 *
4613 	 * This technique is also used when writing a
4614 	 * buffer which has been partially written,
4615 	 * (usually by being flushed during a chunkread),
4616 	 * to ensure one write takes place. This also
4617 	 * prevents a change in the xfs inode extents at
4618 	 * this time, intentionally. This change occurs
4619 	 * on completion of the write operation, in
4620 	 * xfs_strat_comp(), where the xfs_bmapi() call
4621 	 * is transactioned, and the extents combined.
4622 	 */
4623 	stateless = (flags & XFS_BMAPI_IGSTATE) != 0;
4624 	if (stateless && wr)	/* if writing unwritten space, no */
4625 		wr = 0;		/* allocations are allowed */
4626 	ASSERT(wr || !delay);
4627 	logflags = 0;
4628 	nallocs = 0;
4629 	cur = NULL;
4630 	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
4631 		ASSERT(wr && tp);
4632 		if ((error = xfs_bmap_local_to_extents(tp, ip,
4633 				firstblock, total, &logflags, whichfork)))
4634 			goto error0;
4635 	}
4636 	if (wr && *firstblock == NULLFSBLOCK) {
4637 		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
4638 			minleft = INT_GET(ifp->if_broot->bb_level, ARCH_CONVERT) + 1;
4639 		else
4640 			minleft = 1;
4641 	} else
4642 		minleft = 0;
4643 	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
4644 	    (error = xfs_iread_extents(tp, ip, whichfork)))
4645 		goto error0;
4646 	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
4647 		&prev);
4648 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4649 	n = 0;
4650 	end = bno + len;
4651 	obno = bno;
4652 	bma.ip = NULL;
4653 	while (bno < end && n < *nmap) {
4654 		/*
4655 		 * Reading past eof, act as though there's a hole
4656 		 * up to end.
4657 		 */
4658 		if (eof && !wr)
4659 			got.br_startoff = end;
4660 		inhole = eof || got.br_startoff > bno;
4661 		wasdelay = wr && !inhole && !delay &&
4662 			ISNULLSTARTBLOCK(got.br_startblock);
4663 		/*
4664 		 * First, deal with the hole before the allocated space
4665 		 * that we found, if any.
4666 		 */
4667 		if (wr && (inhole || wasdelay)) {
4668 			/*
4669 			 * For the wasdelay case, we could also just
4670 			 * allocate the stuff asked for in this bmap call
4671 			 * but that wouldn't be as good.
4672 			 */
4673 			if (wasdelay && !exact) {
4674 				alen = (xfs_extlen_t)got.br_blockcount;
4675 				aoff = got.br_startoff;
4676 				if (lastx != NULLEXTNUM && lastx) {
4677 					ep = &ifp->if_u1.if_extents[lastx - 1];
4678 					xfs_bmbt_get_all(ep, &prev);
4679 				}
4680 			} else if (wasdelay) {
4681 				alen = (xfs_extlen_t)
4682 					XFS_FILBLKS_MIN(len,
4683 						(got.br_startoff +
4684 						 got.br_blockcount) - bno);
4685 				aoff = bno;
4686 			} else {
4687 				alen = (xfs_extlen_t)
4688 					XFS_FILBLKS_MIN(len, MAXEXTLEN);
4689 				if (!eof)
4690 					alen = (xfs_extlen_t)
4691 						XFS_FILBLKS_MIN(alen,
4692 							got.br_startoff - bno);
4693 				aoff = bno;
4694 			}
4695 			minlen = contig ? alen : 1;
4696 			if (delay) {
4697 				indlen = (xfs_extlen_t)
4698 					xfs_bmap_worst_indlen(ip, alen);
4699 				ASSERT(indlen > 0);
4700 				/*
4701 				 * Make a transaction-less quota reservation for
4702 				 * delayed allocation blocks. This number gets
4703 				 * adjusted later.
4704 				 * We return EDQUOT if we haven't allocated
4705 				 * blks already inside this loop;
4706 				 */
4707 				if (XFS_TRANS_RESERVE_BLKQUOTA(
4708 						mp, NULL, ip, (long)alen)) {
4709 					if (n == 0) {
4710 						*nmap = 0;
4711 						ASSERT(cur == NULL);
4712 						return XFS_ERROR(EDQUOT);
4713 					}
4714 					break;
4715 				}
4716 
4717 				/*
4718 				 * Split changing sb for alen and indlen since
4719 				 * they could be coming from different places.
4720 				 */
4721 				if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) {
4722 					xfs_extlen_t	extsz;
4723 					xfs_extlen_t	ralen;
4724 					if (!(extsz = ip->i_d.di_extsize))
4725 						extsz = mp->m_sb.sb_rextsize;
4726 					ralen = roundup(alen, extsz);
4727 					ralen = ralen / mp->m_sb.sb_rextsize;
4728 					if (xfs_mod_incore_sb(mp,
4729 						XFS_SBS_FREXTENTS,
4730 						-(ralen), rsvd)) {
4731 						if (XFS_IS_QUOTA_ON(ip->i_mount))
4732 							XFS_TRANS_UNRESERVE_BLKQUOTA(
4733 						     		mp, NULL, ip,
4734 								(long)alen);
4735 						break;
4736 					}
4737 				} else {
4738 					if (xfs_mod_incore_sb(mp,
4739 							      XFS_SBS_FDBLOCKS,
4740 							      -(alen), rsvd)) {
4741 						if (XFS_IS_QUOTA_ON(ip->i_mount))
4742 							XFS_TRANS_UNRESERVE_BLKQUOTA(
4743 								mp, NULL, ip,
4744 								(long)alen);
4745 						break;
4746 					}
4747 				}
4748 
4749 				if (xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
4750 						-(indlen), rsvd)) {
4751 					XFS_TRANS_UNRESERVE_BLKQUOTA(
4752 						mp, NULL, ip, (long)alen);
4753 					break;
4754 				}
4755 				ip->i_delayed_blks += alen;
4756 				abno = NULLSTARTBLOCK(indlen);
4757 			} else {
4758 				/*
4759 				 * If first time, allocate and fill in
4760 				 * once-only bma fields.
4761 				 */
4762 				if (bma.ip == NULL) {
4763 					bma.tp = tp;
4764 					bma.ip = ip;
4765 					bma.prevp = &prev;
4766 					bma.gotp = &got;
4767 					bma.total = total;
4768 					bma.userdata = 0;
4769 				}
4770 				/* Indicate if this is the first user data
4771 				 * in the file, or just any user data.
4772 				 */
4773 				if (userdata) {
4774 					bma.userdata = (aoff == 0) ?
4775 						XFS_ALLOC_INITIAL_USER_DATA :
4776 						XFS_ALLOC_USERDATA;
4777 				}
4778 				/*
4779 				 * Fill in changeable bma fields.
4780 				 */
4781 				bma.eof = eof;
4782 				bma.firstblock = *firstblock;
4783 				bma.alen = alen;
4784 				bma.off = aoff;
4785 				bma.wasdel = wasdelay;
4786 				bma.minlen = minlen;
4787 				bma.low = flist->xbf_low;
4788 				bma.minleft = minleft;
4789 				/*
4790 				 * Only want to do the alignment at the
4791 				 * eof if it is userdata and allocation length
4792 				 * is larger than a stripe unit.
4793 				 */
4794 				if (mp->m_dalign && alen >= mp->m_dalign &&
4795 				    userdata && whichfork == XFS_DATA_FORK) {
4796 					if ((error = xfs_bmap_isaeof(ip, aoff,
4797 							whichfork, &bma.aeof)))
4798 						goto error0;
4799 				} else
4800 					bma.aeof = 0;
4801 				/*
4802 				 * Call allocator.
4803 				 */
4804 				if ((error = xfs_bmap_alloc(&bma)))
4805 					goto error0;
4806 				/*
4807 				 * Copy out result fields.
4808 				 */
4809 				abno = bma.rval;
4810 				if ((flist->xbf_low = bma.low))
4811 					minleft = 0;
4812 				alen = bma.alen;
4813 				aoff = bma.off;
4814 				ASSERT(*firstblock == NULLFSBLOCK ||
4815 				       XFS_FSB_TO_AGNO(mp, *firstblock) ==
4816 				       XFS_FSB_TO_AGNO(mp, bma.firstblock) ||
4817 				       (flist->xbf_low &&
4818 					XFS_FSB_TO_AGNO(mp, *firstblock) <
4819 					XFS_FSB_TO_AGNO(mp, bma.firstblock)));
4820 				*firstblock = bma.firstblock;
4821 				if (cur)
4822 					cur->bc_private.b.firstblock =
4823 						*firstblock;
4824 				if (abno == NULLFSBLOCK)
4825 					break;
4826 				if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
4827 					cur = xfs_btree_init_cursor(mp,
4828 						tp, NULL, 0, XFS_BTNUM_BMAP,
4829 						ip, whichfork);
4830 					cur->bc_private.b.firstblock =
4831 						*firstblock;
4832 					cur->bc_private.b.flist = flist;
4833 				}
4834 				/*
4835 				 * Bump the number of extents we've allocated
4836 				 * in this call.
4837 				 */
4838 				nallocs++;
4839 			}
4840 			if (cur)
4841 				cur->bc_private.b.flags =
4842 					wasdelay ? XFS_BTCUR_BPRV_WASDEL : 0;
4843 			got.br_startoff = aoff;
4844 			got.br_startblock = abno;
4845 			got.br_blockcount = alen;
4846 			got.br_state = XFS_EXT_NORM;	/* assume normal */
4847 			/*
4848 			 * Determine state of extent, and the filesystem.
4849 			 * A wasdelay extent has been initialized, so
4850 			 * shouldn't be flagged as unwritten.
4851 			 */
4852 			if (wr && XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
4853 				if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
4854 					got.br_state = XFS_EXT_UNWRITTEN;
4855 			}
4856 			error = xfs_bmap_add_extent(ip, lastx, &cur, &got,
4857 				firstblock, flist, &tmp_logflags, whichfork,
4858 				rsvd);
4859 			logflags |= tmp_logflags;
4860 			if (error)
4861 				goto error0;
4862 			lastx = ifp->if_lastex;
4863 			ep = &ifp->if_u1.if_extents[lastx];
4864 			nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4865 			xfs_bmbt_get_all(ep, &got);
4866 			ASSERT(got.br_startoff <= aoff);
4867 			ASSERT(got.br_startoff + got.br_blockcount >=
4868 				aoff + alen);
4869 #ifdef DEBUG
4870 			if (delay) {
4871 				ASSERT(ISNULLSTARTBLOCK(got.br_startblock));
4872 				ASSERT(STARTBLOCKVAL(got.br_startblock) > 0);
4873 			}
4874 			ASSERT(got.br_state == XFS_EXT_NORM ||
4875 			       got.br_state == XFS_EXT_UNWRITTEN);
4876 #endif
4877 			/*
4878 			 * Fall down into the found allocated space case.
4879 			 */
4880 		} else if (inhole) {
4881 			/*
4882 			 * Reading in a hole.
4883 			 */
4884 			mval->br_startoff = bno;
4885 			mval->br_startblock = HOLESTARTBLOCK;
4886 			mval->br_blockcount =
4887 				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
4888 			mval->br_state = XFS_EXT_NORM;
4889 			bno += mval->br_blockcount;
4890 			len -= mval->br_blockcount;
4891 			mval++;
4892 			n++;
4893 			continue;
4894 		}
4895 		/*
4896 		 * Then deal with the allocated space we found.
4897 		 */
4898 		ASSERT(ep != NULL);
4899 		if (trim && (got.br_startoff + got.br_blockcount > obno)) {
4900 			if (obno > bno)
4901 				bno = obno;
4902 			ASSERT((bno >= obno) || (n == 0));
4903 			ASSERT(bno < end);
4904 			mval->br_startoff = bno;
4905 			if (ISNULLSTARTBLOCK(got.br_startblock)) {
4906 				ASSERT(!wr || delay);
4907 				mval->br_startblock = DELAYSTARTBLOCK;
4908 			} else
4909 				mval->br_startblock =
4910 					got.br_startblock +
4911 					(bno - got.br_startoff);
4912 			/*
4913 			 * Return the minimum of what we got and what we
4914 			 * asked for for the length.  We can use the len
4915 			 * variable here because it is modified below
4916 			 * and we could have been there before coming
4917 			 * here if the first part of the allocation
4918 			 * didn't overlap what was asked for.
4919 			 */
4920 			mval->br_blockcount =
4921 				XFS_FILBLKS_MIN(end - bno, got.br_blockcount -
4922 					(bno - got.br_startoff));
4923 			mval->br_state = got.br_state;
4924 			ASSERT(mval->br_blockcount <= len);
4925 		} else {
4926 			*mval = got;
4927 			if (ISNULLSTARTBLOCK(mval->br_startblock)) {
4928 				ASSERT(!wr || delay);
4929 				mval->br_startblock = DELAYSTARTBLOCK;
4930 			}
4931 		}
4932 
4933 		/*
4934 		 * Check if writing previously allocated but
4935 		 * unwritten extents.
4936 		 */
4937 		if (wr && mval->br_state == XFS_EXT_UNWRITTEN &&
4938 		    ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) {
4939 			/*
4940 			 * Modify (by adding) the state flag, if writing.
4941 			 */
4942 			ASSERT(mval->br_blockcount <= len);
4943 			if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
4944 				cur = xfs_btree_init_cursor(mp,
4945 					tp, NULL, 0, XFS_BTNUM_BMAP,
4946 					ip, whichfork);
4947 				cur->bc_private.b.firstblock =
4948 					*firstblock;
4949 				cur->bc_private.b.flist = flist;
4950 			}
4951 			mval->br_state = XFS_EXT_NORM;
4952 			error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
4953 				firstblock, flist, &tmp_logflags, whichfork,
4954 				rsvd);
4955 			logflags |= tmp_logflags;
4956 			if (error)
4957 				goto error0;
4958 			lastx = ifp->if_lastex;
4959 			ep = &ifp->if_u1.if_extents[lastx];
4960 			nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
4961 			xfs_bmbt_get_all(ep, &got);
4962 			/*
4963 			 * We may have combined previously unwritten
4964 			 * space with written space, so generate
4965 			 * another request.
4966 			 */
4967 			if (mval->br_blockcount < len)
4968 				continue;
4969 		}
4970 
4971 		ASSERT(!trim ||
4972 		       ((mval->br_startoff + mval->br_blockcount) <= end));
4973 		ASSERT(!trim || (mval->br_blockcount <= len) ||
4974 		       (mval->br_startoff < obno));
4975 		bno = mval->br_startoff + mval->br_blockcount;
4976 		len = end - bno;
4977 		if (n > 0 && mval->br_startoff == mval[-1].br_startoff) {
4978 			ASSERT(mval->br_startblock == mval[-1].br_startblock);
4979 			ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
4980 			ASSERT(mval->br_state == mval[-1].br_state);
4981 			mval[-1].br_blockcount = mval->br_blockcount;
4982 			mval[-1].br_state = mval->br_state;
4983 		} else if (n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
4984 			   mval[-1].br_startblock != DELAYSTARTBLOCK &&
4985 			   mval[-1].br_startblock != HOLESTARTBLOCK &&
4986 			   mval->br_startblock ==
4987 			   mval[-1].br_startblock + mval[-1].br_blockcount &&
4988 			   (stateless || mval[-1].br_state == mval->br_state)) {
4989 			ASSERT(mval->br_startoff ==
4990 			       mval[-1].br_startoff + mval[-1].br_blockcount);
4991 			mval[-1].br_blockcount += mval->br_blockcount;
4992 		} else if (n > 0 &&
4993 			   mval->br_startblock == DELAYSTARTBLOCK &&
4994 			   mval[-1].br_startblock == DELAYSTARTBLOCK &&
4995 			   mval->br_startoff ==
4996 			   mval[-1].br_startoff + mval[-1].br_blockcount) {
4997 			mval[-1].br_blockcount += mval->br_blockcount;
4998 			mval[-1].br_state = mval->br_state;
4999 		} else if (!((n == 0) &&
5000 			     ((mval->br_startoff + mval->br_blockcount) <=
5001 			      obno))) {
5002 			mval++;
5003 			n++;
5004 		}
5005 		/*
5006 		 * If we're done, stop now.  Stop when we've allocated
5007 		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
5008 		 * the transaction may get too big.
5009 		 */
5010 		if (bno >= end || n >= *nmap || nallocs >= *nmap)
5011 			break;
5012 		/*
5013 		 * Else go on to the next record.
5014 		 */
5015 		ep++;
5016 		lastx++;
5017 		if (lastx >= nextents) {
5018 			eof = 1;
5019 			prev = got;
5020 		} else
5021 			xfs_bmbt_get_all(ep, &got);
5022 	}
5023 	ifp->if_lastex = lastx;
5024 	*nmap = n;
5025 	/*
5026 	 * Transform from btree to extents, give it cur.
5027 	 */
5028 	if (tp && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
5029 	    XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
5030 		ASSERT(wr && cur);
5031 		error = xfs_bmap_btree_to_extents(tp, ip, cur,
5032 			&tmp_logflags, whichfork);
5033 		logflags |= tmp_logflags;
5034 		if (error)
5035 			goto error0;
5036 	}
5037 	ASSERT(ifp->if_ext_max ==
5038 	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
5039 	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
5040 	       XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max);
5041 	error = 0;
5042 
5043 error0:
5044 	/*
5045 	 * Log everything.  Do this after conversion, there's no point in
5046 	 * logging the extent list if we've converted to btree format.
5047 	 */
5048 	if ((logflags & XFS_ILOG_FEXT(whichfork)) &&
5049 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5050 		logflags &= ~XFS_ILOG_FEXT(whichfork);
5051 	else if ((logflags & XFS_ILOG_FBROOT(whichfork)) &&
5052 		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5053 		logflags &= ~XFS_ILOG_FBROOT(whichfork);
5054 	/*
5055 	 * Log whatever the flags say, even if error.  Otherwise we might miss
5056 	 * detecting a case where the data is changed, there's an error,
5057 	 * and it's not logged so we don't shutdown when we should.
5058 	 */
5059 	if (logflags) {
5060 		ASSERT(tp && wr);
5061 		xfs_trans_log_inode(tp, ip, logflags);
5062 	}
5063 	if (cur) {
5064 		if (!error) {
5065 			ASSERT(*firstblock == NULLFSBLOCK ||
5066 			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
5067 			       XFS_FSB_TO_AGNO(mp,
5068 				       cur->bc_private.b.firstblock) ||
5069 			       (flist->xbf_low &&
5070 				XFS_FSB_TO_AGNO(mp, *firstblock) <
5071 				XFS_FSB_TO_AGNO(mp,
5072 					cur->bc_private.b.firstblock)));
5073 			*firstblock = cur->bc_private.b.firstblock;
5074 		}
5075 		xfs_btree_del_cursor(cur,
5076 			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5077 	}
5078 	if (!error)
5079 		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
5080 			orig_nmap, *nmap);
5081 	return error;
5082 }
5083 
5084 /*
5085  * Map file blocks to filesystem blocks, simple version.
5086  * One block (extent) only, read-only.
5087  * For flags, only the XFS_BMAPI_ATTRFORK flag is examined.
5088  * For the other flag values, the effect is as if XFS_BMAPI_METADATA
5089  * was set and all the others were clear.
5090  */
5091 int						/* error */
xfs_bmapi_single(xfs_trans_t * tp,xfs_inode_t * ip,int whichfork,xfs_fsblock_t * fsb,xfs_fileoff_t bno)5092 xfs_bmapi_single(
5093 	xfs_trans_t	*tp,		/* transaction pointer */
5094 	xfs_inode_t	*ip,		/* incore inode */
5095 	int		whichfork,	/* data or attr fork */
5096 	xfs_fsblock_t	*fsb,		/* output: mapped block */
5097 	xfs_fileoff_t	bno)		/* starting file offs. mapped */
5098 {
5099 	int		eof;		/* we've hit the end of extent list */
5100 	int		error;		/* error return */
5101 	xfs_bmbt_irec_t	got;		/* current extent list record */
5102 	xfs_ifork_t	*ifp;		/* inode fork pointer */
5103 	xfs_extnum_t	lastx;		/* last useful extent number */
5104 	xfs_bmbt_irec_t	prev;		/* previous extent list record */
5105 
5106 	ifp = XFS_IFORK_PTR(ip, whichfork);
5107 	if (unlikely(
5108 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
5109 	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)) {
5110 	       XFS_ERROR_REPORT("xfs_bmapi_single", XFS_ERRLEVEL_LOW,
5111 				ip->i_mount);
5112 	       return XFS_ERROR(EFSCORRUPTED);
5113 	}
5114 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
5115 		return XFS_ERROR(EIO);
5116 	XFS_STATS_INC(xs_blk_mapr);
5117 	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5118 	    (error = xfs_iread_extents(tp, ip, whichfork)))
5119 		return error;
5120 	(void)xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5121 		&prev);
5122 	/*
5123 	 * Reading past eof, act as though there's a hole
5124 	 * up to end.
5125 	 */
5126 	if (eof || got.br_startoff > bno) {
5127 		*fsb = NULLFSBLOCK;
5128 		return 0;
5129 	}
5130 	ASSERT(!ISNULLSTARTBLOCK(got.br_startblock));
5131 	ASSERT(bno < got.br_startoff + got.br_blockcount);
5132 	*fsb = got.br_startblock + (bno - got.br_startoff);
5133 	ifp->if_lastex = lastx;
5134 	return 0;
5135 }
5136 
/*
 * Unmap (remove) blocks from a file.
 *
 * The file range [bno, bno + len) of the fork selected by flags is
 * processed from its last block backwards, one extent (or piece of an
 * extent) per loop iteration.
 * If nexts is nonzero then the number of extents to remove is limited to
 * that value.
 *
 * *done is set to 1 when the whole range has been dealt with (including
 * the case of a fork with no extents at all) and to 0 when the loop
 * stopped before reaching the start of the range, i.e. when the nexts
 * limit was hit.  *done is not written if an error is returned from
 * inside the unmapping loop.
 *
 * Returns 0 on success.  Returns EFSCORRUPTED if the fork is in neither
 * extents nor btree format, EIO if the filesystem has been shut down,
 * ENOSPC for the no-reservation extent-split case described in the body,
 * or whatever error one of the called helpers reports.
 */
int						/* error */
xfs_bunmapi(
	xfs_trans_t		*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		bno,		/* starting offset to unmap */
	xfs_filblks_t		len,		/* length to unmap in file */
	int			flags,		/* misc flags */
	xfs_extnum_t		nexts,		/* number of extents max */
	xfs_fsblock_t		*firstblock,	/* first allocated block
						   controls a.g. for allocs */
	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
	int			*done)		/* out: set to 1 when the whole
						   range has been processed */
{
	xfs_btree_cur_t		*cur;		/* bmap btree cursor */
	xfs_bmbt_irec_t		del;		/* extent being deleted */
	int			eof;		/* is deleting at eof */
	xfs_bmbt_rec_t		*ep;		/* extent list entry pointer */
	int			error;		/* error return value */
	xfs_extnum_t		extno;		/* extent number in list */
	xfs_bmbt_irec_t		got;		/* current extent list entry */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	int			isrt;		/* freeing in rt area */
	xfs_extnum_t		lastx;		/* last extent index used */
	int			logflags;	/* transaction logging flags */
	xfs_extlen_t		mod;		/* rt extent offset */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_extnum_t		nextents;	/* size of extent list */
	xfs_bmbt_irec_t		prev;		/* previous extent list entry */
	xfs_fileoff_t		start;		/* first file offset deleted */
	int			tmp_logflags;	/* partial logging flags */
	int			wasdel;		/* was a delayed alloc extent */
	int			whichfork;	/* data or attribute fork */
	int			rsvd;		/* OK to allocate reserved blocks */
	xfs_fsblock_t		sum;		/* end block of the piece being
						   deleted (startblock + count) */

	xfs_bunmap_trace(ip, bno, len, flags, (inst_t *)__return_address);
	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
		XFS_ATTR_FORK : XFS_DATA_FORK;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/*
	 * Only extents-format and btree-format forks can be unmapped here.
	 */
	if (unlikely(
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
		XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
				 ip->i_mount);
		return XFS_ERROR(EFSCORRUPTED);
	}
	mp = ip->i_mount;
	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);
	rsvd = (flags & XFS_BMAPI_RSVBLOCKS) != 0;
	ASSERT(len > 0);
	ASSERT(nexts >= 0);
	ASSERT(ifp->if_ext_max ==
	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
	/*
	 * Bring the extent list in core if that has not happened yet.
	 */
	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
	    (error = xfs_iread_extents(tp, ip, whichfork)))
		return error;
	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	/*
	 * Nothing mapped at all, so there is nothing to unmap.
	 */
	if (nextents == 0) {
		*done = 1;
		return 0;
	}
	XFS_STATS_INC(xs_blk_unmap);
	isrt = (whichfork == XFS_DATA_FORK) &&
	       (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
	/*
	 * From here on bno is the highest file block still to be unmapped;
	 * it moves down towards start as extents are removed.
	 */
	start = bno;
	bno = start + len - 1;
	ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
		&prev);
	/*
	 * Check to see if the given block number is past the end of the
	 * file, back up to the last block if so...
	 */
	if (eof) {
		ep = &ifp->if_u1.if_extents[--lastx];
		xfs_bmbt_get_all(ep, &got);
		bno = got.br_startoff + got.br_blockcount - 1;
	}
	logflags = 0;
	/*
	 * A btree cursor is only needed when the fork has a btree root.
	 */
	if (ifp->if_flags & XFS_IFBROOT) {
		ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
		cur = xfs_btree_init_cursor(mp, tp, NULL, 0, XFS_BTNUM_BMAP, ip,
			whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.flist = flist;
		cur->bc_private.b.flags = 0;
	} else
		cur = NULL;
	extno = 0;
	/*
	 * Walk backwards through the range.  The (xfs_fileoff_t)-1 test
	 * catches bno wrapping around after block 0 has been unmapped.
	 */
	while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
	       (nexts == 0 || extno < nexts)) {
		/*
		 * Is the found extent after a hole in which bno lives?
		 * Just back up to the previous extent, if so.
		 */
		if (got.br_startoff > bno) {
			if (--lastx < 0)
				break;
			ep--;
			xfs_bmbt_get_all(ep, &got);
		}
		/*
		 * Is the last block of this extent before the range
		 * we're supposed to delete?  If so, we're done.
		 */
		bno = XFS_FILEOFF_MIN(bno,
			got.br_startoff + got.br_blockcount - 1);
		if (bno < start)
			break;
		/*
		 * Then deal with the (possibly delayed) allocated space
		 * we found.
		 */
		ASSERT(ep != NULL);
		del = got;
		wasdel = ISNULLSTARTBLOCK(del.br_startblock);
		/*
		 * Trim the front of del so that it starts no earlier than
		 * the requested range.
		 */
		if (got.br_startoff < start) {
			del.br_startoff = start;
			del.br_blockcount -= start - got.br_startoff;
			if (!wasdel)
				del.br_startblock += start - got.br_startoff;
		}
		/*
		 * Trim the tail of del so that it ends at bno.
		 */
		if (del.br_startoff + del.br_blockcount > bno + 1)
			del.br_blockcount = bno + 1 - del.br_startoff;
		sum = del.br_startblock + del.br_blockcount;
		if (isrt &&
		    (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
			/*
			 * Realtime extent not lined up at the end.
			 * The extent could have been split into written
			 * and unwritten pieces, or we could just be
			 * unmapping part of it.  But we can't really
			 * get rid of part of a realtime extent.
			 */
			if (del.br_state == XFS_EXT_UNWRITTEN ||
			    !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
				/*
				 * This piece is unwritten, or we're not
				 * using unwritten extents.  Skip over it.
				 */
				ASSERT(bno >= mod);
				bno -= mod > del.br_blockcount ?
					del.br_blockcount : mod;
				if (bno < got.br_startoff) {
					if (--lastx >= 0)
						xfs_bmbt_get_all(--ep, &got);
				}
				continue;
			}
			/*
			 * It's written, turn it unwritten.
			 * This is better than zeroing it.
			 */
			ASSERT(del.br_state == XFS_EXT_NORM);
			ASSERT(xfs_trans_get_block_res(tp) > 0);
			/*
			 * If this spans a realtime extent boundary,
			 * chop it back to the start of the one we end at.
			 */
			if (del.br_blockcount > mod) {
				del.br_startoff += del.br_blockcount - mod;
				del.br_startblock += del.br_blockcount - mod;
				del.br_blockcount = mod;
			}
			del.br_state = XFS_EXT_UNWRITTEN;
			error = xfs_bmap_add_extent(ip, lastx, &cur, &del,
				firstblock, flist, &logflags, XFS_DATA_FORK, 0);
			if (error)
				goto error0;
			goto nodelete;
		}
		if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
			/*
			 * Realtime extent is lined up at the end but not
			 * at the front.  We'll get rid of full extents if
			 * we can.
			 */
			mod = mp->m_sb.sb_rextsize - mod;
			if (del.br_blockcount > mod) {
				del.br_blockcount -= mod;
				del.br_startoff += mod;
				del.br_startblock += mod;
			} else if ((del.br_startoff == start &&
				    (del.br_state == XFS_EXT_UNWRITTEN ||
				     xfs_trans_get_block_res(tp) == 0)) ||
				   !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
				/*
				 * Can't make it unwritten.  There isn't
				 * a full extent here so just skip it.
				 */
				ASSERT(bno >= del.br_blockcount);
				bno -= del.br_blockcount;
				if (bno < got.br_startoff) {
					if (--lastx >= 0)
						xfs_bmbt_get_all(--ep, &got);
				}
				continue;
			} else if (del.br_state == XFS_EXT_UNWRITTEN) {
				/*
				 * This one is already unwritten.
				 * It must have a written left neighbor.
				 * Unwrite the killed part of that one and
				 * try again.
				 */
				ASSERT(lastx > 0);
				xfs_bmbt_get_all(ep - 1, &prev);
				ASSERT(prev.br_state == XFS_EXT_NORM);
				ASSERT(!ISNULLSTARTBLOCK(prev.br_startblock));
				ASSERT(del.br_startblock ==
				       prev.br_startblock + prev.br_blockcount);
				if (prev.br_startoff < start) {
					mod = start - prev.br_startoff;
					prev.br_blockcount -= mod;
					prev.br_startblock += mod;
					prev.br_startoff = start;
				}
				prev.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent(ip, lastx - 1, &cur,
					&prev, firstblock, flist, &logflags,
					XFS_DATA_FORK, 0);
				if (error)
					goto error0;
				goto nodelete;
			} else {
				ASSERT(del.br_state == XFS_EXT_NORM);
				del.br_state = XFS_EXT_UNWRITTEN;
				error = xfs_bmap_add_extent(ip, lastx, &cur,
					&del, firstblock, flist, &logflags,
					XFS_DATA_FORK, 0);
				if (error)
					goto error0;
				goto nodelete;
			}
		}
		if (wasdel) {
			/*
			 * Delayed allocation: give the reserved blocks back
			 * to the in-core free block count.
			 */
			ASSERT(STARTBLOCKVAL(del.br_startblock) > 0);
			xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
				(int)del.br_blockcount, rsvd);
			/* Unreserve our quota space */
			XFS_TRANS_RESERVE_QUOTA_NBLKS(
				mp, NULL, ip, -((long)del.br_blockcount), 0,
				isrt ?	XFS_QMOPT_RES_RTBLKS :
					XFS_QMOPT_RES_REGBLKS);
			ip->i_delayed_blks -= del.br_blockcount;
			if (cur)
				cur->bc_private.b.flags |=
					XFS_BTCUR_BPRV_WASDEL;
		} else if (cur)
			cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
		/*
		 * If it's the case where the directory code is running
		 * with no block reservation, and the deleted block is in
		 * the middle of its extent, and the resulting insert
		 * of an extent would cause transformation to btree format,
		 * then reject it.  The calling code will then swap
		 * blocks around instead.
		 * We have to do this now, rather than waiting for the
		 * conversion to btree format, since the transaction
		 * will be dirty.
		 */
		if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
		    XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
		    XFS_IFORK_NEXTENTS(ip, whichfork) >= ifp->if_ext_max &&
		    del.br_startoff > got.br_startoff &&
		    del.br_startoff + del.br_blockcount <
		    got.br_startoff + got.br_blockcount) {
			error = XFS_ERROR(ENOSPC);
			goto error0;
		}
		error = xfs_bmap_del_extent(ip, tp, lastx, flist, cur, &del,
			&tmp_logflags, whichfork, rsvd);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
		/*
		 * Everything from del.br_startoff up is gone now; continue
		 * with the block just below it.
		 */
		bno = del.br_startoff - 1;
nodelete:
		lastx = ifp->if_lastex;
		/*
		 * If not done go on to the next (previous) record.
		 * Reset ep in case the extents array was re-alloced.
		 */
		ep = &ifp->if_u1.if_extents[lastx];
		if (bno != (xfs_fileoff_t)-1 && bno >= start) {
			if (lastx >= XFS_IFORK_NEXTENTS(ip, whichfork) ||
			    xfs_bmbt_get_startoff(ep) > bno) {
				lastx--;
				ep--;
			}
			if (lastx >= 0)
				xfs_bmbt_get_all(ep, &got);
			extno++;
		}
	}
	ifp->if_lastex = lastx;
	/*
	 * Finished if bno wrapped, moved below the start of the range, or
	 * the extent list was exhausted; otherwise the nexts limit stopped us.
	 */
	*done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
	ASSERT(ifp->if_ext_max ==
	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
	/*
	 * Convert to a btree if necessary.
	 */
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
	    XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, firstblock, flist,
			&cur, 0, &tmp_logflags, whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * transform from btree to extents, give it cur
	 */
	else if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
		 XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
		ASSERT(cur != NULL);
		error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
			whichfork);
		logflags |= tmp_logflags;
		if (error)
			goto error0;
	}
	/*
	 * transform from extents to local?
	 */
	ASSERT(ifp->if_ext_max ==
	       XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent list if we've converted to btree format.
	 */
	if ((logflags & XFS_ILOG_FEXT(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		logflags &= ~XFS_ILOG_FEXT(whichfork);
	else if ((logflags & XFS_ILOG_FBROOT(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		logflags &= ~XFS_ILOG_FBROOT(whichfork);
	/*
	 * Log inode even in the error case, if the transaction
	 * is dirty we'll need to shut down the filesystem.
	 */
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	/*
	 * Hand the cursor's firstblock back to the caller on success and
	 * tear the cursor down in either case.
	 */
	if (cur) {
		if (!error) {
			*firstblock = cur->bc_private.b.firstblock;
			cur->bc_private.b.allocated = 0;
		}
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	return error;
}
5497 
5498 /*
5499  * Fcntl interface to xfs_bmapi.
5500  */
5501 int						/* error code */
xfs_getbmap(bhv_desc_t * bdp,struct getbmap * bmv,void __user * ap,int interface)5502 xfs_getbmap(
5503 	bhv_desc_t		*bdp,		/* XFS behavior descriptor*/
5504 	struct getbmap		*bmv,		/* user bmap structure */
5505 	void			__user *ap,	/* pointer to user's array */
5506 	int			interface)	/* interface flags */
5507 {
5508 	__int64_t		bmvend;		/* last block requested */
5509 	int			error;		/* return value */
5510 	__int64_t		fixlen;		/* length for -1 case */
5511 	int			i;		/* extent number */
5512 	xfs_inode_t		*ip;		/* xfs incore inode pointer */
5513 	vnode_t			*vp;		/* corresponding vnode */
5514 	int			lock;		/* lock state */
5515 	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
5516 	xfs_mount_t		*mp;		/* file system mount point */
5517 	int			nex;		/* # of user extents can do */
5518 	int			nexleft;	/* # of user extents left */
5519 	int			subnex;		/* # of bmapi's can do */
5520 	int			nmap;		/* number of map entries */
5521 	struct getbmap		out;		/* output structure */
5522 	int			whichfork;	/* data or attr fork */
5523 	int			prealloced;	/* this is a file with
5524 						 * preallocated data space */
5525 	int			sh_unwritten;	/* true, if unwritten */
5526 						/* extents listed separately */
5527 	int			bmapi_flags;	/* flags for xfs_bmapi */
5528 	__int32_t		oflags;		/* getbmapx bmv_oflags field */
5529 
5530 	vp = BHV_TO_VNODE(bdp);
5531 	ip = XFS_BHVTOI(bdp);
5532 	mp = ip->i_mount;
5533 
5534 	whichfork = interface & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
5535 	sh_unwritten = (interface & BMV_IF_PREALLOC) != 0;
5536 
5537 	/*	If the BMV_IF_NO_DMAPI_READ interface bit specified, do not
5538 	 *	generate a DMAPI read event.  Otherwise, if the DM_EVENT_READ
5539 	 *	bit is set for the file, generate a read event in order
5540 	 *	that the DMAPI application may do its thing before we return
5541 	 *	the extents.  Usually this means restoring user file data to
5542 	 *	regions of the file that look like holes.
5543 	 *
5544 	 *	The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
5545 	 *	BMV_IF_NO_DMAPI_READ so that read events are generated.
5546 	 *	If this were not true, callers of ioctl( XFS_IOC_GETBMAP )
5547 	 *	could misinterpret holes in a DMAPI file as true holes,
5548 	 *	when in fact they may represent offline user data.
5549 	 */
5550 	if (   (interface & BMV_IF_NO_DMAPI_READ) == 0
5551 	    && DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ)
5552 	    && whichfork == XFS_DATA_FORK) {
5553 
5554 		error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, 0, 0, 0, NULL);
5555 		if (error)
5556 			return XFS_ERROR(error);
5557 	}
5558 
5559 	if (whichfork == XFS_ATTR_FORK) {
5560 		if (XFS_IFORK_Q(ip)) {
5561 			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
5562 			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
5563 			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
5564 				return XFS_ERROR(EINVAL);
5565 		} else if (unlikely(
5566 			   ip->i_d.di_aformat != 0 &&
5567 			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
5568 			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
5569 					 ip->i_mount);
5570 			return XFS_ERROR(EFSCORRUPTED);
5571 		}
5572 	} else if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
5573 		   ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
5574 		   ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
5575 		return XFS_ERROR(EINVAL);
5576 	if (whichfork == XFS_DATA_FORK) {
5577 		if (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC) {
5578 			prealloced = 1;
5579 			fixlen = XFS_MAXIOFFSET(mp);
5580 		} else {
5581 			prealloced = 0;
5582 			fixlen = ip->i_d.di_size;
5583 		}
5584 	} else {
5585 		prealloced = 0;
5586 		fixlen = 1LL << 32;
5587 	}
5588 
5589 	if (bmv->bmv_length == -1) {
5590 		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
5591 		bmv->bmv_length = MAX( (__int64_t)(fixlen - bmv->bmv_offset),
5592 					(__int64_t)0);
5593 	} else if (bmv->bmv_length < 0)
5594 		return XFS_ERROR(EINVAL);
5595 	if (bmv->bmv_length == 0) {
5596 		bmv->bmv_entries = 0;
5597 		return 0;
5598 	}
5599 	nex = bmv->bmv_count - 1;
5600 	if (nex <= 0)
5601 		return XFS_ERROR(EINVAL);
5602 	bmvend = bmv->bmv_offset + bmv->bmv_length;
5603 
5604 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
5605 
5606 	if (whichfork == XFS_DATA_FORK && ip->i_delayed_blks) {
5607 		/* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
5608 		VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
5609 	}
5610 
5611 	ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
5612 
5613 	lock = xfs_ilock_map_shared(ip);
5614 
5615 	/*
5616 	 * Don't let nex be bigger than the number of extents
5617 	 * we can have assuming alternating holes and real extents.
5618 	 */
5619 	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
5620 		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
5621 
5622 	bmapi_flags = XFS_BMAPI_AFLAG(whichfork) |
5623 			((sh_unwritten) ? 0 : XFS_BMAPI_IGSTATE);
5624 
5625 	/*
5626 	 * Allocate enough space to handle "subnex" maps at a time.
5627 	 */
5628 	subnex = 16;
5629 	map = kmem_alloc(subnex * sizeof(*map), KM_SLEEP);
5630 
5631 	bmv->bmv_entries = 0;
5632 
5633 	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0) {
5634 		error = 0;
5635 		goto unlock_and_return;
5636 	}
5637 
5638 	nexleft = nex;
5639 
5640 	do {
5641 		nmap = (nexleft > subnex) ? subnex : nexleft;
5642 		error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
5643 				  XFS_BB_TO_FSB(mp, bmv->bmv_length),
5644 				  bmapi_flags, NULL, 0, map, &nmap, NULL);
5645 		if (error)
5646 			goto unlock_and_return;
5647 		ASSERT(nmap <= subnex);
5648 
5649 		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
5650 			nexleft--;
5651 			oflags = (map[i].br_state == XFS_EXT_UNWRITTEN) ?
5652 					BMV_OF_PREALLOC : 0;
5653 			out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff);
5654 			out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
5655 			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
5656 			if (prealloced &&
5657 			    map[i].br_startblock == HOLESTARTBLOCK &&
5658 			    out.bmv_offset + out.bmv_length == bmvend) {
5659 				/*
5660 				 * came to hole at end of file
5661 				 */
5662 				goto unlock_and_return;
5663 			} else {
5664 				out.bmv_block =
5665 				    (map[i].br_startblock == HOLESTARTBLOCK) ?
5666 					-1 :
5667 					XFS_FSB_TO_DB(ip, map[i].br_startblock);
5668 
5669 				/* return either getbmap/getbmapx structure. */
5670 				if (interface & BMV_IF_EXTENDED) {
5671 					struct	getbmapx	outx;
5672 
5673 					GETBMAP_CONVERT(out,outx);
5674 					outx.bmv_oflags = oflags;
5675 					outx.bmv_unused1 = outx.bmv_unused2 = 0;
5676 					if (copy_to_user(ap, &outx,
5677 							sizeof(outx))) {
5678 						error = XFS_ERROR(EFAULT);
5679 						goto unlock_and_return;
5680 					}
5681 				} else {
5682 					if (copy_to_user(ap, &out,
5683 							sizeof(out))) {
5684 						error = XFS_ERROR(EFAULT);
5685 						goto unlock_and_return;
5686 					}
5687 				}
5688 				bmv->bmv_offset =
5689 					out.bmv_offset + out.bmv_length;
5690 				bmv->bmv_length = MAX((__int64_t)0,
5691 					(__int64_t)(bmvend - bmv->bmv_offset));
5692 				bmv->bmv_entries++;
5693 				ap = (interface & BMV_IF_EXTENDED) ?
5694 						(void __user *)
5695 					((struct getbmapx __user *)ap + 1) :
5696 						(void __user *)
5697 					((struct getbmap __user *)ap + 1);
5698 			}
5699 		}
5700 	} while (nmap && nexleft && bmv->bmv_length);
5701 
5702 unlock_and_return:
5703 	xfs_iunlock_map_shared(ip, lock);
5704 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
5705 
5706 	kmem_free(map, subnex * sizeof(*map));
5707 
5708 	return error;
5709 }
5710 
5711 /*
5712  * Check the last inode extent to determine whether this allocation will result
5713  * in blocks being allocated at the end of the file. When we allocate new data
5714  * blocks at the end of the file which do not start at the previous data block,
5715  * we will try to align the new blocks at stripe unit boundaries.
5716  */
5717 int					/* error */
xfs_bmap_isaeof(xfs_inode_t * ip,xfs_fileoff_t off,int whichfork,char * aeof)5718 xfs_bmap_isaeof(
5719 	xfs_inode_t	*ip,		/* incore inode pointer */
5720 	xfs_fileoff_t   off,		/* file offset in fsblocks */
5721 	int             whichfork,	/* data or attribute fork */
5722 	char		*aeof)		/* return value */
5723 {
5724 	int		error;		/* error return value */
5725 	xfs_ifork_t	*ifp;		/* inode fork pointer */
5726 	xfs_bmbt_rec_t	*lastrec;	/* extent list entry pointer */
5727 	xfs_extnum_t	nextents;	/* size of extent list */
5728 	xfs_bmbt_irec_t	s;		/* expanded extent list entry */
5729 
5730 	ASSERT(whichfork == XFS_DATA_FORK);
5731 	ifp = XFS_IFORK_PTR(ip, whichfork);
5732 	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5733 	    (error = xfs_iread_extents(NULL, ip, whichfork)))
5734 		return error;
5735 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5736 	if (nextents == 0) {
5737 		*aeof = 1;
5738 		return 0;
5739 	}
5740 	/*
5741 	 * Go to the last extent
5742 	 */
5743 	lastrec = &ifp->if_u1.if_extents[nextents - 1];
5744 	xfs_bmbt_get_all(lastrec, &s);
5745 	/*
5746 	 * Check we are allocating in the last extent (for delayed allocations)
5747 	 * or past the last extent for non-delayed allocations.
5748 	 */
5749 	*aeof = (off >= s.br_startoff &&
5750 		 off < s.br_startoff + s.br_blockcount &&
5751 		 ISNULLSTARTBLOCK(s.br_startblock)) ||
5752 		off >= s.br_startoff + s.br_blockcount;
5753 	return 0;
5754 }
5755 
5756 /*
5757  * Check if the endoff is outside the last extent. If so the caller will grow
5758  * the allocation to a stripe unit boundary.
5759  */
5760 int					/* error */
xfs_bmap_eof(xfs_inode_t * ip,xfs_fileoff_t endoff,int whichfork,int * eof)5761 xfs_bmap_eof(
5762 	xfs_inode_t	*ip,		/* incore inode pointer */
5763 	xfs_fileoff_t	endoff,		/* file offset in fsblocks */
5764 	int		whichfork,	/* data or attribute fork */
5765 	int		*eof)		/* result value */
5766 {
5767 	xfs_fsblock_t	blockcount;	/* extent block count */
5768 	int		error;		/* error return value */
5769 	xfs_ifork_t	*ifp;		/* inode fork pointer */
5770 	xfs_bmbt_rec_t	*lastrec;	/* extent list entry pointer */
5771 	xfs_extnum_t	nextents;	/* size of extent list */
5772 	xfs_fileoff_t	startoff;	/* extent starting file offset */
5773 
5774 	ASSERT(whichfork == XFS_DATA_FORK);
5775 	ifp = XFS_IFORK_PTR(ip, whichfork);
5776 	if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5777 	    (error = xfs_iread_extents(NULL, ip, whichfork)))
5778 		return error;
5779 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5780 	if (nextents == 0) {
5781 		*eof = 1;
5782 		return 0;
5783 	}
5784 	/*
5785 	 * Go to the last extent
5786 	 */
5787 	lastrec = &ifp->if_u1.if_extents[nextents - 1];
5788 	startoff = xfs_bmbt_get_startoff(lastrec);
5789 	blockcount = xfs_bmbt_get_blockcount(lastrec);
5790 	*eof = endoff >= startoff + blockcount;
5791 	return 0;
5792 }
5793 
5794 #ifdef DEBUG
5795 /*
5796  * Check that the extents list for the inode ip is in the right order.
5797  */
5798 STATIC void
xfs_bmap_check_extents(xfs_inode_t * ip,int whichfork)5799 xfs_bmap_check_extents(
5800 	xfs_inode_t		*ip,		/* incore inode pointer */
5801 	int			whichfork)	/* data or attr fork */
5802 {
5803 	xfs_bmbt_rec_t		*base;		/* base of extents list */
5804 	xfs_bmbt_rec_t		*ep;		/* current extent entry */
5805 	xfs_ifork_t		*ifp;		/* inode fork pointer */
5806 	xfs_extnum_t		nextents;	/* number of extents in list */
5807 
5808 	ifp = XFS_IFORK_PTR(ip, whichfork);
5809 	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
5810 	base = ifp->if_u1.if_extents;
5811 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5812 	for (ep = base; ep < &base[nextents - 1]; ep++) {
5813 		xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep,
5814 			(void *)(ep + 1));
5815 	}
5816 }
5817 
5818 STATIC
5819 xfs_buf_t *
xfs_bmap_get_bp(xfs_btree_cur_t * cur,xfs_fsblock_t bno)5820 xfs_bmap_get_bp(
5821 	xfs_btree_cur_t         *cur,
5822 	xfs_fsblock_t		bno)
5823 {
5824 	int i;
5825 	xfs_buf_t *bp;
5826 
5827 	if (!cur)
5828 		return(NULL);
5829 
5830 	bp = NULL;
5831 	for(i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
5832 		bp = cur->bc_bufs[i];
5833 		if (!bp) break;
5834 		if (XFS_BUF_ADDR(bp) == bno)
5835 			break;	/* Found it */
5836 	}
5837 	if (i == XFS_BTREE_MAXLEVELS)
5838 		bp = NULL;
5839 
5840 	if (!bp) { /* Chase down all the log items to see if the bp is there */
5841 		xfs_log_item_chunk_t    *licp;
5842 		xfs_trans_t		*tp;
5843 
5844 		tp = cur->bc_tp;
5845 		licp = &tp->t_items;
5846 		while (!bp && licp != NULL) {
5847 			if (XFS_LIC_ARE_ALL_FREE(licp)) {
5848 				licp = licp->lic_next;
5849 				continue;
5850 			}
5851 			for (i = 0; i < licp->lic_unused; i++) {
5852 				xfs_log_item_desc_t	*lidp;
5853 				xfs_log_item_t		*lip;
5854 				xfs_buf_log_item_t	*bip;
5855 				xfs_buf_t		*lbp;
5856 
5857 				if (XFS_LIC_ISFREE(licp, i)) {
5858 					continue;
5859 				}
5860 
5861 				lidp = XFS_LIC_SLOT(licp, i);
5862 				lip = lidp->lid_item;
5863 				if (lip->li_type != XFS_LI_BUF)
5864 					continue;
5865 
5866 				bip = (xfs_buf_log_item_t *)lip;
5867 				lbp = bip->bli_buf;
5868 
5869 				if (XFS_BUF_ADDR(lbp) == bno) {
5870 					bp = lbp;
5871 					break; /* Found it */
5872 				}
5873 			}
5874 			licp = licp->lic_next;
5875 		}
5876 	}
5877 	return(bp);
5878 }
5879 
5880 void
xfs_check_block(xfs_bmbt_block_t * block,xfs_mount_t * mp,int root,short sz)5881 xfs_check_block(
5882 	xfs_bmbt_block_t        *block,
5883 	xfs_mount_t		*mp,
5884 	int			root,
5885 	short			sz)
5886 {
5887 	int			i, j, dmxr;
5888 	xfs_bmbt_ptr_t		*pp, *thispa;	/* pointer to block address */
5889 	xfs_bmbt_key_t		*prevp, *keyp;
5890 
5891 	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
5892 
5893 	prevp = NULL;
5894 	for( i = 1; i <= INT_GET(block->bb_numrecs, ARCH_CONVERT);i++) {
5895 		dmxr = mp->m_bmap_dmxr[0];
5896 
5897 		if (root) {
5898 			keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz);
5899 		} else {
5900 			keyp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize,
5901 				xfs_bmbt, block, i, dmxr);
5902 		}
5903 
5904 		if (prevp) {
5905 			xfs_btree_check_key(XFS_BTNUM_BMAP, prevp, keyp);
5906 		}
5907 		prevp = keyp;
5908 
5909 		/*
5910 		 * Compare the block numbers to see if there are dups.
5911 		 */
5912 
5913 		if (root) {
5914 			pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz);
5915 		} else {
5916 			pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
5917 				xfs_bmbt, block, i, dmxr);
5918 		}
5919 		for (j = i+1; j <= INT_GET(block->bb_numrecs, ARCH_CONVERT); j++) {
5920 			if (root) {
5921 				thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz);
5922 			} else {
5923 				thispa = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
5924 					xfs_bmbt, block, j, dmxr);
5925 			}
5926 			if (INT_GET(*thispa, ARCH_CONVERT) ==
5927 			    INT_GET(*pp, ARCH_CONVERT)) {
5928 				cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
5929 					__FUNCTION__, j, i,
5930 					INT_GET(*thispa, ARCH_CONVERT));
5931 				panic("%s: ptrs are equal in node\n",
5932 					__FUNCTION__);
5933 			}
5934 		}
5935 	}
5936 }
5937 
5938 /*
5939  * Check that the extents for the inode ip are in the right order in all
5940  * btree leaves.
5941  */
5942 
5943 STATIC void
xfs_bmap_check_leaf_extents(xfs_btree_cur_t * cur,xfs_inode_t * ip,int whichfork)5944 xfs_bmap_check_leaf_extents(
5945 	xfs_btree_cur_t		*cur,	/* btree cursor or null */
5946 	xfs_inode_t		*ip,		/* incore inode pointer */
5947 	int			whichfork)	/* data or attr fork */
5948 {
5949 	xfs_bmbt_block_t	*block;	/* current btree block */
5950 	xfs_fsblock_t		bno;	/* block # of "block" */
5951 	xfs_buf_t		*bp;	/* buffer for "block" */
5952 	int			error;	/* error return value */
5953 	xfs_extnum_t		i=0;	/* index into the extents list */
5954 	xfs_ifork_t		*ifp;	/* fork structure */
5955 	int			level;	/* btree level, for checking */
5956 	xfs_mount_t		*mp;	/* file system mount structure */
5957 	xfs_bmbt_ptr_t		*pp;	/* pointer to block address */
5958 	xfs_bmbt_rec_t		*ep, *lastp;	/* extent pointers in block entry */
5959 	int			bp_release = 0;
5960 
5961 	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
5962 		return;
5963 	}
5964 
5965 	bno = NULLFSBLOCK;
5966 	mp = ip->i_mount;
5967 	ifp = XFS_IFORK_PTR(ip, whichfork);
5968 	block = ifp->if_broot;
5969 	/*
5970 	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
5971 	 */
5972 	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
5973 	level = INT_GET(block->bb_level, ARCH_CONVERT);
5974 	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
5975 	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
5976 	ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
5977 	ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
5978 	ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
5979 	bno = INT_GET(*pp, ARCH_CONVERT);
5980 	/*
5981 	 * Go down the tree until leaf level is reached, following the first
5982 	 * pointer (leftmost) at each level.
5983 	 */
5984 	while (level-- > 0) {
5985 		/* See if buf is in cur first */
5986 		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
5987 		if (bp) {
5988 			bp_release = 0;
5989 		} else {
5990 			bp_release = 1;
5991 		}
5992 		if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
5993 				XFS_BMAP_BTREE_REF)))
5994 			goto error_norelse;
5995 		block = XFS_BUF_TO_BMBT_BLOCK(bp);
5996 		XFS_WANT_CORRUPTED_GOTO(
5997 			XFS_BMAP_SANITY_CHECK(mp, block, level),
5998 			error0);
5999 		if (level == 0)
6000 			break;
6001 
6002 		/*
6003 		 * Check this block for basic sanity (increasing keys and
6004 		 * no duplicate blocks).
6005 		 */
6006 
6007 		xfs_check_block(block, mp, 0, 0);
6008 		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
6009 			1, mp->m_bmap_dmxr[1]);
6010 		XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, INT_GET(*pp, ARCH_CONVERT)), error0);
6011 		bno = INT_GET(*pp, ARCH_CONVERT);
6012 		if (bp_release) {
6013 			bp_release = 0;
6014 			xfs_trans_brelse(NULL, bp);
6015 		}
6016 	}
6017 
6018 	/*
6019 	 * Here with bp and block set to the leftmost leaf node in the tree.
6020 	 */
6021 	i = 0;
6022 
6023 	/*
6024 	 * Loop over all leaf nodes checking that all extents are in the right order.
6025 	 */
6026 	lastp = NULL;
6027 	for (;;) {
6028 		xfs_bmbt_rec_t	*frp;
6029 		xfs_fsblock_t	nextbno;
6030 		xfs_extnum_t	num_recs;
6031 
6032 
6033 		num_recs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
6034 
6035 		/*
6036 		 * Read-ahead the next leaf block, if any.
6037 		 */
6038 
6039 		nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
6040 
6041 		/*
6042 		 * Check all the extents to make sure they are OK.
6043 		 * If we had a previous block, the last entry should
6044 		 * conform with the first entry in this one.
6045 		 */
6046 
6047 		frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
6048 			block, 1, mp->m_bmap_dmxr[0]);
6049 
6050 		for (ep = frp;ep < frp + (num_recs - 1); ep++) {
6051 			if (lastp) {
6052 				xfs_btree_check_rec(XFS_BTNUM_BMAP,
6053 					(void *)lastp, (void *)ep);
6054 			}
6055 			xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep,
6056 				(void *)(ep + 1));
6057 		}
6058 		lastp = frp + num_recs - 1; /* For the next iteration */
6059 
6060 		i += num_recs;
6061 		if (bp_release) {
6062 			bp_release = 0;
6063 			xfs_trans_brelse(NULL, bp);
6064 		}
6065 		bno = nextbno;
6066 		/*
6067 		 * If we've reached the end, stop.
6068 		 */
6069 		if (bno == NULLFSBLOCK)
6070 			break;
6071 
6072 		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
6073 		if (bp) {
6074 			bp_release = 0;
6075 		} else {
6076 			bp_release = 1;
6077 		}
6078 		if (!bp && (error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
6079 				XFS_BMAP_BTREE_REF)))
6080 			goto error_norelse;
6081 		block = XFS_BUF_TO_BMBT_BLOCK(bp);
6082 	}
6083 	if (bp_release) {
6084 		bp_release = 0;
6085 		xfs_trans_brelse(NULL, bp);
6086 	}
6087 	return;
6088 
6089 error0:
6090 	cmn_err(CE_WARN, "%s: at error0", __FUNCTION__);
6091 	if (bp_release)
6092 		xfs_trans_brelse(NULL, bp);
6093 error_norelse:
6094 	cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents",
6095 		i, __FUNCTION__);
6096 	panic("%s: CORRUPTED BTREE OR SOMETHING", __FUNCTION__);
6097 	return;
6098 }
6099 #endif
6100 
6101 /*
6102  * Count fsblocks of the given fork.
6103  */
6104 int						/* error */
xfs_bmap_count_blocks(xfs_trans_t * tp,xfs_inode_t * ip,int whichfork,int * count)6105 xfs_bmap_count_blocks(
6106 	xfs_trans_t		*tp,		/* transaction pointer */
6107 	xfs_inode_t		*ip,		/* incore inode */
6108 	int			whichfork,	/* data or attr fork */
6109 	int			*count)		/* out: count of blocks */
6110 {
6111 	xfs_bmbt_block_t	*block;	/* current btree block */
6112 	xfs_fsblock_t		bno;	/* block # of "block" */
6113 	xfs_ifork_t		*ifp;	/* fork structure */
6114 	int			level;	/* btree level, for checking */
6115 	xfs_mount_t		*mp;	/* file system mount structure */
6116 	xfs_bmbt_ptr_t		*pp;	/* pointer to block address */
6117 
6118 	bno = NULLFSBLOCK;
6119 	mp = ip->i_mount;
6120 	ifp = XFS_IFORK_PTR(ip, whichfork);
6121 	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
6122 		if (unlikely(xfs_bmap_count_leaves(ifp->if_u1.if_extents,
6123 			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
6124 			count) < 0)) {
6125 			XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)",
6126 					 XFS_ERRLEVEL_LOW, mp);
6127 			return XFS_ERROR(EFSCORRUPTED);
6128 		}
6129 		return 0;
6130 	}
6131 
6132 	/*
6133 	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
6134 	 */
6135 	block = ifp->if_broot;
6136 	ASSERT(INT_GET(block->bb_level, ARCH_CONVERT) > 0);
6137 	level = INT_GET(block->bb_level, ARCH_CONVERT);
6138 	pp = XFS_BMAP_BROOT_PTR_ADDR(block, 1, ifp->if_broot_bytes);
6139 	ASSERT(INT_GET(*pp, ARCH_CONVERT) != NULLDFSBNO);
6140 	ASSERT(XFS_FSB_TO_AGNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agcount);
6141 	ASSERT(XFS_FSB_TO_AGBNO(mp, INT_GET(*pp, ARCH_CONVERT)) < mp->m_sb.sb_agblocks);
6142 	bno = INT_GET(*pp, ARCH_CONVERT);
6143 
6144 	if (unlikely(xfs_bmap_count_tree(mp, tp, bno, level, count) < 0)) {
6145 		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
6146 				 mp);
6147 		return XFS_ERROR(EFSCORRUPTED);
6148 	}
6149 
6150 	return 0;
6151 }
6152 
6153 /*
6154  * Recursively walks each level of a btree
6155  * to count total fsblocks is use.
6156  */
6157 int                                     /* error */
xfs_bmap_count_tree(xfs_mount_t * mp,xfs_trans_t * tp,xfs_fsblock_t blockno,int levelin,int * count)6158 xfs_bmap_count_tree(
6159 	xfs_mount_t     *mp,            /* file system mount point */
6160 	xfs_trans_t     *tp,            /* transaction pointer */
6161 	xfs_fsblock_t   blockno,	/* file system block number */
6162 	int             levelin,	/* level in btree */
6163 	int		*count)		/* Count of blocks */
6164 {
6165 	int			error;
6166 	xfs_buf_t		*bp, *nbp;
6167 	int			level = levelin;
6168 	xfs_bmbt_ptr_t          *pp;
6169 	xfs_fsblock_t           bno = blockno;
6170 	xfs_fsblock_t		nextbno;
6171 	xfs_bmbt_block_t        *block, *nextblock;
6172 	int			numrecs;
6173 	xfs_bmbt_rec_t		*frp;
6174 
6175 	if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF)))
6176 		return error;
6177 	*count += 1;
6178 	block = XFS_BUF_TO_BMBT_BLOCK(bp);
6179 
6180 	if (--level) {
6181 		/* Not at node above leafs, count this level of nodes */
6182 		nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
6183 		while (nextbno != NULLFSBLOCK) {
6184 			if ((error = xfs_btree_read_bufl(mp, tp, nextbno,
6185 				0, &nbp, XFS_BMAP_BTREE_REF)))
6186 				return error;
6187 			*count += 1;
6188 			nextblock = XFS_BUF_TO_BMBT_BLOCK(nbp);
6189 			nextbno = INT_GET(nextblock->bb_rightsib, ARCH_CONVERT);
6190 			xfs_trans_brelse(tp, nbp);
6191 		}
6192 
6193 		/* Dive to the next level */
6194 		pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
6195 			xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
6196 		bno = INT_GET(*pp, ARCH_CONVERT);
6197 		if (unlikely((error =
6198 		     xfs_bmap_count_tree(mp, tp, bno, level, count)) < 0)) {
6199 			xfs_trans_brelse(tp, bp);
6200 			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
6201 					 XFS_ERRLEVEL_LOW, mp);
6202 			return XFS_ERROR(EFSCORRUPTED);
6203 		}
6204 		xfs_trans_brelse(tp, bp);
6205 	} else {
6206 		/* count all level 1 nodes and their leaves */
6207 		for (;;) {
6208 			nextbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
6209 			numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
6210 			frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize,
6211 				xfs_bmbt, block, 1, mp->m_bmap_dmxr[0]);
6212 			if (unlikely(xfs_bmap_count_leaves(frp, numrecs, count) < 0)) {
6213 				xfs_trans_brelse(tp, bp);
6214 				XFS_ERROR_REPORT("xfs_bmap_count_tree(2)",
6215 						 XFS_ERRLEVEL_LOW, mp);
6216 				return XFS_ERROR(EFSCORRUPTED);
6217 			}
6218 			xfs_trans_brelse(tp, bp);
6219 			if (nextbno == NULLFSBLOCK)
6220 				break;
6221 			bno = nextbno;
6222 			if ((error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
6223 				XFS_BMAP_BTREE_REF)))
6224 				return error;
6225 			*count += 1;
6226 			block = XFS_BUF_TO_BMBT_BLOCK(bp);
6227 		}
6228 	}
6229 	return 0;
6230 }
6231 
6232 /*
6233  * Count leaf blocks given a pointer to an extent list.
6234  */
6235 int
xfs_bmap_count_leaves(xfs_bmbt_rec_t * frp,int numrecs,int * count)6236 xfs_bmap_count_leaves(
6237 	xfs_bmbt_rec_t		*frp,
6238 	int			numrecs,
6239 	int			*count)
6240 {
6241 	int		b;
6242 
6243 	for ( b = 1; b <= numrecs; b++, frp++)
6244 		*count += xfs_bmbt_disk_get_blockcount(frp);
6245 	return 0;
6246 }
6247