1 /*
2  * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32 
33 /*
34  * Free space allocation for XFS.
35  */
36 #include "xfs.h"
37 #include "xfs_macros.h"
38 #include "xfs_types.h"
39 #include "xfs_inum.h"
40 #include "xfs_log.h"
41 #include "xfs_trans.h"
42 #include "xfs_sb.h"
43 #include "xfs_ag.h"
44 #include "xfs_dir.h"
45 #include "xfs_dmapi.h"
46 #include "xfs_mount.h"
47 #include "xfs_alloc_btree.h"
48 #include "xfs_bmap_btree.h"
49 #include "xfs_ialloc_btree.h"
50 #include "xfs_btree.h"
51 #include "xfs_ialloc.h"
52 #include "xfs_alloc.h"
53 #include "xfs_bit.h"
54 #include "xfs_error.h"
55 
56 
57 #define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
58 
59 #define	XFSA_FIXUP_BNO_OK	1
60 #define	XFSA_FIXUP_CNT_OK	2
61 
62 int
63 xfs_alloc_search_busy(xfs_trans_t *tp,
64 		    xfs_agnumber_t agno,
65 		    xfs_agblock_t bno,
66 		    xfs_extlen_t len);
67 
68 #if defined(XFS_ALLOC_TRACE)
69 ktrace_t *xfs_alloc_trace_buf;
70 
71 #define	TRACE_ALLOC(s,a)	\
72 	xfs_alloc_trace_alloc(fname, s, a, __LINE__)
73 #define	TRACE_FREE(s,a,b,x,f)	\
74 	xfs_alloc_trace_free(fname, s, mp, a, b, x, f, __LINE__)
75 #define	TRACE_MODAGF(s,a,f)	\
76 	xfs_alloc_trace_modagf(fname, s, mp, a, f, __LINE__)
77 #define	TRACE_BUSY(fname,s,ag,agb,l,sl,tp)	\
78 	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__)
79 #define	TRACE_UNBUSY(fname,s,ag,sl,tp)	\
80 	xfs_alloc_trace_busy(fname, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__)
81 #define	TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp)	\
82 	xfs_alloc_trace_busy(fname, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__)
83 #else
84 #define	TRACE_ALLOC(s,a)
85 #define	TRACE_FREE(s,a,b,x,f)
86 #define	TRACE_MODAGF(s,a,f)
87 #define	TRACE_BUSY(s,a,ag,agb,l,sl,tp)
88 #define	TRACE_UNBUSY(fname,s,ag,sl,tp)
89 #define	TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp)
90 #endif	/* XFS_ALLOC_TRACE */
91 
92 /*
93  * Prototypes for per-ag allocation routines
94  */
95 
96 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
97 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
98 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
99 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
100 	xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
101 
102 /*
103  * Internal functions.
104  */
105 
106 /*
107  * Compute aligned version of the found extent.
108  * Takes alignment and min length into account.
109  */
110 STATIC int				/* success (>= minlen) */
xfs_alloc_compute_aligned(xfs_agblock_t foundbno,xfs_extlen_t foundlen,xfs_extlen_t alignment,xfs_extlen_t minlen,xfs_agblock_t * resbno,xfs_extlen_t * reslen)111 xfs_alloc_compute_aligned(
112 	xfs_agblock_t	foundbno,	/* starting block in found extent */
113 	xfs_extlen_t	foundlen,	/* length in found extent */
114 	xfs_extlen_t	alignment,	/* alignment for allocation */
115 	xfs_extlen_t	minlen,		/* minimum length for allocation */
116 	xfs_agblock_t	*resbno,	/* result block number */
117 	xfs_extlen_t	*reslen)	/* result length */
118 {
119 	xfs_agblock_t	bno;
120 	xfs_extlen_t	diff;
121 	xfs_extlen_t	len;
122 
123 	if (alignment > 1 && foundlen >= minlen) {
124 		bno = roundup(foundbno, alignment);
125 		diff = bno - foundbno;
126 		len = diff >= foundlen ? 0 : foundlen - diff;
127 	} else {
128 		bno = foundbno;
129 		len = foundlen;
130 	}
131 	*resbno = bno;
132 	*reslen = len;
133 	return len >= minlen;
134 }
135 
136 /*
137  * Compute best start block and diff for "near" allocations.
138  * freelen >= wantlen already checked by caller.
139  */
140 STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(xfs_agblock_t wantbno,xfs_extlen_t wantlen,xfs_extlen_t alignment,xfs_agblock_t freebno,xfs_extlen_t freelen,xfs_agblock_t * newbnop)141 xfs_alloc_compute_diff(
142 	xfs_agblock_t	wantbno,	/* target starting block */
143 	xfs_extlen_t	wantlen,	/* target length */
144 	xfs_extlen_t	alignment,	/* target alignment */
145 	xfs_agblock_t	freebno,	/* freespace's starting block */
146 	xfs_extlen_t	freelen,	/* freespace's length */
147 	xfs_agblock_t	*newbnop)	/* result: best start block from free */
148 {
149 	xfs_agblock_t	freeend;	/* end of freespace extent */
150 	xfs_agblock_t	newbno1;	/* return block number */
151 	xfs_agblock_t	newbno2;	/* other new block number */
152 	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
153 	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
154 	xfs_agblock_t	wantend;	/* end of target extent */
155 
156 	ASSERT(freelen >= wantlen);
157 	freeend = freebno + freelen;
158 	wantend = wantbno + wantlen;
159 	if (freebno >= wantbno) {
160 		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
161 			newbno1 = NULLAGBLOCK;
162 	} else if (freeend >= wantend && alignment > 1) {
163 		newbno1 = roundup(wantbno, alignment);
164 		newbno2 = newbno1 - alignment;
165 		if (newbno1 >= freeend)
166 			newbno1 = NULLAGBLOCK;
167 		else
168 			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
169 		if (newbno2 < freebno)
170 			newbno2 = NULLAGBLOCK;
171 		else
172 			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
173 		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
174 			if (newlen1 < newlen2 ||
175 			    (newlen1 == newlen2 &&
176 			     XFS_ABSDIFF(newbno1, wantbno) >
177 			     XFS_ABSDIFF(newbno2, wantbno)))
178 				newbno1 = newbno2;
179 		} else if (newbno2 != NULLAGBLOCK)
180 			newbno1 = newbno2;
181 	} else if (freeend >= wantend) {
182 		newbno1 = wantbno;
183 	} else if (alignment > 1) {
184 		newbno1 = roundup(freeend - wantlen, alignment);
185 		if (newbno1 > freeend - wantlen &&
186 		    newbno1 - alignment >= freebno)
187 			newbno1 -= alignment;
188 		else if (newbno1 >= freeend)
189 			newbno1 = NULLAGBLOCK;
190 	} else
191 		newbno1 = freeend - wantlen;
192 	*newbnop = newbno1;
193 	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
194 }
195 
196 /*
197  * Fix up the length, based on mod and prod.
198  * len should be k * prod + mod for some k.
199  * If len is too small it is returned unchanged.
200  * If len hits maxlen it is left alone.
201  */
202 STATIC void
xfs_alloc_fix_len(xfs_alloc_arg_t * args)203 xfs_alloc_fix_len(
204 	xfs_alloc_arg_t	*args)		/* allocation argument structure */
205 {
206 	xfs_extlen_t	k;
207 	xfs_extlen_t	rlen;
208 
209 	ASSERT(args->mod < args->prod);
210 	rlen = args->len;
211 	ASSERT(rlen >= args->minlen);
212 	ASSERT(rlen <= args->maxlen);
213 	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
214 	    (args->mod == 0 && rlen < args->prod))
215 		return;
216 	k = rlen % args->prod;
217 	if (k == args->mod)
218 		return;
219 	if (k > args->mod) {
220 		if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
221 			return;
222 	} else {
223 		if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
224 		    (int)args->minlen)
225 			return;
226 	}
227 	ASSERT(rlen >= args->minlen);
228 	ASSERT(rlen <= args->maxlen);
229 	args->len = rlen;
230 }
231 
232 /*
233  * Fix up length if there is too little space left in the a.g.
234  * Return 1 if ok, 0 if too little, should give up.
235  */
236 STATIC int
xfs_alloc_fix_minleft(xfs_alloc_arg_t * args)237 xfs_alloc_fix_minleft(
238 	xfs_alloc_arg_t	*args)		/* allocation argument structure */
239 {
240 	xfs_agf_t	*agf;		/* a.g. freelist header */
241 	int		diff;		/* free space difference */
242 
243 	if (args->minleft == 0)
244 		return 1;
245 	agf = XFS_BUF_TO_AGF(args->agbp);
246 	diff = INT_GET(agf->agf_freeblks, ARCH_CONVERT)
247 		+ INT_GET(agf->agf_flcount, ARCH_CONVERT)
248 		- args->len - args->minleft;
249 	if (diff >= 0)
250 		return 1;
251 	args->len += diff;		/* shrink the allocated space */
252 	if (args->len >= args->minlen)
253 		return 1;
254 	args->agbno = NULLAGBLOCK;
255 	return 0;
256 }
257 
258 /*
259  * Update the two btrees, logically removing from freespace the extent
260  * starting at rbno, rlen blocks.  The extent is contained within the
261  * actual (current) free extent fbno for flen blocks.
262  * Flags are passed in indicating whether the cursors are set to the
263  * relevant records.
264  */
265 STATIC int				/* error code */
xfs_alloc_fixup_trees(xfs_btree_cur_t * cnt_cur,xfs_btree_cur_t * bno_cur,xfs_agblock_t fbno,xfs_extlen_t flen,xfs_agblock_t rbno,xfs_extlen_t rlen,int flags)266 xfs_alloc_fixup_trees(
267 	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
268 	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
269 	xfs_agblock_t	fbno,		/* starting block of free extent */
270 	xfs_extlen_t	flen,		/* length of free extent */
271 	xfs_agblock_t	rbno,		/* starting block of returned extent */
272 	xfs_extlen_t	rlen,		/* length of returned extent */
273 	int		flags)		/* flags, XFSA_FIXUP_... */
274 {
275 	int		error;		/* error code */
276 	int		i;		/* operation results */
277 	xfs_agblock_t	nfbno1;		/* first new free startblock */
278 	xfs_agblock_t	nfbno2;		/* second new free startblock */
279 	xfs_extlen_t	nflen1=0;	/* first new free length */
280 	xfs_extlen_t	nflen2=0;	/* second new free length */
281 
282 	/*
283 	 * Look up the record in the by-size tree if necessary.
284 	 */
285 	if (flags & XFSA_FIXUP_CNT_OK) {
286 #ifdef DEBUG
287 		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
288 			return error;
289 		XFS_WANT_CORRUPTED_RETURN(
290 			i == 1 && nfbno1 == fbno && nflen1 == flen);
291 #endif
292 	} else {
293 		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
294 			return error;
295 		XFS_WANT_CORRUPTED_RETURN(i == 1);
296 	}
297 	/*
298 	 * Look up the record in the by-block tree if necessary.
299 	 */
300 	if (flags & XFSA_FIXUP_BNO_OK) {
301 #ifdef DEBUG
302 		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
303 			return error;
304 		XFS_WANT_CORRUPTED_RETURN(
305 			i == 1 && nfbno1 == fbno && nflen1 == flen);
306 #endif
307 	} else {
308 		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
309 			return error;
310 		XFS_WANT_CORRUPTED_RETURN(i == 1);
311 	}
312 #ifdef DEBUG
313 	{
314 		xfs_alloc_block_t	*bnoblock;
315 		xfs_alloc_block_t	*cntblock;
316 
317 		if (bno_cur->bc_nlevels == 1 &&
318 		    cnt_cur->bc_nlevels == 1) {
319 			bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]);
320 			cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]);
321 			XFS_WANT_CORRUPTED_RETURN(
322 				INT_GET(bnoblock->bb_numrecs, ARCH_CONVERT) == INT_GET(cntblock->bb_numrecs, ARCH_CONVERT));
323 		}
324 	}
325 #endif
326 	/*
327 	 * Deal with all four cases: the allocated record is contained
328 	 * within the freespace record, so we can have new freespace
329 	 * at either (or both) end, or no freespace remaining.
330 	 */
331 	if (rbno == fbno && rlen == flen)
332 		nfbno1 = nfbno2 = NULLAGBLOCK;
333 	else if (rbno == fbno) {
334 		nfbno1 = rbno + rlen;
335 		nflen1 = flen - rlen;
336 		nfbno2 = NULLAGBLOCK;
337 	} else if (rbno + rlen == fbno + flen) {
338 		nfbno1 = fbno;
339 		nflen1 = flen - rlen;
340 		nfbno2 = NULLAGBLOCK;
341 	} else {
342 		nfbno1 = fbno;
343 		nflen1 = rbno - fbno;
344 		nfbno2 = rbno + rlen;
345 		nflen2 = (fbno + flen) - nfbno2;
346 	}
347 	/*
348 	 * Delete the entry from the by-size btree.
349 	 */
350 	if ((error = xfs_alloc_delete(cnt_cur, &i)))
351 		return error;
352 	XFS_WANT_CORRUPTED_RETURN(i == 1);
353 	/*
354 	 * Add new by-size btree entry(s).
355 	 */
356 	if (nfbno1 != NULLAGBLOCK) {
357 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
358 			return error;
359 		XFS_WANT_CORRUPTED_RETURN(i == 0);
360 		if ((error = xfs_alloc_insert(cnt_cur, &i)))
361 			return error;
362 		XFS_WANT_CORRUPTED_RETURN(i == 1);
363 	}
364 	if (nfbno2 != NULLAGBLOCK) {
365 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
366 			return error;
367 		XFS_WANT_CORRUPTED_RETURN(i == 0);
368 		if ((error = xfs_alloc_insert(cnt_cur, &i)))
369 			return error;
370 		XFS_WANT_CORRUPTED_RETURN(i == 1);
371 	}
372 	/*
373 	 * Fix up the by-block btree entry(s).
374 	 */
375 	if (nfbno1 == NULLAGBLOCK) {
376 		/*
377 		 * No remaining freespace, just delete the by-block tree entry.
378 		 */
379 		if ((error = xfs_alloc_delete(bno_cur, &i)))
380 			return error;
381 		XFS_WANT_CORRUPTED_RETURN(i == 1);
382 	} else {
383 		/*
384 		 * Update the by-block entry to start later|be shorter.
385 		 */
386 		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
387 			return error;
388 	}
389 	if (nfbno2 != NULLAGBLOCK) {
390 		/*
391 		 * 2 resulting free entries, need to add one.
392 		 */
393 		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
394 			return error;
395 		XFS_WANT_CORRUPTED_RETURN(i == 0);
396 		if ((error = xfs_alloc_insert(bno_cur, &i)))
397 			return error;
398 		XFS_WANT_CORRUPTED_RETURN(i == 1);
399 	}
400 	return 0;
401 }
402 
403 /*
404  * Read in the allocation group free block array.
405  */
406 STATIC int				/* error */
xfs_alloc_read_agfl(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,xfs_buf_t ** bpp)407 xfs_alloc_read_agfl(
408 	xfs_mount_t	*mp,		/* mount point structure */
409 	xfs_trans_t	*tp,		/* transaction pointer */
410 	xfs_agnumber_t	agno,		/* allocation group number */
411 	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
412 {
413 	xfs_buf_t	*bp;		/* return value */
414 	int		error;
415 
416 	ASSERT(agno != NULLAGNUMBER);
417 	error = xfs_trans_read_buf(
418 			mp, tp, mp->m_ddev_targp,
419 			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
420 			XFS_FSS_TO_BB(mp, 1), 0, &bp);
421 	if (error)
422 		return error;
423 	ASSERT(bp);
424 	ASSERT(!XFS_BUF_GETERROR(bp));
425 	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
426 	*bpp = bp;
427 	return 0;
428 }
429 
430 #if defined(XFS_ALLOC_TRACE)
431 /*
432  * Add an allocation trace entry for an alloc call.
433  */
434 STATIC void
xfs_alloc_trace_alloc(char * name,char * str,xfs_alloc_arg_t * args,int line)435 xfs_alloc_trace_alloc(
436 	char		*name,		/* function tag string */
437 	char		*str,		/* additional string */
438 	xfs_alloc_arg_t	*args,		/* allocation argument structure */
439 	int		line)		/* source line number */
440 {
441 	ktrace_enter(xfs_alloc_trace_buf,
442 		(void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)),
443 		(void *)name,
444 		(void *)str,
445 		(void *)args->mp,
446 		(void *)(__psunsigned_t)args->agno,
447 		(void *)(__psunsigned_t)args->agbno,
448 		(void *)(__psunsigned_t)args->minlen,
449 		(void *)(__psunsigned_t)args->maxlen,
450 		(void *)(__psunsigned_t)args->mod,
451 		(void *)(__psunsigned_t)args->prod,
452 		(void *)(__psunsigned_t)args->minleft,
453 		(void *)(__psunsigned_t)args->total,
454 		(void *)(__psunsigned_t)args->alignment,
455 		(void *)(__psunsigned_t)args->len,
456 		(void *)((((__psint_t)args->type) << 16) |
457 			 (__psint_t)args->otype),
458 		(void *)(__psint_t)((args->wasdel << 3) |
459 				    (args->wasfromfl << 2) |
460 				    (args->isfl << 1) |
461 				    (args->userdata << 0)));
462 }
463 
464 /*
465  * Add an allocation trace entry for a free call.
466  */
467 STATIC void
xfs_alloc_trace_free(char * name,char * str,xfs_mount_t * mp,xfs_agnumber_t agno,xfs_agblock_t agbno,xfs_extlen_t len,int isfl,int line)468 xfs_alloc_trace_free(
469 	char		*name,		/* function tag string */
470 	char		*str,		/* additional string */
471 	xfs_mount_t	*mp,		/* file system mount point */
472 	xfs_agnumber_t	agno,		/* allocation group number */
473 	xfs_agblock_t	agbno,		/* a.g. relative block number */
474 	xfs_extlen_t	len,		/* length of extent */
475 	int		isfl,		/* set if is freelist allocation/free */
476 	int		line)		/* source line number */
477 {
478 	ktrace_enter(xfs_alloc_trace_buf,
479 		(void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)),
480 		(void *)name,
481 		(void *)str,
482 		(void *)mp,
483 		(void *)(__psunsigned_t)agno,
484 		(void *)(__psunsigned_t)agbno,
485 		(void *)(__psunsigned_t)len,
486 		(void *)(__psint_t)isfl,
487 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
488 }
489 
490 /*
491  * Add an allocation trace entry for modifying an agf.
492  */
493 STATIC void
xfs_alloc_trace_modagf(char * name,char * str,xfs_mount_t * mp,xfs_agf_t * agf,int flags,int line)494 xfs_alloc_trace_modagf(
495 	char		*name,		/* function tag string */
496 	char		*str,		/* additional string */
497 	xfs_mount_t	*mp,		/* file system mount point */
498 	xfs_agf_t	*agf,		/* new agf value */
499 	int		flags,		/* logging flags for agf */
500 	int		line)		/* source line number */
501 {
502 	ktrace_enter(xfs_alloc_trace_buf,
503 		(void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)),
504 		(void *)name,
505 		(void *)str,
506 		(void *)mp,
507 		(void *)(__psint_t)flags,
508 		(void *)(__psunsigned_t)INT_GET(agf->agf_seqno, ARCH_CONVERT),
509 		(void *)(__psunsigned_t)INT_GET(agf->agf_length, ARCH_CONVERT),
510 		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_BNO],
511 						ARCH_CONVERT),
512 		(void *)(__psunsigned_t)INT_GET(agf->agf_roots[XFS_BTNUM_CNT],
513 						ARCH_CONVERT),
514 		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_BNO],
515 						ARCH_CONVERT),
516 		(void *)(__psunsigned_t)INT_GET(agf->agf_levels[XFS_BTNUM_CNT],
517 						ARCH_CONVERT),
518 		(void *)(__psunsigned_t)INT_GET(agf->agf_flfirst, ARCH_CONVERT),
519 		(void *)(__psunsigned_t)INT_GET(agf->agf_fllast, ARCH_CONVERT),
520 		(void *)(__psunsigned_t)INT_GET(agf->agf_flcount, ARCH_CONVERT),
521 		(void *)(__psunsigned_t)INT_GET(agf->agf_freeblks, ARCH_CONVERT),
522 		(void *)(__psunsigned_t)INT_GET(agf->agf_longest, ARCH_CONVERT));
523 }
524 
525 STATIC void
xfs_alloc_trace_busy(char * name,char * str,xfs_mount_t * mp,xfs_agnumber_t agno,xfs_agblock_t agbno,xfs_extlen_t len,int slot,xfs_trans_t * tp,int trtype,int line)526 xfs_alloc_trace_busy(
527 	char		*name,		/* function tag string */
528 	char		*str,		/* additional string */
529 	xfs_mount_t	*mp,		/* file system mount poing */
530 	xfs_agnumber_t	agno,		/* allocation group number */
531 	xfs_agblock_t	agbno,		/* a.g. relative block number */
532 	xfs_extlen_t	len,		/* length of extent */
533 	int		slot,		/* perag Busy slot */
534 	xfs_trans_t	*tp,
535 	int		trtype,		/* type: add, delete, search */
536 	int		line)		/* source line number */
537 {
538 	ktrace_enter(xfs_alloc_trace_buf,
539 		(void *)(__psint_t)(trtype | (line << 16)),
540 		(void *)name,
541 		(void *)str,
542 		(void *)mp,
543 		(void *)(__psunsigned_t)agno,
544 		(void *)(__psunsigned_t)agbno,
545 		(void *)(__psunsigned_t)len,
546 		(void *)(__psint_t)slot,
547 		(void *)tp,
548 		NULL, NULL, NULL, NULL, NULL, NULL, NULL);
549 }
550 #endif	/* XFS_ALLOC_TRACE */
551 
552 /*
553  * Allocation group level functions.
554  */
555 
556 /*
557  * Allocate a variable extent in the allocation group agno.
558  * Type and bno are used to determine where in the allocation group the
559  * extent will start.
560  * Extent's length (returned in *len) will be between minlen and maxlen,
561  * and of the form k * prod + mod unless there's nothing that large.
562  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
563  */
564 STATIC int			/* error */
xfs_alloc_ag_vextent(xfs_alloc_arg_t * args)565 xfs_alloc_ag_vextent(
566 	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
567 {
568 	int		error=0;
569 #ifdef XFS_ALLOC_TRACE
570 	static char	fname[] = "xfs_alloc_ag_vextent";
571 #endif
572 
573 	ASSERT(args->minlen > 0);
574 	ASSERT(args->maxlen > 0);
575 	ASSERT(args->minlen <= args->maxlen);
576 	ASSERT(args->mod < args->prod);
577 	ASSERT(args->alignment > 0);
578 	/*
579 	 * Branch to correct routine based on the type.
580 	 */
581 	args->wasfromfl = 0;
582 	switch (args->type) {
583 	case XFS_ALLOCTYPE_THIS_AG:
584 		error = xfs_alloc_ag_vextent_size(args);
585 		break;
586 	case XFS_ALLOCTYPE_NEAR_BNO:
587 		error = xfs_alloc_ag_vextent_near(args);
588 		break;
589 	case XFS_ALLOCTYPE_THIS_BNO:
590 		error = xfs_alloc_ag_vextent_exact(args);
591 		break;
592 	default:
593 		ASSERT(0);
594 		/* NOTREACHED */
595 	}
596 	if (error)
597 		return error;
598 	/*
599 	 * If the allocation worked, need to change the agf structure
600 	 * (and log it), and the superblock.
601 	 */
602 	if (args->agbno != NULLAGBLOCK) {
603 		xfs_agf_t	*agf;	/* allocation group freelist header */
604 #ifdef XFS_ALLOC_TRACE
605 		xfs_mount_t	*mp = args->mp;
606 #endif
607 		long		slen = (long)args->len;
608 
609 		ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
610 		ASSERT(!(args->wasfromfl) || !args->isfl);
611 		ASSERT(args->agbno % args->alignment == 0);
612 		if (!(args->wasfromfl)) {
613 
614 			agf = XFS_BUF_TO_AGF(args->agbp);
615 			INT_MOD(agf->agf_freeblks, ARCH_CONVERT, -(args->len));
616 			xfs_trans_agblocks_delta(args->tp,
617 						 -((long)(args->len)));
618 			args->pag->pagf_freeblks -= args->len;
619 			ASSERT(INT_GET(agf->agf_freeblks, ARCH_CONVERT)
620 				<= INT_GET(agf->agf_length, ARCH_CONVERT));
621 			TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
622 			xfs_alloc_log_agf(args->tp, args->agbp,
623 						XFS_AGF_FREEBLKS);
624 			/* search the busylist for these blocks */
625 			xfs_alloc_search_busy(args->tp, args->agno,
626 					args->agbno, args->len);
627 		}
628 		if (!args->isfl)
629 			xfs_trans_mod_sb(args->tp,
630 				args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
631 					XFS_TRANS_SB_FDBLOCKS, -slen);
632 		XFS_STATS_INC(xs_allocx);
633 		XFS_STATS_ADD(xs_allocb, args->len);
634 	}
635 	return 0;
636 }
637 
638 /*
639  * Allocate a variable extent at exactly agno/bno.
640  * Extent's length (returned in *len) will be between minlen and maxlen,
641  * and of the form k * prod + mod unless there's nothing that large.
642  * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
643  */
644 STATIC int			/* error */
xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t * args)645 xfs_alloc_ag_vextent_exact(
646 	xfs_alloc_arg_t	*args)	/* allocation argument structure */
647 {
648 	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
649 	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
650 	xfs_agblock_t	end;	/* end of allocated extent */
651 	int		error;
652 	xfs_agblock_t	fbno;	/* start block of found extent */
653 	xfs_agblock_t	fend;	/* end block of found extent */
654 	xfs_extlen_t	flen;	/* length of found extent */
655 #ifdef XFS_ALLOC_TRACE
656 	static char	fname[] = "xfs_alloc_ag_vextent_exact";
657 #endif
658 	int		i;	/* success/failure of operation */
659 	xfs_agblock_t	maxend;	/* end of maximal extent */
660 	xfs_agblock_t	minend;	/* end of minimal extent */
661 	xfs_extlen_t	rlen;	/* length of returned extent */
662 
663 	ASSERT(args->alignment == 1);
664 	/*
665 	 * Allocate/initialize a cursor for the by-number freespace btree.
666 	 */
667 	bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
668 		args->agno, XFS_BTNUM_BNO, NULL, 0);
669 	/*
670 	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
671 	 * Look for the closest free block <= bno, it must contain bno
672 	 * if any free block does.
673 	 */
674 	if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i)))
675 		goto error0;
676 	if (!i) {
677 		/*
678 		 * Didn't find it, return null.
679 		 */
680 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
681 		args->agbno = NULLAGBLOCK;
682 		return 0;
683 	}
684 	/*
685 	 * Grab the freespace record.
686 	 */
687 	if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i)))
688 		goto error0;
689 	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
690 	ASSERT(fbno <= args->agbno);
691 	minend = args->agbno + args->minlen;
692 	maxend = args->agbno + args->maxlen;
693 	fend = fbno + flen;
694 	/*
695 	 * Give up if the freespace isn't long enough for the minimum request.
696 	 */
697 	if (fend < minend) {
698 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
699 		args->agbno = NULLAGBLOCK;
700 		return 0;
701 	}
702 	/*
703 	 * End of extent will be smaller of the freespace end and the
704 	 * maximal requested end.
705 	 */
706 	end = XFS_AGBLOCK_MIN(fend, maxend);
707 	/*
708 	 * Fix the length according to mod and prod if given.
709 	 */
710 	args->len = end - args->agbno;
711 	xfs_alloc_fix_len(args);
712 	if (!xfs_alloc_fix_minleft(args)) {
713 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
714 		return 0;
715 	}
716 	rlen = args->len;
717 	ASSERT(args->agbno + rlen <= fend);
718 	end = args->agbno + rlen;
719 	/*
720 	 * We are allocating agbno for rlen [agbno .. end]
721 	 * Allocate/initialize a cursor for the by-size btree.
722 	 */
723 	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
724 		args->agno, XFS_BTNUM_CNT, NULL, 0);
725 	ASSERT(args->agbno + args->len <=
726 		INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
727 			ARCH_CONVERT));
728 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
729 			args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
730 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
731 		goto error0;
732 	}
733 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
734 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
735 	TRACE_ALLOC("normal", args);
736 	args->wasfromfl = 0;
737 	return 0;
738 
739 error0:
740 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
741 	TRACE_ALLOC("error", args);
742 	return error;
743 }
744 
745 /*
746  * Allocate a variable extent near bno in the allocation group agno.
747  * Extent's length (returned in len) will be between minlen and maxlen,
748  * and of the form k * prod + mod unless there's nothing that large.
749  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
750  */
751 STATIC int				/* error */
xfs_alloc_ag_vextent_near(xfs_alloc_arg_t * args)752 xfs_alloc_ag_vextent_near(
753 	xfs_alloc_arg_t	*args)		/* allocation argument structure */
754 {
755 	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
756 	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
757 	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
758 #ifdef XFS_ALLOC_TRACE
759 	static char	fname[] = "xfs_alloc_ag_vextent_near";
760 #endif
761 	xfs_agblock_t	gtbno;		/* start bno of right side entry */
762 	xfs_agblock_t	gtbnoa;		/* aligned ... */
763 	xfs_extlen_t	gtdiff;		/* difference to right side entry */
764 	xfs_extlen_t	gtlen;		/* length of right side entry */
765 	xfs_extlen_t	gtlena;		/* aligned ... */
766 	xfs_agblock_t	gtnew;		/* useful start bno of right side */
767 	int		error;		/* error code */
768 	int		i;		/* result code, temporary */
769 	int		j;		/* result code, temporary */
770 	xfs_agblock_t	ltbno;		/* start bno of left side entry */
771 	xfs_agblock_t	ltbnoa;		/* aligned ... */
772 	xfs_extlen_t	ltdiff;		/* difference to left side entry */
773 	/*REFERENCED*/
774 	xfs_agblock_t	ltend;		/* end bno of left side entry */
775 	xfs_extlen_t	ltlen;		/* length of left side entry */
776 	xfs_extlen_t	ltlena;		/* aligned ... */
777 	xfs_agblock_t	ltnew;		/* useful start bno of left side */
778 	xfs_extlen_t	rlen;		/* length of returned extent */
779 #if defined(DEBUG) && defined(__KERNEL__)
780 	/*
781 	 * Randomly don't execute the first algorithm.
782 	 */
783 	int		dofirst;	/* set to do first algorithm */
784 
785 	dofirst = random() & 1;
786 #endif
787 	/*
788 	 * Get a cursor for the by-size btree.
789 	 */
790 	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
791 		args->agno, XFS_BTNUM_CNT, NULL, 0);
792 	ltlen = 0;
793 	bno_cur_lt = bno_cur_gt = NULL;
794 	/*
795 	 * See if there are any free extents as big as maxlen.
796 	 */
797 	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
798 		goto error0;
799 	/*
800 	 * If none, then pick up the last entry in the tree unless the
801 	 * tree is empty.
802 	 */
803 	if (!i) {
804 		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
805 				&ltlen, &i)))
806 			goto error0;
807 		if (i == 0 || ltlen == 0) {
808 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
809 			return 0;
810 		}
811 		ASSERT(i == 1);
812 	}
813 	args->wasfromfl = 0;
814 	/*
815 	 * First algorithm.
816 	 * If the requested extent is large wrt the freespaces available
817 	 * in this a.g., then the cursor will be pointing to a btree entry
818 	 * near the right edge of the tree.  If it's in the last btree leaf
819 	 * block, then we just examine all the entries in that block
820 	 * that are big enough, and pick the best one.
821 	 * This is written as a while loop so we can break out of it,
822 	 * but we never loop back to the top.
823 	 */
824 	while (xfs_btree_islastblock(cnt_cur, 0)) {
825 		xfs_extlen_t	bdiff;
826 		int		besti=0;
827 		xfs_extlen_t	blen=0;
828 		xfs_agblock_t	bnew=0;
829 
830 #if defined(DEBUG) && defined(__KERNEL__)
831 		if (!dofirst)
832 			break;
833 #endif
834 		/*
835 		 * Start from the entry that lookup found, sequence through
836 		 * all larger free blocks.  If we're actually pointing at a
837 		 * record smaller than maxlen, go to the start of this block,
838 		 * and skip all those smaller than minlen.
839 		 */
840 		if (ltlen || args->alignment > 1) {
841 			cnt_cur->bc_ptrs[0] = 1;
842 			do {
843 				if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
844 						&ltlen, &i)))
845 					goto error0;
846 				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
847 				if (ltlen >= args->minlen)
848 					break;
849 				if ((error = xfs_alloc_increment(cnt_cur, 0, &i)))
850 					goto error0;
851 			} while (i);
852 			ASSERT(ltlen >= args->minlen);
853 			if (!i)
854 				break;
855 		}
856 		i = cnt_cur->bc_ptrs[0];
857 		for (j = 1, blen = 0, bdiff = 0;
858 		     !error && j && (blen < args->maxlen || bdiff > 0);
859 		     error = xfs_alloc_increment(cnt_cur, 0, &j)) {
860 			/*
861 			 * For each entry, decide if it's better than
862 			 * the previous best entry.
863 			 */
864 			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
865 				goto error0;
866 			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
867 			if (!xfs_alloc_compute_aligned(ltbno, ltlen,
868 					args->alignment, args->minlen,
869 					&ltbnoa, &ltlena))
870 				continue;
871 			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
872 			xfs_alloc_fix_len(args);
873 			ASSERT(args->len >= args->minlen);
874 			if (args->len < blen)
875 				continue;
876 			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
877 				args->alignment, ltbno, ltlen, &ltnew);
878 			if (ltnew != NULLAGBLOCK &&
879 			    (args->len > blen || ltdiff < bdiff)) {
880 				bdiff = ltdiff;
881 				bnew = ltnew;
882 				blen = args->len;
883 				besti = cnt_cur->bc_ptrs[0];
884 			}
885 		}
886 		/*
887 		 * It didn't work.  We COULD be in a case where
888 		 * there's a good record somewhere, so try again.
889 		 */
890 		if (blen == 0)
891 			break;
892 		/*
893 		 * Point at the best entry, and retrieve it again.
894 		 */
895 		cnt_cur->bc_ptrs[0] = besti;
896 		if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
897 			goto error0;
898 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
899 		ltend = ltbno + ltlen;
900 		ASSERT(ltend <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
901 				ARCH_CONVERT));
902 		args->len = blen;
903 		if (!xfs_alloc_fix_minleft(args)) {
904 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
905 			TRACE_ALLOC("nominleft", args);
906 			return 0;
907 		}
908 		blen = args->len;
909 		/*
910 		 * We are allocating starting at bnew for blen blocks.
911 		 */
912 		args->agbno = bnew;
913 		ASSERT(bnew >= ltbno);
914 		ASSERT(bnew + blen <= ltend);
915 		/*
916 		 * Set up a cursor for the by-bno tree.
917 		 */
918 		bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp,
919 			args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0);
920 		/*
921 		 * Fix up the btree entries.
922 		 */
923 		if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
924 				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
925 			goto error0;
926 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
927 		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
928 		TRACE_ALLOC("first", args);
929 		return 0;
930 	}
931 	/*
932 	 * Second algorithm.
933 	 * Search in the by-bno tree to the left and to the right
934 	 * simultaneously, until in each case we find a space big enough,
935 	 * or run into the edge of the tree.  When we run into the edge,
936 	 * we deallocate that cursor.
937 	 * If both searches succeed, we compare the two spaces and pick
938 	 * the better one.
939 	 * With alignment, it's possible for both to fail; the upper
940 	 * level algorithm that picks allocation groups for allocations
941 	 * is not supposed to do this.
942 	 */
943 	/*
944 	 * Allocate and initialize the cursor for the leftward search.
945 	 */
946 	bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
947 		args->agno, XFS_BTNUM_BNO, NULL, 0);
948 	/*
949 	 * Lookup <= bno to find the leftward search's starting point.
950 	 */
951 	if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
952 		goto error0;
953 	if (!i) {
954 		/*
955 		 * Didn't find anything; use this cursor for the rightward
956 		 * search.
957 		 */
958 		bno_cur_gt = bno_cur_lt;
959 		bno_cur_lt = NULL;
960 	}
961 	/*
962 	 * Found something.  Duplicate the cursor for the rightward search.
963 	 */
964 	else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
965 		goto error0;
966 	/*
967 	 * Increment the cursor, so we will point at the entry just right
968 	 * of the leftward entry if any, or to the leftmost entry.
969 	 */
970 	if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
971 		goto error0;
972 	if (!i) {
973 		/*
974 		 * It failed, there are no rightward entries.
975 		 */
976 		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
977 		bno_cur_gt = NULL;
978 	}
979 	/*
980 	 * Loop going left with the leftward cursor, right with the
981 	 * rightward cursor, until either both directions give up or
982 	 * we find an entry at least as big as minlen.
983 	 */
984 	do {
985 		if (bno_cur_lt) {
986 			if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
987 				goto error0;
988 			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
989 			if (xfs_alloc_compute_aligned(ltbno, ltlen,
990 					args->alignment, args->minlen,
991 					&ltbnoa, &ltlena))
992 				break;
993 			if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i)))
994 				goto error0;
995 			if (!i) {
996 				xfs_btree_del_cursor(bno_cur_lt,
997 						     XFS_BTREE_NOERROR);
998 				bno_cur_lt = NULL;
999 			}
1000 		}
1001 		if (bno_cur_gt) {
1002 			if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1003 				goto error0;
1004 			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1005 			if (xfs_alloc_compute_aligned(gtbno, gtlen,
1006 					args->alignment, args->minlen,
1007 					&gtbnoa, &gtlena))
1008 				break;
1009 			if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i)))
1010 				goto error0;
1011 			if (!i) {
1012 				xfs_btree_del_cursor(bno_cur_gt,
1013 						     XFS_BTREE_NOERROR);
1014 				bno_cur_gt = NULL;
1015 			}
1016 		}
1017 	} while (bno_cur_lt || bno_cur_gt);
1018 	/*
1019 	 * Got both cursors still active, need to find better entry.
1020 	 */
1021 	if (bno_cur_lt && bno_cur_gt) {
1022 		/*
1023 		 * Left side is long enough, look for a right side entry.
1024 		 */
1025 		if (ltlena >= args->minlen) {
1026 			/*
1027 			 * Fix up the length.
1028 			 */
1029 			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1030 			xfs_alloc_fix_len(args);
1031 			rlen = args->len;
1032 			ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1033 				args->alignment, ltbno, ltlen, &ltnew);
1034 			/*
1035 			 * Not perfect.
1036 			 */
1037 			if (ltdiff) {
1038 				/*
1039 				 * Look until we find a better one, run out of
1040 				 * space, or run off the end.
1041 				 */
1042 				while (bno_cur_lt && bno_cur_gt) {
1043 					if ((error = xfs_alloc_get_rec(
1044 							bno_cur_gt, &gtbno,
1045 							&gtlen, &i)))
1046 						goto error0;
1047 					XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1048 					xfs_alloc_compute_aligned(gtbno, gtlen,
1049 						args->alignment, args->minlen,
1050 						&gtbnoa, &gtlena);
1051 					/*
1052 					 * The left one is clearly better.
1053 					 */
1054 					if (gtbnoa >= args->agbno + ltdiff) {
1055 						xfs_btree_del_cursor(
1056 							bno_cur_gt,
1057 							XFS_BTREE_NOERROR);
1058 						bno_cur_gt = NULL;
1059 						break;
1060 					}
1061 					/*
1062 					 * If we reach a big enough entry,
1063 					 * compare the two and pick the best.
1064 					 */
1065 					if (gtlena >= args->minlen) {
1066 						args->len =
1067 							XFS_EXTLEN_MIN(gtlena,
1068 								args->maxlen);
1069 						xfs_alloc_fix_len(args);
1070 						rlen = args->len;
1071 						gtdiff = xfs_alloc_compute_diff(
1072 							args->agbno, rlen,
1073 							args->alignment,
1074 							gtbno, gtlen, &gtnew);
1075 						/*
1076 						 * Right side is better.
1077 						 */
1078 						if (gtdiff < ltdiff) {
1079 							xfs_btree_del_cursor(
1080 								bno_cur_lt,
1081 								XFS_BTREE_NOERROR);
1082 							bno_cur_lt = NULL;
1083 						}
1084 						/*
1085 						 * Left side is better.
1086 						 */
1087 						else {
1088 							xfs_btree_del_cursor(
1089 								bno_cur_gt,
1090 								XFS_BTREE_NOERROR);
1091 							bno_cur_gt = NULL;
1092 						}
1093 						break;
1094 					}
1095 					/*
1096 					 * Fell off the right end.
1097 					 */
1098 					if ((error = xfs_alloc_increment(
1099 							bno_cur_gt, 0, &i)))
1100 						goto error0;
1101 					if (!i) {
1102 						xfs_btree_del_cursor(
1103 							bno_cur_gt,
1104 							XFS_BTREE_NOERROR);
1105 						bno_cur_gt = NULL;
1106 						break;
1107 					}
1108 				}
1109 			}
1110 			/*
1111 			 * The left side is perfect, trash the right side.
1112 			 */
1113 			else {
1114 				xfs_btree_del_cursor(bno_cur_gt,
1115 						     XFS_BTREE_NOERROR);
1116 				bno_cur_gt = NULL;
1117 			}
1118 		}
1119 		/*
1120 		 * It's the right side that was found first, look left.
1121 		 */
1122 		else {
1123 			/*
1124 			 * Fix up the length.
1125 			 */
1126 			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1127 			xfs_alloc_fix_len(args);
1128 			rlen = args->len;
1129 			gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1130 				args->alignment, gtbno, gtlen, &gtnew);
1131 			/*
1132 			 * Right side entry isn't perfect.
1133 			 */
1134 			if (gtdiff) {
1135 				/*
1136 				 * Look until we find a better one, run out of
1137 				 * space, or run off the end.
1138 				 */
1139 				while (bno_cur_lt && bno_cur_gt) {
1140 					if ((error = xfs_alloc_get_rec(
1141 							bno_cur_lt, &ltbno,
1142 							&ltlen, &i)))
1143 						goto error0;
1144 					XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1145 					xfs_alloc_compute_aligned(ltbno, ltlen,
1146 						args->alignment, args->minlen,
1147 						&ltbnoa, &ltlena);
1148 					/*
1149 					 * The right one is clearly better.
1150 					 */
1151 					if (ltbnoa <= args->agbno - gtdiff) {
1152 						xfs_btree_del_cursor(
1153 							bno_cur_lt,
1154 							XFS_BTREE_NOERROR);
1155 						bno_cur_lt = NULL;
1156 						break;
1157 					}
1158 					/*
1159 					 * If we reach a big enough entry,
1160 					 * compare the two and pick the best.
1161 					 */
1162 					if (ltlena >= args->minlen) {
1163 						args->len = XFS_EXTLEN_MIN(
1164 							ltlena, args->maxlen);
1165 						xfs_alloc_fix_len(args);
1166 						rlen = args->len;
1167 						ltdiff = xfs_alloc_compute_diff(
1168 							args->agbno, rlen,
1169 							args->alignment,
1170 							ltbno, ltlen, &ltnew);
1171 						/*
1172 						 * Left side is better.
1173 						 */
1174 						if (ltdiff < gtdiff) {
1175 							xfs_btree_del_cursor(
1176 								bno_cur_gt,
1177 								XFS_BTREE_NOERROR);
1178 							bno_cur_gt = NULL;
1179 						}
1180 						/*
1181 						 * Right side is better.
1182 						 */
1183 						else {
1184 							xfs_btree_del_cursor(
1185 								bno_cur_lt,
1186 								XFS_BTREE_NOERROR);
1187 							bno_cur_lt = NULL;
1188 						}
1189 						break;
1190 					}
1191 					/*
1192 					 * Fell off the left end.
1193 					 */
1194 					if ((error = xfs_alloc_decrement(
1195 							bno_cur_lt, 0, &i)))
1196 						goto error0;
1197 					if (!i) {
1198 						xfs_btree_del_cursor(bno_cur_lt,
1199 							XFS_BTREE_NOERROR);
1200 						bno_cur_lt = NULL;
1201 						break;
1202 					}
1203 				}
1204 			}
1205 			/*
1206 			 * The right side is perfect, trash the left side.
1207 			 */
1208 			else {
1209 				xfs_btree_del_cursor(bno_cur_lt,
1210 					XFS_BTREE_NOERROR);
1211 				bno_cur_lt = NULL;
1212 			}
1213 		}
1214 	}
1215 	/*
1216 	 * If we couldn't get anything, give up.
1217 	 */
1218 	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1219 		TRACE_ALLOC("neither", args);
1220 		args->agbno = NULLAGBLOCK;
1221 		return 0;
1222 	}
1223 	/*
1224 	 * At this point we have selected a freespace entry, either to the
1225 	 * left or to the right.  If it's on the right, copy all the
1226 	 * useful variables to the "left" set so we only have one
1227 	 * copy of this code.
1228 	 */
1229 	if (bno_cur_gt) {
1230 		bno_cur_lt = bno_cur_gt;
1231 		bno_cur_gt = NULL;
1232 		ltbno = gtbno;
1233 		ltbnoa = gtbnoa;
1234 		ltlen = gtlen;
1235 		ltlena = gtlena;
1236 		j = 1;
1237 	} else
1238 		j = 0;
1239 	/*
1240 	 * Fix up the length and compute the useful address.
1241 	 */
1242 	ltend = ltbno + ltlen;
1243 	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1244 	xfs_alloc_fix_len(args);
1245 	if (!xfs_alloc_fix_minleft(args)) {
1246 		TRACE_ALLOC("nominleft", args);
1247 		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1248 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1249 		return 0;
1250 	}
1251 	rlen = args->len;
1252 	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
1253 		ltlen, &ltnew);
1254 	ASSERT(ltnew >= ltbno);
1255 	ASSERT(ltnew + rlen <= ltend);
1256 	ASSERT(ltnew + rlen <= INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
1257 		ARCH_CONVERT));
1258 	args->agbno = ltnew;
1259 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1260 			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1261 		goto error0;
1262 	TRACE_ALLOC(j ? "gt" : "lt", args);
1263 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1264 	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1265 	return 0;
1266 
1267  error0:
1268 	TRACE_ALLOC("error", args);
1269 	if (cnt_cur != NULL)
1270 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1271 	if (bno_cur_lt != NULL)
1272 		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1273 	if (bno_cur_gt != NULL)
1274 		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1275 	return error;
1276 }
1277 
1278 /*
1279  * Allocate a variable extent anywhere in the allocation group agno.
1280  * Extent's length (returned in len) will be between minlen and maxlen,
1281  * and of the form k * prod + mod unless there's nothing that large.
1282  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1283  */
1284 STATIC int				/* error */
xfs_alloc_ag_vextent_size(xfs_alloc_arg_t * args)1285 xfs_alloc_ag_vextent_size(
1286 	xfs_alloc_arg_t	*args)		/* allocation argument structure */
1287 {
1288 	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
1289 	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
1290 	int		error;		/* error result */
1291 	xfs_agblock_t	fbno;		/* start of found freespace */
1292 	xfs_extlen_t	flen;		/* length of found freespace */
1293 #ifdef XFS_ALLOC_TRACE
1294 	static char	fname[] = "xfs_alloc_ag_vextent_size";
1295 #endif
1296 	int		i;		/* temp status variable */
1297 	xfs_agblock_t	rbno;		/* returned block number */
1298 	xfs_extlen_t	rlen;		/* length of returned extent */
1299 
1300 	/*
1301 	 * Allocate and initialize a cursor for the by-size btree.
1302 	 */
1303 	cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
1304 		args->agno, XFS_BTNUM_CNT, NULL, 0);
1305 	bno_cur = NULL;
1306 	/*
1307 	 * Look for an entry >= maxlen+alignment-1 blocks.
1308 	 */
1309 	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1310 			args->maxlen + args->alignment - 1, &i)))
1311 		goto error0;
1312 	/*
1313 	 * If none, then pick up the last entry in the tree unless the
1314 	 * tree is empty.
1315 	 */
1316 	if (!i) {
1317 		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
1318 				&flen, &i)))
1319 			goto error0;
1320 		if (i == 0 || flen == 0) {
1321 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1322 			TRACE_ALLOC("noentry", args);
1323 			return 0;
1324 		}
1325 		ASSERT(i == 1);
1326 	}
1327 	/*
1328 	 * There's a freespace as big as maxlen+alignment-1, get it.
1329 	 */
1330 	else {
1331 		if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
1332 			goto error0;
1333 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1334 	}
1335 	/*
1336 	 * In the first case above, we got the last entry in the
1337 	 * by-size btree.  Now we check to see if the space hits maxlen
1338 	 * once aligned; if not, we search left for something better.
1339 	 * This can't happen in the second case above.
1340 	 */
1341 	xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
1342 		&rbno, &rlen);
1343 	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1344 	XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1345 			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
1346 	if (rlen < args->maxlen) {
1347 		xfs_agblock_t	bestfbno;
1348 		xfs_extlen_t	bestflen;
1349 		xfs_agblock_t	bestrbno;
1350 		xfs_extlen_t	bestrlen;
1351 
1352 		bestrlen = rlen;
1353 		bestrbno = rbno;
1354 		bestflen = flen;
1355 		bestfbno = fbno;
1356 		for (;;) {
1357 			if ((error = xfs_alloc_decrement(cnt_cur, 0, &i)))
1358 				goto error0;
1359 			if (i == 0)
1360 				break;
1361 			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1362 					&i)))
1363 				goto error0;
1364 			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1365 			if (flen < bestrlen)
1366 				break;
1367 			xfs_alloc_compute_aligned(fbno, flen, args->alignment,
1368 				args->minlen, &rbno, &rlen);
1369 			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1370 			XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1371 				(rlen <= flen && rbno + rlen <= fbno + flen),
1372 				error0);
1373 			if (rlen > bestrlen) {
1374 				bestrlen = rlen;
1375 				bestrbno = rbno;
1376 				bestflen = flen;
1377 				bestfbno = fbno;
1378 				if (rlen == args->maxlen)
1379 					break;
1380 			}
1381 		}
1382 		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1383 				&i)))
1384 			goto error0;
1385 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1386 		rlen = bestrlen;
1387 		rbno = bestrbno;
1388 		flen = bestflen;
1389 		fbno = bestfbno;
1390 	}
1391 	args->wasfromfl = 0;
1392 	/*
1393 	 * Fix up the length.
1394 	 */
1395 	args->len = rlen;
1396 	xfs_alloc_fix_len(args);
1397 	if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
1398 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1399 		TRACE_ALLOC("nominleft", args);
1400 		args->agbno = NULLAGBLOCK;
1401 		return 0;
1402 	}
1403 	rlen = args->len;
1404 	XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1405 	/*
1406 	 * Allocate and initialize a cursor for the by-block tree.
1407 	 */
1408 	bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp,
1409 		args->agno, XFS_BTNUM_BNO, NULL, 0);
1410 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1411 			rbno, rlen, XFSA_FIXUP_CNT_OK)))
1412 		goto error0;
1413 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1414 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1415 	cnt_cur = bno_cur = NULL;
1416 	args->len = rlen;
1417 	args->agbno = rbno;
1418 	XFS_WANT_CORRUPTED_GOTO(
1419 		args->agbno + args->len <=
1420 			INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
1421 			ARCH_CONVERT),
1422 		error0);
1423 	TRACE_ALLOC("normal", args);
1424 	return 0;
1425 
1426 error0:
1427 	TRACE_ALLOC("error", args);
1428 	if (cnt_cur)
1429 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1430 	if (bno_cur)
1431 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1432 	return error;
1433 }
1434 
1435 /*
1436  * Deal with the case where only small freespaces remain.
1437  * Either return the contents of the last freespace record,
1438  * or allocate space from the freelist if there is nothing in the tree.
1439  */
1440 STATIC int			/* error */
xfs_alloc_ag_vextent_small(xfs_alloc_arg_t * args,xfs_btree_cur_t * ccur,xfs_agblock_t * fbnop,xfs_extlen_t * flenp,int * stat)1441 xfs_alloc_ag_vextent_small(
1442 	xfs_alloc_arg_t	*args,	/* allocation argument structure */
1443 	xfs_btree_cur_t	*ccur,	/* by-size cursor */
1444 	xfs_agblock_t	*fbnop,	/* result block number */
1445 	xfs_extlen_t	*flenp,	/* result length */
1446 	int		*stat)	/* status: 0-freelist, 1-normal/none */
1447 {
1448 	int		error;
1449 	xfs_agblock_t	fbno;
1450 	xfs_extlen_t	flen;
1451 #ifdef XFS_ALLOC_TRACE
1452 	static char	fname[] = "xfs_alloc_ag_vextent_small";
1453 #endif
1454 	int		i;
1455 
1456 	if ((error = xfs_alloc_decrement(ccur, 0, &i)))
1457 		goto error0;
1458 	if (i) {
1459 		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1460 			goto error0;
1461 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1462 	}
1463 	/*
1464 	 * Nothing in the btree, try the freelist.  Make sure
1465 	 * to respect minleft even when pulling from the
1466 	 * freelist.
1467 	 */
1468 	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
1469 		 (INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_flcount,
1470 			ARCH_CONVERT) > args->minleft)) {
1471 		if ((error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno)))
1472 			goto error0;
1473 		if (fbno != NULLAGBLOCK) {
1474 			if (args->userdata) {
1475 				xfs_buf_t	*bp;
1476 
1477 				bp = xfs_btree_get_bufs(args->mp, args->tp,
1478 					args->agno, fbno, 0);
1479 				xfs_trans_binval(args->tp, bp);
1480 			}
1481 			args->len = 1;
1482 			args->agbno = fbno;
1483 			XFS_WANT_CORRUPTED_GOTO(
1484 				args->agbno + args->len <=
1485 				INT_GET(XFS_BUF_TO_AGF(args->agbp)->agf_length,
1486 					ARCH_CONVERT),
1487 				error0);
1488 			args->wasfromfl = 1;
1489 			TRACE_ALLOC("freelist", args);
1490 			*stat = 0;
1491 			return 0;
1492 		}
1493 		/*
1494 		 * Nothing in the freelist.
1495 		 */
1496 		else
1497 			flen = 0;
1498 	}
1499 	/*
1500 	 * Can't allocate from the freelist for some reason.
1501 	 */
1502 	else
1503 		flen = 0;
1504 	/*
1505 	 * Can't do the allocation, give up.
1506 	 */
1507 	if (flen < args->minlen) {
1508 		args->agbno = NULLAGBLOCK;
1509 		TRACE_ALLOC("notenough", args);
1510 		flen = 0;
1511 	}
1512 	*fbnop = fbno;
1513 	*flenp = flen;
1514 	*stat = 1;
1515 	TRACE_ALLOC("normal", args);
1516 	return 0;
1517 
1518 error0:
1519 	TRACE_ALLOC("error", args);
1520 	return error;
1521 }
1522 
1523 /*
1524  * Free the extent starting at agno/bno for length.
1525  */
1526 STATIC int			/* error */
xfs_free_ag_extent(xfs_trans_t * tp,xfs_buf_t * agbp,xfs_agnumber_t agno,xfs_agblock_t bno,xfs_extlen_t len,int isfl)1527 xfs_free_ag_extent(
1528 	xfs_trans_t	*tp,	/* transaction pointer */
1529 	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
1530 	xfs_agnumber_t	agno,	/* allocation group number */
1531 	xfs_agblock_t	bno,	/* starting block number */
1532 	xfs_extlen_t	len,	/* length of extent */
1533 	int		isfl)	/* set if is freelist blocks - no sb acctg */
1534 {
1535 	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
1536 	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
1537 	int		error;		/* error return value */
1538 #ifdef XFS_ALLOC_TRACE
1539 	static char	fname[] = "xfs_free_ag_extent";
1540 #endif
1541 	xfs_agblock_t	gtbno;		/* start of right neighbor block */
1542 	xfs_extlen_t	gtlen;		/* length of right neighbor block */
1543 	int		haveleft;	/* have a left neighbor block */
1544 	int		haveright;	/* have a right neighbor block */
1545 	int		i;		/* temp, result code */
1546 	xfs_agblock_t	ltbno;		/* start of left neighbor block */
1547 	xfs_extlen_t	ltlen;		/* length of left neighbor block */
1548 	xfs_mount_t	*mp;		/* mount point struct for filesystem */
1549 	xfs_agblock_t	nbno;		/* new starting block of freespace */
1550 	xfs_extlen_t	nlen;		/* new length of freespace */
1551 
1552 	mp = tp->t_mountp;
1553 	/*
1554 	 * Allocate and initialize a cursor for the by-block btree.
1555 	 */
1556 	bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL,
1557 		0);
1558 	cnt_cur = NULL;
1559 	/*
1560 	 * Look for a neighboring block on the left (lower block numbers)
1561 	 * that is contiguous with this space.
1562 	 */
1563 	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1564 		goto error0;
1565 	if (haveleft) {
1566 		/*
1567 		 * There is a block to our left.
1568 		 */
1569 		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1570 			goto error0;
1571 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1572 		/*
1573 		 * It's not contiguous, though.
1574 		 */
1575 		if (ltbno + ltlen < bno)
1576 			haveleft = 0;
1577 		else {
1578 			/*
1579 			 * If this failure happens the request to free this
1580 			 * space was invalid, it's (partly) already free.
1581 			 * Very bad.
1582 			 */
1583 			XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1584 		}
1585 	}
1586 	/*
1587 	 * Look for a neighboring block on the right (higher block numbers)
1588 	 * that is contiguous with this space.
1589 	 */
1590 	if ((error = xfs_alloc_increment(bno_cur, 0, &haveright)))
1591 		goto error0;
1592 	if (haveright) {
1593 		/*
1594 		 * There is a block to our right.
1595 		 */
1596 		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1597 			goto error0;
1598 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1599 		/*
1600 		 * It's not contiguous, though.
1601 		 */
1602 		if (bno + len < gtbno)
1603 			haveright = 0;
1604 		else {
1605 			/*
1606 			 * If this failure happens the request to free this
1607 			 * space was invalid, it's (partly) already free.
1608 			 * Very bad.
1609 			 */
1610 			XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1611 		}
1612 	}
1613 	/*
1614 	 * Now allocate and initialize a cursor for the by-size tree.
1615 	 */
1616 	cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL,
1617 		0);
1618 	/*
1619 	 * Have both left and right contiguous neighbors.
1620 	 * Merge all three into a single free block.
1621 	 */
1622 	if (haveleft && haveright) {
1623 		/*
1624 		 * Delete the old by-size entry on the left.
1625 		 */
1626 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1627 			goto error0;
1628 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1629 		if ((error = xfs_alloc_delete(cnt_cur, &i)))
1630 			goto error0;
1631 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1632 		/*
1633 		 * Delete the old by-size entry on the right.
1634 		 */
1635 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1636 			goto error0;
1637 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1638 		if ((error = xfs_alloc_delete(cnt_cur, &i)))
1639 			goto error0;
1640 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1641 		/*
1642 		 * Delete the old by-block entry for the right block.
1643 		 */
1644 		if ((error = xfs_alloc_delete(bno_cur, &i)))
1645 			goto error0;
1646 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1647 		/*
1648 		 * Move the by-block cursor back to the left neighbor.
1649 		 */
1650 		if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
1651 			goto error0;
1652 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1653 #ifdef DEBUG
1654 		/*
1655 		 * Check that this is the right record: delete didn't
1656 		 * mangle the cursor.
1657 		 */
1658 		{
1659 			xfs_agblock_t	xxbno;
1660 			xfs_extlen_t	xxlen;
1661 
1662 			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1663 					&i)))
1664 				goto error0;
1665 			XFS_WANT_CORRUPTED_GOTO(
1666 				i == 1 && xxbno == ltbno && xxlen == ltlen,
1667 				error0);
1668 		}
1669 #endif
1670 		/*
1671 		 * Update remaining by-block entry to the new, joined block.
1672 		 */
1673 		nbno = ltbno;
1674 		nlen = len + ltlen + gtlen;
1675 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1676 			goto error0;
1677 	}
1678 	/*
1679 	 * Have only a left contiguous neighbor.
1680 	 * Merge it together with the new freespace.
1681 	 */
1682 	else if (haveleft) {
1683 		/*
1684 		 * Delete the old by-size entry on the left.
1685 		 */
1686 		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1687 			goto error0;
1688 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1689 		if ((error = xfs_alloc_delete(cnt_cur, &i)))
1690 			goto error0;
1691 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1692 		/*
1693 		 * Back up the by-block cursor to the left neighbor, and
1694 		 * update its length.
1695 		 */
1696 		if ((error = xfs_alloc_decrement(bno_cur, 0, &i)))
1697 			goto error0;
1698 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1699 		nbno = ltbno;
1700 		nlen = len + ltlen;
1701 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1702 			goto error0;
1703 	}
1704 	/*
1705 	 * Have only a right contiguous neighbor.
1706 	 * Merge it together with the new freespace.
1707 	 */
1708 	else if (haveright) {
1709 		/*
1710 		 * Delete the old by-size entry on the right.
1711 		 */
1712 		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1713 			goto error0;
1714 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1715 		if ((error = xfs_alloc_delete(cnt_cur, &i)))
1716 			goto error0;
1717 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1718 		/*
1719 		 * Update the starting block and length of the right
1720 		 * neighbor in the by-block tree.
1721 		 */
1722 		nbno = bno;
1723 		nlen = len + gtlen;
1724 		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1725 			goto error0;
1726 	}
1727 	/*
1728 	 * No contiguous neighbors.
1729 	 * Insert the new freespace into the by-block tree.
1730 	 */
1731 	else {
1732 		nbno = bno;
1733 		nlen = len;
1734 		if ((error = xfs_alloc_insert(bno_cur, &i)))
1735 			goto error0;
1736 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1737 	}
1738 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1739 	bno_cur = NULL;
1740 	/*
1741 	 * In all cases we need to insert the new freespace in the by-size tree.
1742 	 */
1743 	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1744 		goto error0;
1745 	XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
1746 	if ((error = xfs_alloc_insert(cnt_cur, &i)))
1747 		goto error0;
1748 	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1749 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1750 	cnt_cur = NULL;
1751 	/*
1752 	 * Update the freespace totals in the ag and superblock.
1753 	 */
1754 	{
1755 		xfs_agf_t	*agf;
1756 		xfs_perag_t	*pag;		/* per allocation group data */
1757 
1758 		agf = XFS_BUF_TO_AGF(agbp);
1759 		pag = &mp->m_perag[agno];
1760 		INT_MOD(agf->agf_freeblks, ARCH_CONVERT, len);
1761 		xfs_trans_agblocks_delta(tp, len);
1762 		pag->pagf_freeblks += len;
1763 		XFS_WANT_CORRUPTED_GOTO(
1764 			INT_GET(agf->agf_freeblks, ARCH_CONVERT)
1765 				<= INT_GET(agf->agf_length, ARCH_CONVERT),
1766 			error0);
1767 		TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS);
1768 		xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1769 		if (!isfl)
1770 			xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1771 		XFS_STATS_INC(xs_freex);
1772 		XFS_STATS_ADD(xs_freeb, len);
1773 	}
1774 	TRACE_FREE(haveleft ?
1775 			(haveright ? "both" : "left") :
1776 			(haveright ? "right" : "none"),
1777 		agno, bno, len, isfl);
1778 
1779 	/*
1780 	 * Since blocks move to the free list without the coordination
1781 	 * used in xfs_bmap_finish, we can't allow block to be available
1782 	 * for reallocation and non-transaction writing (user data)
1783 	 * until we know that the transaction that moved it to the free
1784 	 * list is permanently on disk.  We track the blocks by declaring
1785 	 * these blocks as "busy"; the busy list is maintained on a per-ag
1786 	 * basis and each transaction records which entries should be removed
1787 	 * when the iclog commits to disk.  If a busy block is allocated,
1788 	 * the iclog is pushed up to the LSN that freed the block.
1789 	 */
1790 	xfs_alloc_mark_busy(tp, agno, bno, len);
1791 	return 0;
1792 
1793  error0:
1794 	TRACE_FREE("error", agno, bno, len, isfl);
1795 	if (bno_cur)
1796 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1797 	if (cnt_cur)
1798 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1799 	return error;
1800 }
1801 
1802 /*
1803  * Visible (exported) allocation/free functions.
1804  * Some of these are used just by xfs_alloc_btree.c and this file.
1805  */
1806 
1807 /*
1808  * Compute and fill in value of m_ag_maxlevels.
1809  */
1810 void
xfs_alloc_compute_maxlevels(xfs_mount_t * mp)1811 xfs_alloc_compute_maxlevels(
1812 	xfs_mount_t	*mp)	/* file system mount structure */
1813 {
1814 	int		level;
1815 	uint		maxblocks;
1816 	uint		maxleafents;
1817 	int		minleafrecs;
1818 	int		minnoderecs;
1819 
1820 	maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1821 	minleafrecs = mp->m_alloc_mnr[0];
1822 	minnoderecs = mp->m_alloc_mnr[1];
1823 	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1824 	for (level = 1; maxblocks > 1; level++)
1825 		maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1826 	mp->m_ag_maxlevels = level;
1827 }
1828 
1829 /*
1830  * Decide whether to use this allocation group for this allocation.
1831  * If so, fix up the btree freelist's size.
1832  */
1833 STATIC int			/* error */
xfs_alloc_fix_freelist(xfs_alloc_arg_t * args,int flags)1834 xfs_alloc_fix_freelist(
1835 	xfs_alloc_arg_t	*args,	/* allocation argument structure */
1836 	int		flags)	/* XFS_ALLOC_FLAG_... */
1837 {
1838 	xfs_buf_t	*agbp;	/* agf buffer pointer */
1839 	xfs_agf_t	*agf;	/* a.g. freespace structure pointer */
1840 	xfs_buf_t	*agflbp;/* agfl buffer pointer */
1841 	xfs_agblock_t	bno;	/* freelist block */
1842 	xfs_extlen_t	delta;	/* new blocks needed in freelist */
1843 	int		error;	/* error result code */
1844 	xfs_extlen_t	longest;/* longest extent in allocation group */
1845 	xfs_mount_t	*mp;	/* file system mount point structure */
1846 	xfs_extlen_t	need;	/* total blocks needed in freelist */
1847 	xfs_perag_t	*pag;	/* per-ag information structure */
1848 	xfs_alloc_arg_t	targs;	/* local allocation arguments */
1849 	xfs_trans_t	*tp;	/* transaction pointer */
1850 
1851 	mp = args->mp;
1852 
1853 	pag = args->pag;
1854 	tp = args->tp;
1855 	if (!pag->pagf_init) {
1856 		if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1857 				&agbp)))
1858 			return error;
1859 		if (!pag->pagf_init) {
1860 			args->agbp = NULL;
1861 			return 0;
1862 		}
1863 	} else
1864 		agbp = NULL;
1865 
1866 	/* If this is a metadata prefered pag and we are user data
1867 	 * then try somewhere else if we are not being asked to
1868 	 * try harder at this point
1869 	 */
1870 	if (pag->pagf_metadata && args->userdata && flags) {
1871 		args->agbp = NULL;
1872 		return 0;
1873 	}
1874 
1875 	need = XFS_MIN_FREELIST_PAG(pag, mp);
1876 	delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
1877 	/*
1878 	 * If it looks like there isn't a long enough extent, or enough
1879 	 * total blocks, reject it.
1880 	 */
1881 	longest = (pag->pagf_longest > delta) ?
1882 		(pag->pagf_longest - delta) :
1883 		(pag->pagf_flcount > 0 || pag->pagf_longest > 0);
1884 	if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
1885 	    (args->minleft &&
1886 	     (int)(pag->pagf_freeblks + pag->pagf_flcount -
1887 		   need - args->total) <
1888 	     (int)args->minleft)) {
1889 		if (agbp)
1890 			xfs_trans_brelse(tp, agbp);
1891 		args->agbp = NULL;
1892 		return 0;
1893 	}
1894 	/*
1895 	 * Get the a.g. freespace buffer.
1896 	 * Can fail if we're not blocking on locks, and it's held.
1897 	 */
1898 	if (agbp == NULL) {
1899 		if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1900 				&agbp)))
1901 			return error;
1902 		if (agbp == NULL) {
1903 			args->agbp = NULL;
1904 			return 0;
1905 		}
1906 	}
1907 	/*
1908 	 * Figure out how many blocks we should have in the freelist.
1909 	 */
1910 	agf = XFS_BUF_TO_AGF(agbp);
1911 	need = XFS_MIN_FREELIST(agf, mp);
1912 	delta = need > INT_GET(agf->agf_flcount, ARCH_CONVERT) ?
1913 		(need - INT_GET(agf->agf_flcount, ARCH_CONVERT)) : 0;
1914 	/*
1915 	 * If there isn't enough total or single-extent, reject it.
1916 	 */
1917 	longest = INT_GET(agf->agf_longest, ARCH_CONVERT);
1918 	longest = (longest > delta) ? (longest - delta) :
1919 		(INT_GET(agf->agf_flcount, ARCH_CONVERT) > 0 || longest > 0);
1920 	if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
1921 	     (args->minleft &&
1922 		(int)(INT_GET(agf->agf_freeblks, ARCH_CONVERT) +
1923 		   INT_GET(agf->agf_flcount, ARCH_CONVERT) - need - args->total) <
1924 	     (int)args->minleft)) {
1925 		xfs_trans_brelse(tp, agbp);
1926 		args->agbp = NULL;
1927 		return 0;
1928 	}
1929 	/*
1930 	 * Make the freelist shorter if it's too long.
1931 	 */
1932 	while (INT_GET(agf->agf_flcount, ARCH_CONVERT) > need) {
1933 		xfs_buf_t	*bp;
1934 
1935 		if ((error = xfs_alloc_get_freelist(tp, agbp, &bno)))
1936 			return error;
1937 		if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1938 			return error;
1939 		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1940 		xfs_trans_binval(tp, bp);
1941 	}
1942 	/*
1943 	 * Initialize the args structure.
1944 	 */
1945 	targs.tp = tp;
1946 	targs.mp = mp;
1947 	targs.agbp = agbp;
1948 	targs.agno = args->agno;
1949 	targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1950 		targs.minalignslop = 0;
1951 	targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1952 	targs.type = XFS_ALLOCTYPE_THIS_AG;
1953 	targs.pag = pag;
1954 	if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
1955 		return error;
1956 	/*
1957 	 * Make the freelist longer if it's too short.
1958 	 */
1959 	while (INT_GET(agf->agf_flcount, ARCH_CONVERT) < need) {
1960 		targs.agbno = 0;
1961 		targs.maxlen = need - INT_GET(agf->agf_flcount, ARCH_CONVERT);
1962 		/*
1963 		 * Allocate as many blocks as possible at once.
1964 		 */
1965 		if ((error = xfs_alloc_ag_vextent(&targs)))
1966 			return error;
1967 		/*
1968 		 * Stop if we run out.  Won't happen if callers are obeying
1969 		 * the restrictions correctly.  Can happen for free calls
1970 		 * on a completely full ag.
1971 		 */
1972 		if (targs.agbno == NULLAGBLOCK)
1973 			break;
1974 		/*
1975 		 * Put each allocated block on the list.
1976 		 */
1977 		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
1978 			if ((error = xfs_alloc_put_freelist(tp, agbp, agflbp,
1979 					bno)))
1980 				return error;
1981 		}
1982 	}
1983 	args->agbp = agbp;
1984 	return 0;
1985 }
1986 
1987 /*
1988  * Get a block from the freelist.
1989  * Returns with the buffer for the block gotten.
1990  */
1991 int				/* error */
xfs_alloc_get_freelist(xfs_trans_t * tp,xfs_buf_t * agbp,xfs_agblock_t * bnop)1992 xfs_alloc_get_freelist(
1993 	xfs_trans_t	*tp,	/* transaction pointer */
1994 	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
1995 	xfs_agblock_t	*bnop)	/* block address retrieved from freelist */
1996 {
1997 	xfs_agf_t	*agf;	/* a.g. freespace structure */
1998 	xfs_agfl_t	*agfl;	/* a.g. freelist structure */
1999 	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
2000 	xfs_agblock_t	bno;	/* block number returned */
2001 	int		error;
2002 #ifdef XFS_ALLOC_TRACE
2003 	static char	fname[] = "xfs_alloc_get_freelist";
2004 #endif
2005 	xfs_mount_t	*mp;	/* mount structure */
2006 	xfs_perag_t	*pag;	/* per allocation group data */
2007 
2008 	agf = XFS_BUF_TO_AGF(agbp);
2009 	/*
2010 	 * Freelist is empty, give up.
2011 	 */
2012 	if (INT_ISZERO(agf->agf_flcount, ARCH_CONVERT)) {
2013 		*bnop = NULLAGBLOCK;
2014 		return 0;
2015 	}
2016 	/*
2017 	 * Read the array of free blocks.
2018 	 */
2019 	mp = tp->t_mountp;
2020 	if ((error = xfs_alloc_read_agfl(mp, tp,
2021 			INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp)))
2022 		return error;
2023 	agfl = XFS_BUF_TO_AGFL(agflbp);
2024 	/*
2025 	 * Get the block number and update the data structures.
2026 	 */
2027 	bno = INT_GET(agfl->agfl_bno[INT_GET(agf->agf_flfirst, ARCH_CONVERT)], ARCH_CONVERT);
2028 	INT_MOD(agf->agf_flfirst, ARCH_CONVERT, 1);
2029 	xfs_trans_brelse(tp, agflbp);
2030 	if (INT_GET(agf->agf_flfirst, ARCH_CONVERT) == XFS_AGFL_SIZE(mp))
2031 		INT_ZERO(agf->agf_flfirst, ARCH_CONVERT);
2032 	pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)];
2033 	INT_MOD(agf->agf_flcount, ARCH_CONVERT, -1);
2034 	xfs_trans_agflist_delta(tp, -1);
2035 	pag->pagf_flcount--;
2036 	TRACE_MODAGF(NULL, agf, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
2037 	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT);
2038 	*bnop = bno;
2039 
2040 	/*
2041 	 * As blocks are freed, they are added to the per-ag busy list
2042 	 * and remain there until the freeing transaction is committed to
2043 	 * disk.  Now that we have allocated blocks, this list must be
2044 	 * searched to see if a block is being reused.  If one is, then
2045 	 * the freeing transaction must be pushed to disk NOW by forcing
2046 	 * to disk all iclogs up that transaction's LSN.
2047 	 */
2048 	xfs_alloc_search_busy(tp, INT_GET(agf->agf_seqno, ARCH_CONVERT), bno, 1);
2049 	return 0;
2050 }
2051 
2052 /*
2053  * Log the given fields from the agf structure.
2054  */
2055 void
xfs_alloc_log_agf(xfs_trans_t * tp,xfs_buf_t * bp,int fields)2056 xfs_alloc_log_agf(
2057 	xfs_trans_t	*tp,	/* transaction pointer */
2058 	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
2059 	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
2060 {
2061 	int	first;		/* first byte offset */
2062 	int	last;		/* last byte offset */
2063 	static const short	offsets[] = {
2064 		offsetof(xfs_agf_t, agf_magicnum),
2065 		offsetof(xfs_agf_t, agf_versionnum),
2066 		offsetof(xfs_agf_t, agf_seqno),
2067 		offsetof(xfs_agf_t, agf_length),
2068 		offsetof(xfs_agf_t, agf_roots[0]),
2069 		offsetof(xfs_agf_t, agf_levels[0]),
2070 		offsetof(xfs_agf_t, agf_flfirst),
2071 		offsetof(xfs_agf_t, agf_fllast),
2072 		offsetof(xfs_agf_t, agf_flcount),
2073 		offsetof(xfs_agf_t, agf_freeblks),
2074 		offsetof(xfs_agf_t, agf_longest),
2075 		sizeof(xfs_agf_t)
2076 	};
2077 
2078 	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2079 	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2080 }
2081 
2082 /*
2083  * Interface for inode allocation to force the pag data to be initialized.
2084  */
2085 int					/* error */
xfs_alloc_pagf_init(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,int flags)2086 xfs_alloc_pagf_init(
2087 	xfs_mount_t		*mp,	/* file system mount structure */
2088 	xfs_trans_t		*tp,	/* transaction pointer */
2089 	xfs_agnumber_t		agno,	/* allocation group number */
2090 	int			flags)	/* XFS_ALLOC_FLAGS_... */
2091 {
2092 	xfs_buf_t		*bp;
2093 	int			error;
2094 
2095 	if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2096 		return error;
2097 	if (bp)
2098 		xfs_trans_brelse(tp, bp);
2099 	return 0;
2100 }
2101 
2102 /*
2103  * Put the block on the freelist for the allocation group.
2104  */
2105 int					/* error */
xfs_alloc_put_freelist(xfs_trans_t * tp,xfs_buf_t * agbp,xfs_buf_t * agflbp,xfs_agblock_t bno)2106 xfs_alloc_put_freelist(
2107 	xfs_trans_t		*tp,	/* transaction pointer */
2108 	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
2109 	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
2110 	xfs_agblock_t		bno)	/* block being freed */
2111 {
2112 	xfs_agf_t		*agf;	/* a.g. freespace structure */
2113 	xfs_agfl_t		*agfl;	/* a.g. free block array */
2114 	xfs_agblock_t		*blockp;/* pointer to array entry */
2115 	int			error;
2116 #ifdef XFS_ALLOC_TRACE
2117 	static char		fname[] = "xfs_alloc_put_freelist";
2118 #endif
2119 	xfs_mount_t		*mp;	/* mount structure */
2120 	xfs_perag_t		*pag;	/* per allocation group data */
2121 
2122 	agf = XFS_BUF_TO_AGF(agbp);
2123 	mp = tp->t_mountp;
2124 
2125 	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
2126 			INT_GET(agf->agf_seqno, ARCH_CONVERT), &agflbp)))
2127 		return error;
2128 	agfl = XFS_BUF_TO_AGFL(agflbp);
2129 	INT_MOD(agf->agf_fllast, ARCH_CONVERT, 1);
2130 	if (INT_GET(agf->agf_fllast, ARCH_CONVERT) == XFS_AGFL_SIZE(mp))
2131 		INT_ZERO(agf->agf_fllast, ARCH_CONVERT);
2132 	pag = &mp->m_perag[INT_GET(agf->agf_seqno, ARCH_CONVERT)];
2133 	INT_MOD(agf->agf_flcount, ARCH_CONVERT, 1);
2134 	xfs_trans_agflist_delta(tp, 1);
2135 	pag->pagf_flcount++;
2136 	ASSERT(INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE(mp));
2137 	blockp = &agfl->agfl_bno[INT_GET(agf->agf_fllast, ARCH_CONVERT)];
2138 	INT_SET(*blockp, ARCH_CONVERT, bno);
2139 	TRACE_MODAGF(NULL, agf, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
2140 	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLLAST | XFS_AGF_FLCOUNT);
2141 	xfs_trans_log_buf(tp, agflbp,
2142 		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2143 		(int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
2144 			sizeof(xfs_agblock_t) - 1));
2145 	return 0;
2146 }
2147 
2148 /*
2149  * Read in the allocation group header (free/alloc section).
2150  */
2151 int					/* error */
xfs_alloc_read_agf(xfs_mount_t * mp,xfs_trans_t * tp,xfs_agnumber_t agno,int flags,xfs_buf_t ** bpp)2152 xfs_alloc_read_agf(
2153 	xfs_mount_t	*mp,		/* mount point structure */
2154 	xfs_trans_t	*tp,		/* transaction pointer */
2155 	xfs_agnumber_t	agno,		/* allocation group number */
2156 	int		flags,		/* XFS_ALLOC_FLAG_... */
2157 	xfs_buf_t	**bpp)		/* buffer for the ag freelist header */
2158 {
2159 	xfs_agf_t	*agf;		/* ag freelist header */
2160 	int		agf_ok;		/* set if agf is consistent */
2161 	xfs_buf_t	*bp;		/* return value */
2162 	xfs_perag_t	*pag;		/* per allocation group data */
2163 	int		error;
2164 
2165 	ASSERT(agno != NULLAGNUMBER);
2166 	error = xfs_trans_read_buf(
2167 			mp, tp, mp->m_ddev_targp,
2168 			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
2169 			XFS_FSS_TO_BB(mp, 1),
2170 			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U,
2171 			&bp);
2172 	if (error)
2173 		return error;
2174 	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
2175 	if (!bp) {
2176 		*bpp = NULL;
2177 		return 0;
2178 	}
2179 	/*
2180 	 * Validate the magic number of the agf block.
2181 	 */
2182 	agf = XFS_BUF_TO_AGF(bp);
2183 	agf_ok =
2184 		INT_GET(agf->agf_magicnum, ARCH_CONVERT) == XFS_AGF_MAGIC &&
2185 		XFS_AGF_GOOD_VERSION(
2186 			INT_GET(agf->agf_versionnum, ARCH_CONVERT)) &&
2187 		INT_GET(agf->agf_freeblks, ARCH_CONVERT) <=
2188 				INT_GET(agf->agf_length, ARCH_CONVERT) &&
2189 		INT_GET(agf->agf_flfirst, ARCH_CONVERT) < XFS_AGFL_SIZE(mp) &&
2190 		INT_GET(agf->agf_fllast,  ARCH_CONVERT) < XFS_AGFL_SIZE(mp) &&
2191 		INT_GET(agf->agf_flcount, ARCH_CONVERT) <= XFS_AGFL_SIZE(mp);
2192 	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2193 			XFS_RANDOM_ALLOC_READ_AGF))) {
2194 		XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
2195 				     XFS_ERRLEVEL_LOW, mp, agf);
2196 		xfs_trans_brelse(tp, bp);
2197 		return XFS_ERROR(EFSCORRUPTED);
2198 	}
2199 	pag = &mp->m_perag[agno];
2200 	if (!pag->pagf_init) {
2201 		pag->pagf_freeblks = INT_GET(agf->agf_freeblks, ARCH_CONVERT);
2202 		pag->pagf_flcount = INT_GET(agf->agf_flcount, ARCH_CONVERT);
2203 		pag->pagf_longest = INT_GET(agf->agf_longest, ARCH_CONVERT);
2204 		pag->pagf_levels[XFS_BTNUM_BNOi] =
2205 			INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT);
2206 		pag->pagf_levels[XFS_BTNUM_CNTi] =
2207 			INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT);
2208 		spinlock_init(&pag->pagb_lock, "xfspagb");
2209 		pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS *
2210 					sizeof(xfs_perag_busy_t), KM_SLEEP);
2211 		pag->pagf_init = 1;
2212 	}
2213 #ifdef DEBUG
2214 	else if (!XFS_FORCED_SHUTDOWN(mp)) {
2215 		ASSERT(pag->pagf_freeblks == INT_GET(agf->agf_freeblks, ARCH_CONVERT));
2216 		ASSERT(pag->pagf_flcount == INT_GET(agf->agf_flcount, ARCH_CONVERT));
2217 		ASSERT(pag->pagf_longest == INT_GET(agf->agf_longest, ARCH_CONVERT));
2218 		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
2219 		       INT_GET(agf->agf_levels[XFS_BTNUM_BNOi], ARCH_CONVERT));
2220 		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
2221 		       INT_GET(agf->agf_levels[XFS_BTNUM_CNTi], ARCH_CONVERT));
2222 	}
2223 #endif
2224 	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF);
2225 	*bpp = bp;
2226 	return 0;
2227 }
2228 
2229 /*
2230  * Allocate an extent (variable-size).
2231  * Depending on the allocation type, we either look in a single allocation
2232  * group or loop over the allocation groups to find the result.
2233  */
2234 int				/* error */
xfs_alloc_vextent(xfs_alloc_arg_t * args)2235 xfs_alloc_vextent(
2236 	xfs_alloc_arg_t	*args)	/* allocation argument structure */
2237 {
2238 	xfs_agblock_t	agsize;	/* allocation group size */
2239 	int		error;
2240 	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
2241 #ifdef XFS_ALLOC_TRACE
2242 	static char	fname[] = "xfs_alloc_vextent";
2243 #endif
2244 	xfs_extlen_t	minleft;/* minimum left value, temp copy */
2245 	xfs_mount_t	*mp;	/* mount structure pointer */
2246 	xfs_agnumber_t	sagno;	/* starting allocation group number */
2247 	xfs_alloctype_t	type;	/* input allocation type */
2248 	int		bump_rotor = 0;
2249 	int		no_min = 0;
2250 	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2251 
2252 	mp = args->mp;
2253 	type = args->otype = args->type;
2254 	args->agbno = NULLAGBLOCK;
2255 	/*
2256 	 * Just fix this up, for the case where the last a.g. is shorter
2257 	 * (or there's only one a.g.) and the caller couldn't easily figure
2258 	 * that out (xfs_bmap_alloc).
2259 	 */
2260 	agsize = mp->m_sb.sb_agblocks;
2261 	if (args->maxlen > agsize)
2262 		args->maxlen = agsize;
2263 	if (args->alignment == 0)
2264 		args->alignment = 1;
2265 	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2266 	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2267 	ASSERT(args->minlen <= args->maxlen);
2268 	ASSERT(args->minlen <= agsize);
2269 	ASSERT(args->mod < args->prod);
2270 	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2271 	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2272 	    args->minlen > args->maxlen || args->minlen > agsize ||
2273 	    args->mod >= args->prod) {
2274 		args->fsbno = NULLFSBLOCK;
2275 		TRACE_ALLOC("badargs", args);
2276 		return 0;
2277 	}
2278 	minleft = args->minleft;
2279 
2280 	switch (type) {
2281 	case XFS_ALLOCTYPE_THIS_AG:
2282 	case XFS_ALLOCTYPE_NEAR_BNO:
2283 	case XFS_ALLOCTYPE_THIS_BNO:
2284 		/*
2285 		 * These three force us into a single a.g.
2286 		 */
2287 		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2288 		down_read(&mp->m_peraglock);
2289 		args->pag = &mp->m_perag[args->agno];
2290 		args->minleft = 0;
2291 		error = xfs_alloc_fix_freelist(args, 0);
2292 		args->minleft = minleft;
2293 		if (error) {
2294 			TRACE_ALLOC("nofix", args);
2295 			goto error0;
2296 		}
2297 		if (!args->agbp) {
2298 			up_read(&mp->m_peraglock);
2299 			TRACE_ALLOC("noagbp", args);
2300 			break;
2301 		}
2302 		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2303 		if ((error = xfs_alloc_ag_vextent(args)))
2304 			goto error0;
2305 		up_read(&mp->m_peraglock);
2306 		break;
2307 	case XFS_ALLOCTYPE_START_BNO:
2308 		/*
2309 		 * Try near allocation first, then anywhere-in-ag after
2310 		 * the first a.g. fails.
2311 		 */
2312 		if ((args->userdata  == XFS_ALLOC_INITIAL_USER_DATA) &&
2313 		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2314 			args->fsbno = XFS_AGB_TO_FSB(mp,
2315 					((mp->m_agfrotor / rotorstep) %
2316 					mp->m_sb.sb_agcount), 0);
2317 			bump_rotor = 1;
2318 		}
2319 		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2320 		args->type = XFS_ALLOCTYPE_NEAR_BNO;
2321 		/* FALLTHROUGH */
2322 	case XFS_ALLOCTYPE_ANY_AG:
2323 	case XFS_ALLOCTYPE_START_AG:
2324 	case XFS_ALLOCTYPE_FIRST_AG:
2325 		/*
2326 		 * Rotate through the allocation groups looking for a winner.
2327 		 */
2328 		if (type == XFS_ALLOCTYPE_ANY_AG) {
2329 			/*
2330 			 * Start with the last place we left off.
2331 			 */
2332 			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2333 					mp->m_sb.sb_agcount;
2334 			args->type = XFS_ALLOCTYPE_THIS_AG;
2335 			flags = XFS_ALLOC_FLAG_TRYLOCK;
2336 		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2337 			/*
2338 			 * Start with allocation group given by bno.
2339 			 */
2340 			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2341 			args->type = XFS_ALLOCTYPE_THIS_AG;
2342 			sagno = 0;
2343 			flags = 0;
2344 		} else {
2345 			if (type == XFS_ALLOCTYPE_START_AG)
2346 				args->type = XFS_ALLOCTYPE_THIS_AG;
2347 			/*
2348 			 * Start with the given allocation group.
2349 			 */
2350 			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2351 			flags = XFS_ALLOC_FLAG_TRYLOCK;
2352 		}
2353 		/*
2354 		 * Loop over allocation groups twice; first time with
2355 		 * trylock set, second time without.
2356 		 */
2357 		down_read(&mp->m_peraglock);
2358 		for (;;) {
2359 			args->pag = &mp->m_perag[args->agno];
2360 			if (no_min) args->minleft = 0;
2361 			error = xfs_alloc_fix_freelist(args, flags);
2362 			args->minleft = minleft;
2363 			if (error) {
2364 				TRACE_ALLOC("nofix", args);
2365 				goto error0;
2366 			}
2367 			/*
2368 			 * If we get a buffer back then the allocation will fly.
2369 			 */
2370 			if (args->agbp) {
2371 				if ((error = xfs_alloc_ag_vextent(args)))
2372 					goto error0;
2373 				break;
2374 			}
2375 			TRACE_ALLOC("loopfailed", args);
2376 			/*
2377 			 * Didn't work, figure out the next iteration.
2378 			 */
2379 			if (args->agno == sagno &&
2380 			    type == XFS_ALLOCTYPE_START_BNO)
2381 				args->type = XFS_ALLOCTYPE_THIS_AG;
2382 			if (++(args->agno) == mp->m_sb.sb_agcount)
2383 				args->agno = 0;
2384 			/*
2385 			 * Reached the starting a.g., must either be done
2386 			 * or switch to non-trylock mode.
2387 			 */
2388 			if (args->agno == sagno) {
2389 				if (no_min == 1) {
2390 					args->agbno = NULLAGBLOCK;
2391 					TRACE_ALLOC("allfailed", args);
2392 					break;
2393 				}
2394 				if (flags == 0) {
2395 					no_min = 1;
2396 				} else {
2397 					flags = 0;
2398 					if (type == XFS_ALLOCTYPE_START_BNO) {
2399 						args->agbno = XFS_FSB_TO_AGBNO(mp,
2400 							args->fsbno);
2401 						args->type = XFS_ALLOCTYPE_NEAR_BNO;
2402 					}
2403 				}
2404 			}
2405 		}
2406 		up_read(&mp->m_peraglock);
2407 		if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2408 			if (args->agno == sagno)
2409 				mp->m_agfrotor = (mp->m_agfrotor + 1) %
2410 					(mp->m_sb.sb_agcount * rotorstep);
2411 			else
2412 				mp->m_agfrotor = (args->agno * rotorstep + 1) %
2413 					(mp->m_sb.sb_agcount * rotorstep);
2414 		}
2415 		break;
2416 	default:
2417 		ASSERT(0);
2418 		/* NOTREACHED */
2419 	}
2420 	if (args->agbno == NULLAGBLOCK)
2421 		args->fsbno = NULLFSBLOCK;
2422 	else {
2423 		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2424 #ifdef DEBUG
2425 		ASSERT(args->len >= args->minlen);
2426 		ASSERT(args->len <= args->maxlen);
2427 		ASSERT(args->agbno % args->alignment == 0);
2428 		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2429 			args->len);
2430 #endif
2431 	}
2432 	return 0;
2433 error0:
2434 	up_read(&mp->m_peraglock);
2435 	return error;
2436 }
2437 
2438 /*
2439  * Free an extent.
2440  * Just break up the extent address and hand off to xfs_free_ag_extent
2441  * after fixing up the freelist.
2442  */
2443 int				/* error */
xfs_free_extent(xfs_trans_t * tp,xfs_fsblock_t bno,xfs_extlen_t len)2444 xfs_free_extent(
2445 	xfs_trans_t	*tp,	/* transaction pointer */
2446 	xfs_fsblock_t	bno,	/* starting block number of extent */
2447 	xfs_extlen_t	len)	/* length of extent */
2448 {
2449 #ifdef DEBUG
2450 	xfs_agf_t	*agf;	/* a.g. freespace header */
2451 #endif
2452 	xfs_alloc_arg_t	args;	/* allocation argument structure */
2453 	int		error;
2454 
2455 	ASSERT(len != 0);
2456 	args.tp = tp;
2457 	args.mp = tp->t_mountp;
2458 	args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2459 	ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2460 	args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
2461 	args.alignment = 1;
2462 	args.minlen = args.minleft = args.minalignslop = 0;
2463 	down_read(&args.mp->m_peraglock);
2464 	args.pag = &args.mp->m_perag[args.agno];
2465 	if ((error = xfs_alloc_fix_freelist(&args, 0)))
2466 		goto error0;
2467 #ifdef DEBUG
2468 	ASSERT(args.agbp != NULL);
2469 	agf = XFS_BUF_TO_AGF(args.agbp);
2470 	ASSERT(args.agbno + len <= INT_GET(agf->agf_length, ARCH_CONVERT));
2471 #endif
2472 	error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
2473 		len, 0);
2474 error0:
2475 	up_read(&args.mp->m_peraglock);
2476 	return error;
2477 }
2478 
2479 
2480 /*
2481  * AG Busy list management
2482  * The busy list contains block ranges that have been freed but whose
2483  * transacations have not yet hit disk.  If any block listed in a busy
2484  * list is reused, the transaction that freed it must be forced to disk
2485  * before continuing to use the block.
2486  *
2487  * xfs_alloc_mark_busy - add to the per-ag busy list
2488  * xfs_alloc_clear_busy - remove an item from the per-ag busy list
2489  */
2490 void
xfs_alloc_mark_busy(xfs_trans_t * tp,xfs_agnumber_t agno,xfs_agblock_t bno,xfs_extlen_t len)2491 xfs_alloc_mark_busy(xfs_trans_t *tp,
2492 		    xfs_agnumber_t agno,
2493 		    xfs_agblock_t bno,
2494 		    xfs_extlen_t len)
2495 {
2496 	xfs_mount_t		*mp;
2497 	xfs_perag_busy_t	*bsy;
2498 	int			n;
2499 	SPLDECL(s);
2500 
2501 	mp = tp->t_mountp;
2502 	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
2503 
2504 	/* search pagb_list for an open slot */
2505 	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
2506 	     n < XFS_PAGB_NUM_SLOTS;
2507 	     bsy++, n++) {
2508 		if (bsy->busy_tp == NULL) {
2509 			break;
2510 		}
2511 	}
2512 
2513 	if (n < XFS_PAGB_NUM_SLOTS) {
2514 		bsy = &mp->m_perag[agno].pagb_list[n];
2515 		mp->m_perag[agno].pagb_count++;
2516 		TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp);
2517 		bsy->busy_start = bno;
2518 		bsy->busy_length = len;
2519 		bsy->busy_tp = tp;
2520 		xfs_trans_add_busy(tp, agno, n);
2521 	} else {
2522 		TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp);
2523 		/*
2524 		 * The busy list is full!  Since it is now not possible to
2525 		 * track the free block, make this a synchronous transaction
2526 		 * to insure that the block is not reused before this
2527 		 * transaction commits.
2528 		 */
2529 		xfs_trans_set_sync(tp);
2530 	}
2531 
2532 	mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
2533 }
2534 
2535 void
xfs_alloc_clear_busy(xfs_trans_t * tp,xfs_agnumber_t agno,int idx)2536 xfs_alloc_clear_busy(xfs_trans_t *tp,
2537 		     xfs_agnumber_t agno,
2538 		     int idx)
2539 {
2540 	xfs_mount_t		*mp;
2541 	xfs_perag_busy_t	*list;
2542 	SPLDECL(s);
2543 
2544 	mp = tp->t_mountp;
2545 
2546 	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
2547 	list = mp->m_perag[agno].pagb_list;
2548 
2549 	ASSERT(idx < XFS_PAGB_NUM_SLOTS);
2550 	if (list[idx].busy_tp == tp) {
2551 		TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp);
2552 		list[idx].busy_tp = NULL;
2553 		mp->m_perag[agno].pagb_count--;
2554 	} else {
2555 		TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp);
2556 	}
2557 
2558 	mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
2559 }
2560 
2561 
2562 /*
2563  * returns non-zero if any of (agno,bno):len is in a busy list
2564  */
2565 int
xfs_alloc_search_busy(xfs_trans_t * tp,xfs_agnumber_t agno,xfs_agblock_t bno,xfs_extlen_t len)2566 xfs_alloc_search_busy(xfs_trans_t *tp,
2567 		    xfs_agnumber_t agno,
2568 		    xfs_agblock_t bno,
2569 		    xfs_extlen_t len)
2570 {
2571 	xfs_mount_t		*mp;
2572 	xfs_perag_busy_t	*bsy;
2573 	int			n;
2574 	xfs_agblock_t		uend, bend;
2575 	xfs_lsn_t		lsn;
2576 	int			cnt;
2577 	SPLDECL(s);
2578 
2579 	mp = tp->t_mountp;
2580 
2581 	s = mutex_spinlock(&mp->m_perag[agno].pagb_lock);
2582 	cnt = mp->m_perag[agno].pagb_count;
2583 
2584 	uend = bno + len - 1;
2585 
2586 	/* search pagb_list for this slot, skipping open slots */
2587 	for (bsy = mp->m_perag[agno].pagb_list, n = 0;
2588 	     cnt; bsy++, n++) {
2589 
2590 		/*
2591 		 * (start1,length1) within (start2, length2)
2592 		 */
2593 		if (bsy->busy_tp != NULL) {
2594 			bend = bsy->busy_start + bsy->busy_length - 1;
2595 			if ((bno > bend) ||
2596 			    (uend < bsy->busy_start)) {
2597 				cnt--;
2598 			} else {
2599 				TRACE_BUSYSEARCH("xfs_alloc_search_busy",
2600 						 "found1", agno, bno, len, n,
2601 						 tp);
2602 				break;
2603 			}
2604 		}
2605 	}
2606 
2607 	/*
2608 	 * If a block was found, force the log through the LSN of the
2609 	 * transaction that freed the block
2610 	 */
2611 	if (cnt) {
2612 		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp);
2613 		lsn = bsy->busy_tp->t_commit_lsn;
2614 		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
2615 		xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
2616 	} else {
2617 		TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp);
2618 		n = -1;
2619 		mutex_spinunlock(&mp->m_perag[agno].pagb_lock, s);
2620 	}
2621 
2622 	return n;
2623 }
2624