1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2006-2007 Silicon Graphics, Inc.
4  * Copyright (c) 2014 Christoph Hellwig.
5  * All Rights Reserved.
6  */
7 #include "xfs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_inode.h"
14 #include "xfs_bmap.h"
15 #include "xfs_alloc.h"
16 #include "xfs_mru_cache.h"
17 #include "xfs_trace.h"
18 #include "xfs_ag.h"
19 #include "xfs_ag_resv.h"
20 #include "xfs_trans.h"
21 #include "xfs_filestream.h"
22 
23 struct xfs_fstrm_item {
24 	struct xfs_mru_cache_elem	mru;
25 	xfs_agnumber_t			ag; /* AG in use for this directory */
26 };
27 
/*
 * Bit flags accepted by xfs_filestream_pick_ag().
 *
 * XFS_PICK_USERDATA: the allocation is for user data; AGs with
 *	pagf_metadata set are passed over unless XFS_PICK_LOWSPACE is also set.
 * XFS_PICK_LOWSPACE: lifts the XFS_PICK_USERDATA restriction above.
 */
enum xfs_fstrm_alloc {
	XFS_PICK_USERDATA	= (1 << 0),
	XFS_PICK_LOWSPACE	= (1 << 1),
};
32 
33 /*
34  * Allocation group filestream associations are tracked with per-ag atomic
35  * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a
36  * particular AG already has active filestreams associated with it.
37  */
38 int
xfs_filestream_peek_ag(xfs_mount_t * mp,xfs_agnumber_t agno)39 xfs_filestream_peek_ag(
40 	xfs_mount_t	*mp,
41 	xfs_agnumber_t	agno)
42 {
43 	struct xfs_perag *pag;
44 	int		ret;
45 
46 	pag = xfs_perag_get(mp, agno);
47 	ret = atomic_read(&pag->pagf_fstrms);
48 	xfs_perag_put(pag);
49 	return ret;
50 }
51 
52 static int
xfs_filestream_get_ag(xfs_mount_t * mp,xfs_agnumber_t agno)53 xfs_filestream_get_ag(
54 	xfs_mount_t	*mp,
55 	xfs_agnumber_t	agno)
56 {
57 	struct xfs_perag *pag;
58 	int		ret;
59 
60 	pag = xfs_perag_get(mp, agno);
61 	ret = atomic_inc_return(&pag->pagf_fstrms);
62 	xfs_perag_put(pag);
63 	return ret;
64 }
65 
66 static void
xfs_filestream_put_ag(xfs_mount_t * mp,xfs_agnumber_t agno)67 xfs_filestream_put_ag(
68 	xfs_mount_t	*mp,
69 	xfs_agnumber_t	agno)
70 {
71 	struct xfs_perag *pag;
72 
73 	pag = xfs_perag_get(mp, agno);
74 	atomic_dec(&pag->pagf_fstrms);
75 	xfs_perag_put(pag);
76 }
77 
78 static void
xfs_fstrm_free_func(void * data,struct xfs_mru_cache_elem * mru)79 xfs_fstrm_free_func(
80 	void			*data,
81 	struct xfs_mru_cache_elem *mru)
82 {
83 	struct xfs_mount	*mp = data;
84 	struct xfs_fstrm_item	*item =
85 		container_of(mru, struct xfs_fstrm_item, mru);
86 
87 	xfs_filestream_put_ag(mp, item->ag);
88 	trace_xfs_filestream_free(mp, mru->key, item->ag);
89 
90 	kmem_free(item);
91 }
92 
93 /*
94  * Scan the AGs starting at startag looking for an AG that isn't in use and has
95  * at least minlen blocks free.
96  */
97 static int
xfs_filestream_pick_ag(struct xfs_inode * ip,xfs_agnumber_t startag,xfs_agnumber_t * agp,int flags,xfs_extlen_t minlen)98 xfs_filestream_pick_ag(
99 	struct xfs_inode	*ip,
100 	xfs_agnumber_t		startag,
101 	xfs_agnumber_t		*agp,
102 	int			flags,
103 	xfs_extlen_t		minlen)
104 {
105 	struct xfs_mount	*mp = ip->i_mount;
106 	struct xfs_fstrm_item	*item;
107 	struct xfs_perag	*pag;
108 	xfs_extlen_t		longest, free = 0, minfree, maxfree = 0;
109 	xfs_agnumber_t		ag, max_ag = NULLAGNUMBER;
110 	int			err, trylock, nscan;
111 
112 	ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
113 
114 	/* 2% of an AG's blocks must be free for it to be chosen. */
115 	minfree = mp->m_sb.sb_agblocks / 50;
116 
117 	ag = startag;
118 	*agp = NULLAGNUMBER;
119 
120 	/* For the first pass, don't sleep trying to init the per-AG. */
121 	trylock = XFS_ALLOC_FLAG_TRYLOCK;
122 
123 	for (nscan = 0; 1; nscan++) {
124 		trace_xfs_filestream_scan(mp, ip->i_ino, ag);
125 
126 		pag = xfs_perag_get(mp, ag);
127 
128 		if (!pag->pagf_init) {
129 			err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
130 			if (err) {
131 				if (err != -EAGAIN) {
132 					xfs_perag_put(pag);
133 					return err;
134 				}
135 				/* Couldn't lock the AGF, skip this AG. */
136 				goto next_ag;
137 			}
138 		}
139 
140 		/* Keep track of the AG with the most free blocks. */
141 		if (pag->pagf_freeblks > maxfree) {
142 			maxfree = pag->pagf_freeblks;
143 			max_ag = ag;
144 		}
145 
146 		/*
147 		 * The AG reference count does two things: it enforces mutual
148 		 * exclusion when examining the suitability of an AG in this
149 		 * loop, and it guards against two filestreams being established
150 		 * in the same AG as each other.
151 		 */
152 		if (xfs_filestream_get_ag(mp, ag) > 1) {
153 			xfs_filestream_put_ag(mp, ag);
154 			goto next_ag;
155 		}
156 
157 		longest = xfs_alloc_longest_free_extent(pag,
158 				xfs_alloc_min_freelist(mp, pag),
159 				xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
160 		if (((minlen && longest >= minlen) ||
161 		     (!minlen && pag->pagf_freeblks >= minfree)) &&
162 		    (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
163 		     (flags & XFS_PICK_LOWSPACE))) {
164 
165 			/* Break out, retaining the reference on the AG. */
166 			free = pag->pagf_freeblks;
167 			xfs_perag_put(pag);
168 			*agp = ag;
169 			break;
170 		}
171 
172 		/* Drop the reference on this AG, it's not usable. */
173 		xfs_filestream_put_ag(mp, ag);
174 next_ag:
175 		xfs_perag_put(pag);
176 		/* Move to the next AG, wrapping to AG 0 if necessary. */
177 		if (++ag >= mp->m_sb.sb_agcount)
178 			ag = 0;
179 
180 		/* If a full pass of the AGs hasn't been done yet, continue. */
181 		if (ag != startag)
182 			continue;
183 
184 		/* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */
185 		if (trylock != 0) {
186 			trylock = 0;
187 			continue;
188 		}
189 
190 		/* Finally, if lowspace wasn't set, set it for the 3rd pass. */
191 		if (!(flags & XFS_PICK_LOWSPACE)) {
192 			flags |= XFS_PICK_LOWSPACE;
193 			continue;
194 		}
195 
196 		/*
197 		 * Take the AG with the most free space, regardless of whether
198 		 * it's already in use by another filestream.
199 		 */
200 		if (max_ag != NULLAGNUMBER) {
201 			xfs_filestream_get_ag(mp, max_ag);
202 			free = maxfree;
203 			*agp = max_ag;
204 			break;
205 		}
206 
207 		/* take AG 0 if none matched */
208 		trace_xfs_filestream_pick(ip, *agp, free, nscan);
209 		*agp = 0;
210 		return 0;
211 	}
212 
213 	trace_xfs_filestream_pick(ip, *agp, free, nscan);
214 
215 	if (*agp == NULLAGNUMBER)
216 		return 0;
217 
218 	err = -ENOMEM;
219 	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
220 	if (!item)
221 		goto out_put_ag;
222 
223 	item->ag = *agp;
224 
225 	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);
226 	if (err) {
227 		if (err == -EEXIST)
228 			err = 0;
229 		goto out_free_item;
230 	}
231 
232 	return 0;
233 
234 out_free_item:
235 	kmem_free(item);
236 out_put_ag:
237 	xfs_filestream_put_ag(mp, *agp);
238 	return err;
239 }
240 
241 static struct xfs_inode *
xfs_filestream_get_parent(struct xfs_inode * ip)242 xfs_filestream_get_parent(
243 	struct xfs_inode	*ip)
244 {
245 	struct inode		*inode = VFS_I(ip), *dir = NULL;
246 	struct dentry		*dentry, *parent;
247 
248 	dentry = d_find_alias(inode);
249 	if (!dentry)
250 		goto out;
251 
252 	parent = dget_parent(dentry);
253 	if (!parent)
254 		goto out_dput;
255 
256 	dir = igrab(d_inode(parent));
257 	dput(parent);
258 
259 out_dput:
260 	dput(dentry);
261 out:
262 	return dir ? XFS_I(dir) : NULL;
263 }
264 
265 /*
266  * Find the right allocation group for a file, either by finding an
267  * existing file stream or creating a new one.
268  *
269  * Returns NULLAGNUMBER in case of an error.
270  */
271 xfs_agnumber_t
xfs_filestream_lookup_ag(struct xfs_inode * ip)272 xfs_filestream_lookup_ag(
273 	struct xfs_inode	*ip)
274 {
275 	struct xfs_mount	*mp = ip->i_mount;
276 	struct xfs_inode	*pip = NULL;
277 	xfs_agnumber_t		startag, ag = NULLAGNUMBER;
278 	struct xfs_mru_cache_elem *mru;
279 
280 	ASSERT(S_ISREG(VFS_I(ip)->i_mode));
281 
282 	pip = xfs_filestream_get_parent(ip);
283 	if (!pip)
284 		return NULLAGNUMBER;
285 
286 	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
287 	if (mru) {
288 		ag = container_of(mru, struct xfs_fstrm_item, mru)->ag;
289 		xfs_mru_cache_done(mp->m_filestream);
290 
291 		trace_xfs_filestream_lookup(mp, ip->i_ino, ag);
292 		goto out;
293 	}
294 
295 	/*
296 	 * Set the starting AG using the rotor for inode32, otherwise
297 	 * use the directory inode's AG.
298 	 */
299 	if (xfs_is_inode32(mp)) {
300 		xfs_agnumber_t	 rotorstep = xfs_rotorstep;
301 		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
302 		mp->m_agfrotor = (mp->m_agfrotor + 1) %
303 		                 (mp->m_sb.sb_agcount * rotorstep);
304 	} else
305 		startag = XFS_INO_TO_AGNO(mp, pip->i_ino);
306 
307 	if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0))
308 		ag = NULLAGNUMBER;
309 out:
310 	xfs_irele(pip);
311 	return ag;
312 }
313 
314 /*
315  * Pick a new allocation group for the current file and its file stream.
316  *
317  * This is called when the allocator can't find a suitable extent in the
318  * current AG, and we have to move the stream into a new AG with more space.
319  */
320 int
xfs_filestream_new_ag(struct xfs_bmalloca * ap,xfs_agnumber_t * agp)321 xfs_filestream_new_ag(
322 	struct xfs_bmalloca	*ap,
323 	xfs_agnumber_t		*agp)
324 {
325 	struct xfs_inode	*ip = ap->ip, *pip;
326 	struct xfs_mount	*mp = ip->i_mount;
327 	xfs_extlen_t		minlen = ap->length;
328 	xfs_agnumber_t		startag = 0;
329 	int			flags = 0;
330 	int			err = 0;
331 	struct xfs_mru_cache_elem *mru;
332 
333 	*agp = NULLAGNUMBER;
334 
335 	pip = xfs_filestream_get_parent(ip);
336 	if (!pip)
337 		goto exit;
338 
339 	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino);
340 	if (mru) {
341 		struct xfs_fstrm_item *item =
342 			container_of(mru, struct xfs_fstrm_item, mru);
343 		startag = (item->ag + 1) % mp->m_sb.sb_agcount;
344 	}
345 
346 	if (ap->datatype & XFS_ALLOC_USERDATA)
347 		flags |= XFS_PICK_USERDATA;
348 	if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
349 		flags |= XFS_PICK_LOWSPACE;
350 
351 	err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);
352 
353 	/*
354 	 * Only free the item here so we skip over the old AG earlier.
355 	 */
356 	if (mru)
357 		xfs_fstrm_free_func(mp, mru);
358 
359 	xfs_irele(pip);
360 exit:
361 	if (*agp == NULLAGNUMBER)
362 		*agp = 0;
363 	return err;
364 }
365 
366 void
xfs_filestream_deassociate(struct xfs_inode * ip)367 xfs_filestream_deassociate(
368 	struct xfs_inode	*ip)
369 {
370 	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino);
371 }
372 
373 int
xfs_filestream_mount(xfs_mount_t * mp)374 xfs_filestream_mount(
375 	xfs_mount_t	*mp)
376 {
377 	/*
378 	 * The filestream timer tunable is currently fixed within the range of
379 	 * one second to four minutes, with five seconds being the default.  The
380 	 * group count is somewhat arbitrary, but it'd be nice to adhere to the
381 	 * timer tunable to within about 10 percent.  This requires at least 10
382 	 * groups.
383 	 */
384 	return xfs_mru_cache_create(&mp->m_filestream, mp,
385 			xfs_fstrm_centisecs * 10, 10, xfs_fstrm_free_func);
386 }
387 
388 void
xfs_filestream_unmount(xfs_mount_t * mp)389 xfs_filestream_unmount(
390 	xfs_mount_t	*mp)
391 {
392 	xfs_mru_cache_destroy(mp->m_filestream);
393 }
394