/*
 * linux/fs/nfs/pagelist.c
 *
 * A set of helper functions for managing NFS read and write requests.
 * The main purpose of these routines is to provide support for the
 * coalescing of several requests into a single RPC call.
 *
 * Copyright 2000, 2001 (c) Trond Myklebust <trond.myklebust@fys.uio.no>
 *
 */

#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/nfs_page.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/export.h>

#include "internal.h"
#include "pnfs.h"

static struct kmem_cache *nfs_page_cachep;

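/*
 * Allocate a zeroed nfs_page from the dedicated slab cache.
 * Uses GFP_KERNEL, so the caller must be able to sleep.
 */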
static inline struct nfs_page *
nfs_page_alloc(void)
{
	struct nfs_page	*p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
	if (p)
		INIT_LIST_HEAD(&p->wb_list);
	return p;
}

static inline void
nfs_page_free(struct nfs_page *p)
{
	kmem_cache_free(nfs_page_cachep, p);
}

/**
 * nfs_create_request - Create an NFS read/write request.
 * @ctx: open context to use
 * @inode: inode to which the request is attached
 * @page: page to write
 * @offset: starting offset within the page for the write
 * @count: number of bytes to read/write
 *
 * The page must be locked by the caller. This makes sure we never
 * create two different requests for the same page.
 * The caller must ensure it is safe to sleep in this function.
 */
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
		   struct page *page,
		   unsigned int offset, unsigned int count)
{
	struct nfs_page		*req;

	/* try to allocate the request struct */
	req = nfs_page_alloc();
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	/* get lock context early so we can deal with alloc failures */
	req->wb_lock_context = nfs_get_lock_context(ctx);
	if (req->wb_lock_context == NULL) {
		nfs_page_free(req);
		return ERR_PTR(-ENOMEM);
	}

	/* Initialize the request struct. Initially, we assume a
	 * long write-back delay. This will be adjusted in
	 * update_nfs_request below if the region is not locked. */
	req->wb_page    = page;
	atomic_set(&req->wb_complete, 0);
	req->wb_index	= page->index;
	page_cache_get(page);
	BUG_ON(PagePrivate(page));
	BUG_ON(!PageLocked(page));
	BUG_ON(page->mapping->host != inode);
	req->wb_offset  = offset;
	req->wb_pgbase	= offset;
	req->wb_bytes   = count;
	req->wb_context = get_nfs_open_context(ctx);
	kref_init(&req->wb_kref);
	return req;
}

/**
 * nfs_unlock_request - Unlock request and wake up sleepers.
 * @req: pointer to the request to unlock
 */
void nfs_unlock_request(struct nfs_page *req)
{
	if (!NFS_WBACK_BUSY(req)) {
		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
		BUG();
	}
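	/*
	 * Make prior updates visible before clearing PG_BUSY, and make the
	 * cleared bit visible before waking anyone sleeping in
	 * nfs_wait_on_request().
	 */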
	smp_mb__before_clear_bit();
	clear_bit(PG_BUSY, &req->wb_flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&req->wb_flags, PG_BUSY);
	nfs_release_request(req);
}

/*
 * nfs_clear_request - Free up all resources allocated to the request
 * @req: the request to clean up
 *
 * Release page and open context resources associated with a read/write
 * request after it has completed.
 */
static void nfs_clear_request(struct nfs_page *req)
{
	struct page *page = req->wb_page;
	struct nfs_open_context *ctx = req->wb_context;
	struct nfs_lock_context *l_ctx = req->wb_lock_context;

	if (page != NULL) {
		page_cache_release(page);
		req->wb_page = NULL;
	}
	if (l_ctx != NULL) {
		nfs_put_lock_context(l_ctx);
		req->wb_lock_context = NULL;
	}
	if (ctx != NULL) {
		put_nfs_open_context(ctx);
		req->wb_context = NULL;
	}
}


/**
 * nfs_release_request - Release the count on an NFS read/write request
 * @req: request to release
 *
 * Note: Should never be called with the spinlock held!
 */
static void nfs_free_request(struct kref *kref)
{
	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);

	/* Release struct file and open context */
	nfs_clear_request(req);
	nfs_page_free(req);
}

void nfs_release_request(struct nfs_page *req)
{
	kref_put(&req->wb_kref, nfs_free_request);
}

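/*
 * wait_on_bit() action: sleep in io_schedule() until the bit is cleared.
 * Always returns 0, so the wait is never aborted early.
 */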
static int nfs_wait_bit_uninterruptible(void *word)
{
	io_schedule();
	return 0;
}

/**
 * nfs_wait_on_request - Wait for a request to complete.
 * @req: request to wait upon.
 *
 * The wait is not interruptible; the caller sleeps in TASK_UNINTERRUPTIBLE
 * until the request is unlocked.
 * The user is responsible for holding a count on the request.
 */
int
nfs_wait_on_request(struct nfs_page *req)
{
	return wait_on_bit(&req->wb_flags, PG_BUSY,
			nfs_wait_bit_uninterruptible,
			TASK_UNINTERRUPTIBLE);
}

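/*
 * Generic ->pg_test() helper: allow coalescing as long as the combined
 * byte count stays within the descriptor's block size.
 */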
bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
{
	/*
	 * FIXME: ideally we should be able to coalesce all requests
	 * that are not block boundary aligned, but currently this
	 * is problematic for the case of bsize < PAGE_CACHE_SIZE,
	 * since nfs_flush_multi and nfs_pagein_multi assume you
	 * can have only one struct nfs_page.
	 */
	if (desc->pg_bsize < PAGE_SIZE)
		return 0;

	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);

/**
 * nfs_pageio_init - initialise a page io descriptor
 * @desc: pointer to descriptor
 * @inode: pointer to inode
 * @pg_ops: pointer to pageio operations
 * @bsize: io block size
 * @io_flags: extra parameters for the io function
 */
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
		     struct inode *inode,
		     const struct nfs_pageio_ops *pg_ops,
		     size_t bsize,
		     int io_flags)
{
	INIT_LIST_HEAD(&desc->pg_list);
	desc->pg_bytes_written = 0;
	desc->pg_count = 0;
	desc->pg_bsize = bsize;
	desc->pg_base = 0;
	desc->pg_moreio = 0;
	desc->pg_recoalesce = 0;
	desc->pg_inode = inode;
	desc->pg_ops = pg_ops;
	desc->pg_ioflags = io_flags;
	desc->pg_error = 0;
	desc->pg_lseg = NULL;
}

/**
 * nfs_can_coalesce_requests - test two requests for compatibility
 * @prev: pointer to nfs_page
 * @req: pointer to nfs_page
 * @pgio: pointer to the pageio descriptor whose ->pg_test is consulted
 *
 * The nfs_page structures 'prev' and 'req' are compared to ensure that the
 * page data area they describe is contiguous, and that their RPC
 * credentials, NFSv4 open state, and lockowners are the same.
 *
 * Return 'true' if this is the case, else return 'false'.
 */
static bool nfs_can_coalesce_requests(struct nfs_page *prev,
				      struct nfs_page *req,
				      struct nfs_pageio_descriptor *pgio)
{
	if (req->wb_context->cred != prev->wb_context->cred)
		return false;
	if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
		return false;
	if (req->wb_context->state != prev->wb_context->state)
		return false;
	if (req->wb_index != (prev->wb_index + 1))
		return false;
	if (req->wb_pgbase != 0)
		return false;
	if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
		return false;
	return pgio->pg_ops->pg_test(pgio, prev, req);
}

/**
 * nfs_pageio_do_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
				     struct nfs_page *req)
{
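	/*
	 * A new request must be compatible with the last one queued;
	 * for the first request, give pg_ops a chance to initialise
	 * the descriptor.
	 */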
	if (desc->pg_count != 0) {
		struct nfs_page *prev;

		prev = nfs_list_entry(desc->pg_list.prev);
		if (!nfs_can_coalesce_requests(prev, req, desc))
			return 0;
	} else {
		if (desc->pg_ops->pg_init)
			desc->pg_ops->pg_init(desc, req);
		desc->pg_base = req->wb_pgbase;
	}
	nfs_list_remove_request(req);
	nfs_list_add_request(req, &desc->pg_list);
	desc->pg_count += req->wb_bytes;
	return 1;
}

/*
 * Helper for nfs_pageio_add_request and nfs_pageio_complete
 */
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
	if (!list_empty(&desc->pg_list)) {
		int error = desc->pg_ops->pg_doio(desc);
		if (error < 0)
			desc->pg_error = error;
		else
			desc->pg_bytes_written += desc->pg_count;
	}
	if (list_empty(&desc->pg_list)) {
		desc->pg_count = 0;
		desc->pg_base = 0;
	}
}

/**
 * __nfs_pageio_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns true if the request 'req' was successfully coalesced into the
 * existing list of pages 'desc'.
 */
static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
			   struct nfs_page *req)
{
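	/*
	 * If the request does not coalesce with the pending list, flush
	 * what has been gathered so far and retry; give up on an I/O
	 * error or when the caller needs to recoalesce.
	 */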
	while (!nfs_pageio_do_add_request(desc, req)) {
		desc->pg_moreio = 1;
		nfs_pageio_doio(desc);
		if (desc->pg_error < 0)
			return 0;
		desc->pg_moreio = 0;
		if (desc->pg_recoalesce)
			return 0;
	}
	return 1;
}

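/*
 * Drain the descriptor's current list of requests and feed them back in
 * one at a time, so that coalescing decisions are made afresh.
 */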
static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);

	do {
		list_splice_init(&desc->pg_list, &head);
		desc->pg_bytes_written -= desc->pg_count;
		desc->pg_count = 0;
		desc->pg_base = 0;
		desc->pg_recoalesce = 0;

		while (!list_empty(&head)) {
			struct nfs_page *req;

			req = list_first_entry(&head, struct nfs_page, wb_list);
			nfs_list_remove_request(req);
			if (__nfs_pageio_add_request(desc, req))
				continue;
			if (desc->pg_error < 0)
				return 0;
			break;
		}
	} while (desc->pg_recoalesce);
	return 1;
}

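/**
 * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
 * @desc: destination io descriptor
 * @req: request
 *
 * Returns 1 if the request was added to 'desc', possibly after flushing
 * and recoalescing the existing list; 0 if an error occurred (in which
 * case desc->pg_error is set).
 */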
int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
		struct nfs_page *req)
{
	int ret;

	do {
		ret = __nfs_pageio_add_request(desc, req);
		if (ret)
			break;
		if (desc->pg_error < 0)
			break;
		ret = nfs_do_recoalesce(desc);
	} while (ret);
	return ret;
}

/**
 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
 * @desc: pointer to io descriptor
 */
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
	for (;;) {
		nfs_pageio_doio(desc);
		if (!desc->pg_recoalesce)
			break;
		if (!nfs_do_recoalesce(desc))
			break;
	}
}

/**
 * nfs_pageio_cond_complete - Conditional I/O completion
 * @desc: pointer to io descriptor
 * @index: page index
 *
 * It is important to ensure that processes don't try to take locks
 * on non-contiguous ranges of pages as that might deadlock. This
 * function should be called before attempting to wait on a locked
 * nfs_page. It will complete the I/O if the page index 'index'
 * is not contiguous with the existing list of pages in 'desc'.
 */
void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
{
	if (!list_empty(&desc->pg_list)) {
		struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
		if (index != prev->wb_index + 1)
			nfs_pageio_complete(desc);
	}
}

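/*
 * Create the slab cache used by nfs_page_alloc(); intended to be called
 * once during NFS client initialisation.
 */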
int __init nfs_init_nfspagecache(void)
{
	nfs_page_cachep = kmem_cache_create("nfs_page",
					    sizeof(struct nfs_page),
					    0, SLAB_HWCACHE_ALIGN,
					    NULL);
	if (nfs_page_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_nfspagecache(void)
{
	kmem_cache_destroy(nfs_page_cachep);
}