1 /*
2  * linux/fs/nfs/read.c
3  *
4  * Block I/O for NFS
5  *
6  * Partial copy of Linus' read cache modifications to fs/nfs/file.c
7  * modified for async RPC by okir@monad.swb.de
8  *
9  * We do an ugly hack here in order to return proper error codes to the
10  * user program when a read request failed: since generic_file_read
11  * only checks the return value of inode->i_op->readpage() which is always 0
12  * for async RPC, we set the error bit of the page to 1 when an error occurs,
13  * and make nfs_readpage transmit requests synchronously when encountering this.
14  * This is only a small problem, though, since we now retry all operations
15  * within the RPC code when root squashing is suspected.
16  */
17 
18 #include <linux/config.h>
19 #include <linux/sched.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/fcntl.h>
23 #include <linux/stat.h>
24 #include <linux/mm.h>
25 #include <linux/slab.h>
26 #include <linux/pagemap.h>
27 #include <linux/sunrpc/clnt.h>
28 #include <linux/nfs_fs.h>
29 #include <linux/nfs_page.h>
30 #include <linux/nfs_flushd.h>
31 #include <linux/smp_lock.h>
32 
33 #include <asm/system.h>
34 
35 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
36 
/*
 * Per-call state for one asynchronous READ RPC.  Allocated from
 * nfs_rdata_cachep in nfs_readdata_alloc() and freed from the task's
 * tk_release callback (nfs_readdata_release) once the RPC completes.
 */
struct nfs_read_data {
	struct rpc_task		task;	/* embedded RPC task driving this read */
	struct inode		*inode;	/* inode the coalesced requests belong to */
	struct rpc_cred		*cred;	/* credential taken from the first request */
	struct nfs_readargs	args;	/* XDR argument struct */
	struct nfs_readres	res;	/* ... and result struct */
	struct nfs_fattr	fattr;	/* fattr storage */
	struct list_head	pages;	/* Coalesced read requests */
	struct page		*pagevec[NFS_READ_MAXIOV];	/* pages backing args.pages */
};
47 
48 /*
49  * Local function declarations
50  */
51 static void	nfs_readpage_result(struct rpc_task *task);
52 
53 /* Hack for future NFS swap support */
54 #ifndef IS_SWAPFILE
55 # define IS_SWAPFILE(inode)	(0)
56 #endif
57 
58 static kmem_cache_t *nfs_rdata_cachep;
59 
nfs_readdata_alloc(void)60 static __inline__ struct nfs_read_data *nfs_readdata_alloc(void)
61 {
62 	struct nfs_read_data   *p;
63 	p = kmem_cache_alloc(nfs_rdata_cachep, SLAB_NOFS);
64 	if (p) {
65 		memset(p, 0, sizeof(*p));
66 		INIT_LIST_HEAD(&p->pages);
67 		p->args.pages = p->pagevec;
68 	}
69 	return p;
70 }
71 
nfs_readdata_free(struct nfs_read_data * p)72 static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
73 {
74 	kmem_cache_free(nfs_rdata_cachep, p);
75 }
76 
nfs_readdata_release(struct rpc_task * task)77 static void nfs_readdata_release(struct rpc_task *task)
78 {
79         struct nfs_read_data   *data = (struct nfs_read_data *)task->tk_calldata;
80         nfs_readdata_free(data);
81 }
82 
/*
 * Read a page synchronously.
 *
 * Issues blocking READ calls of at most rsize bytes each until the page
 * is full or the server returns a short read.  Any unread tail is
 * zero-filled before the page is marked uptodate.  The page is unlocked
 * on both the success and the error path.  Returns 0 or a negative errno.
 */
static int
nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page)
{
	struct rpc_cred	*cred = NULL;
	struct nfs_fattr fattr;
	unsigned int	offset = 0;		/* bytes of the page filled so far */
	int		rsize = NFS_SERVER(inode)->rsize;
	int		result;
	int		count = PAGE_CACHE_SIZE;	/* bytes still to read */
	int		flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
	int		eof;

	dprintk("NFS: nfs_readpage_sync(%p)\n", page);

	/* No file means no credential (e.g. swap-in); the RPC layer copes. */
	if (file)
		cred = nfs_file_cred(file);

	/*
	 * This works now because the socket layer never tries to DMA
	 * into this buffer directly.
	 */
	do {
		/* Clamp the final chunk to what is left of the page. */
		if (count < rsize)
			rsize = count;

		dprintk("NFS: nfs_proc_read(%s, (%x/%Ld), %u, %u, %p)\n",
			NFS_SERVER(inode)->hostname,
			inode->i_dev, (long long)NFS_FILEID(inode),
			offset, rsize, page);

		/* The protocol read op and attribute refresh run under the BKL. */
		lock_kernel();
		result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags,
						offset, rsize, page, &eof);
		nfs_refresh_inode(inode, &fattr);
		unlock_kernel();

		/*
		 * Even if we had a partial success we can't mark the page
		 * cache valid.
		 */
		if (result < 0) {
			/* Reading a directory as a file is a caller error. */
			if (result == -EISDIR)
				result = -EINVAL;
			goto io_error;
		}
		count  -= result;
		offset += result;
		if (result < rsize)	/* NFSv2ism */
			break;
	} while (count);

	/* Short read: the rest of the page is beyond EOF, zero it. */
	if (count) {
		char *kaddr = kmap(page);
		memset(kaddr + offset, 0, count);
		kunmap(page);
	}
	flush_dcache_page(page);
	SetPageUptodate(page);
	/* A successful sync read clears the retry marker set by async errors. */
	if (PageError(page))
		ClearPageError(page);
	result = 0;

io_error:
	UnlockPage(page);
	return result;
}
152 
153 /*
154  * Add a request to the inode's asynchronous read list.
155  */
156 static inline void
nfs_mark_request_read(struct nfs_page * req)157 nfs_mark_request_read(struct nfs_page *req)
158 {
159 	struct inode *inode = req->wb_inode;
160 
161 	spin_lock(&nfs_wreq_lock);
162 	nfs_list_add_request(req, &inode->u.nfs_i.read);
163 	inode->u.nfs_i.nread++;
164 	__nfs_add_lru(&NFS_SERVER(inode)->lru_read, req);
165 	spin_unlock(&nfs_wreq_lock);
166 }
167 
/*
 * Queue an asynchronous read of @page.
 *
 * A new nfs_page request covering the whole page is added to the
 * inode's read list.  The accumulated batch is submitted immediately
 * when the page had its "sync" bit set, when enough requests are queued
 * to fill an RPC (server->rpages), or when this is the last page of the
 * file.  Returns 0, or the nfs_create_request() error after marking the
 * page in error and unlocking it.
 */
static int
nfs_readpage_async(struct file *file, struct inode *inode, struct page *page)
{
	struct nfs_page	*new;

	new = nfs_create_request(nfs_file_cred(file), inode, page, 0, PAGE_CACHE_SIZE);
	if (IS_ERR(new)) {
		SetPageError(page);
		NFS_ClearPageSync(page);
		UnlockPage(page);
		return PTR_ERR(new);
	}
	nfs_mark_request_read(new);

	/* Kick off the I/O now if there is a reason not to keep batching. */
	if (NFS_TestClearPageSync(page) ||
	    inode->u.nfs_i.nread >= NFS_SERVER(inode)->rpages ||
	    page_index(page) == (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
		nfs_pagein_inode(inode, 0, 0);
	return 0;
}
188 
189 /*
190  * Set up the NFS read request struct
191  */
192 static void
nfs_read_rpcsetup(struct list_head * head,struct nfs_read_data * data)193 nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data)
194 {
195 	struct nfs_page		*req;
196 	struct page		**pages;
197 	unsigned int		count;
198 
199 	pages = data->args.pages;
200 	count = 0;
201 	while (!list_empty(head)) {
202 		struct nfs_page *req = nfs_list_entry(head->next);
203 		nfs_list_remove_request(req);
204 		nfs_list_add_request(req, &data->pages);
205 		*pages++ = req->wb_page;
206 		count += req->wb_bytes;
207 	}
208 	req = nfs_list_entry(data->pages.next);
209 	data->inode	  = req->wb_inode;
210 	data->cred	  = req->wb_cred;
211 	data->args.fh     = NFS_FH(req->wb_inode);
212 	data->args.offset = page_offset(req->wb_page) + req->wb_offset;
213 	data->args.pgbase = req->wb_offset;
214 	data->args.count  = count;
215 	data->res.fattr   = &data->fattr;
216 	data->res.count   = count;
217 	data->res.eof     = 0;
218 }
219 
/*
 * Fail every request on @head: set each page's error bit so a later
 * nfs_readpage() retries it synchronously (see the comment at the top
 * of this file), unlock the page, and drop the request.
 *
 * NOTE(review): nfs_release_request() is called before
 * nfs_unlock_request() — presumably the request is kept alive by the
 * lock-holder reference until the unlock; confirm against nfs_page.h.
 */
static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page	*req;
	struct page	*page;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		page = req->wb_page;
		nfs_list_remove_request(req);
		NFS_ClearPageSync(page);
		SetPageError(page);
		UnlockPage(page);
		nfs_clear_request(req);
		nfs_release_request(req);
		nfs_unlock_request(req);
	}
}
238 
239 static int
nfs_pagein_one(struct list_head * head,struct inode * inode)240 nfs_pagein_one(struct list_head *head, struct inode *inode)
241 {
242 	struct rpc_task		*task;
243 	struct rpc_clnt		*clnt = NFS_CLIENT(inode);
244 	struct nfs_read_data	*data;
245 	struct rpc_message	msg;
246 	int			flags;
247 	sigset_t		oldset;
248 
249 	data = nfs_readdata_alloc();
250 	if (!data)
251 		goto out_bad;
252 	task = &data->task;
253 
254 	/* N.B. Do we need to test? Never called for swapfile inode */
255 	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
256 
257 	nfs_read_rpcsetup(head, data);
258 
259 	/* Finalize the task. */
260 	rpc_init_task(task, clnt, nfs_readpage_result, flags);
261 	task->tk_calldata = data;
262 	/* Release requests */
263 	task->tk_release = nfs_readdata_release;
264 
265 #ifdef CONFIG_NFS_V3
266 	msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_READ : NFSPROC_READ;
267 #else
268 	msg.rpc_proc = NFSPROC_READ;
269 #endif
270 	msg.rpc_argp = &data->args;
271 	msg.rpc_resp = &data->res;
272 	msg.rpc_cred = data->cred;
273 
274 	/* Start the async call */
275 	dprintk("NFS: %4d initiated read call (req %x/%Ld count %u.\n",
276 		task->tk_pid,
277 		inode->i_dev, (long long)NFS_FILEID(inode),
278 		data->args.count);
279 
280 	rpc_clnt_sigmask(clnt, &oldset);
281 	rpc_call_setup(task, &msg, 0);
282 	lock_kernel();
283 	rpc_execute(task);
284 	unlock_kernel();
285 	rpc_clnt_sigunmask(clnt, &oldset);
286 	return 0;
287 out_bad:
288 	nfs_async_read_error(head);
289 	return -ENOMEM;
290 }
291 
292 int
nfs_pagein_list(struct list_head * head,int rpages)293 nfs_pagein_list(struct list_head *head, int rpages)
294 {
295 	LIST_HEAD(one_request);
296 	struct nfs_page		*req;
297 	int			error = 0;
298 	unsigned int		pages = 0;
299 
300 	while (!list_empty(head)) {
301 		pages += nfs_coalesce_requests(head, &one_request, rpages);
302 		req = nfs_list_entry(one_request.next);
303 		error = nfs_pagein_one(&one_request, req->wb_inode);
304 		if (error < 0)
305 			break;
306 	}
307 	if (error >= 0)
308 		return pages;
309 
310 	nfs_async_read_error(head);
311 	return error;
312 }
313 
314 /**
315  * nfs_scan_lru_read_timeout - Scan LRU list for timed out read requests
316  * @server: NFS superblock data
317  * @dst: destination list
318  *
319  * Moves a maximum of 'rpages' timed out requests from the NFS read LRU list.
320  * The elements are checked to ensure that they form a contiguous set
321  * of pages, and that they originated from the same file.
322  */
323 int
nfs_scan_lru_read_timeout(struct nfs_server * server,struct list_head * dst)324 nfs_scan_lru_read_timeout(struct nfs_server *server, struct list_head *dst)
325 {
326 	struct inode *inode;
327 	int npages;
328 
329 	npages = nfs_scan_lru_timeout(&server->lru_read, dst, server->rpages);
330 	if (npages) {
331 		inode = nfs_list_entry(dst->next)->wb_inode;
332 		inode->u.nfs_i.nread -= npages;
333 	}
334 	return npages;
335 }
336 
337 /**
338  * nfs_scan_lru_read - Scan LRU list for read requests
339  * @server: NFS superblock data
340  * @dst: destination list
341  *
342  * Moves a maximum of 'rpages' requests from the NFS read LRU list.
343  * The elements are checked to ensure that they form a contiguous set
344  * of pages, and that they originated from the same file.
345  */
346 int
nfs_scan_lru_read(struct nfs_server * server,struct list_head * dst)347 nfs_scan_lru_read(struct nfs_server *server, struct list_head *dst)
348 {
349 	struct inode *inode;
350 	int npages;
351 
352 	npages = nfs_scan_lru(&server->lru_read, dst, server->rpages);
353 	if (npages) {
354 		inode = nfs_list_entry(dst->next)->wb_inode;
355 		inode->u.nfs_i.nread -= npages;
356 	}
357 	return npages;
358 }
359 
360 /*
361  * nfs_scan_read - Scan an inode for read requests
362  * @inode: NFS inode to scan
363  * @dst: destination list
364  * @idx_start: lower bound of page->index to scan
365  * @npages: idx_start + npages sets the upper bound to scan
366  *
367  * Moves requests from the inode's read list.
368  * The requests are *not* checked to ensure that they form a contiguous set.
369  */
370 static int
nfs_scan_read(struct inode * inode,struct list_head * dst,unsigned long idx_start,unsigned int npages)371 nfs_scan_read(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
372 {
373 	int	res;
374 	res = nfs_scan_list(&inode->u.nfs_i.read, dst, idx_start, npages);
375 	inode->u.nfs_i.nread -= res;
376 	if ((inode->u.nfs_i.nread == 0) != list_empty(&inode->u.nfs_i.read))
377 		printk(KERN_ERR "NFS: desynchronized value of nfs_i.nread.\n");
378 	return res;
379 }
380 
nfs_pagein_inode(struct inode * inode,unsigned long idx_start,unsigned int npages)381 int nfs_pagein_inode(struct inode *inode, unsigned long idx_start,
382 		     unsigned int npages)
383 {
384 	LIST_HEAD(head);
385 	int	res,
386 		error = 0;
387 
388 	spin_lock(&nfs_wreq_lock);
389 	res = nfs_scan_read(inode, &head, idx_start, npages);
390 	spin_unlock(&nfs_wreq_lock);
391 	if (res)
392 		error = nfs_pagein_list(&head, NFS_SERVER(inode)->rpages);
393 	if (error < 0)
394 		return error;
395 	return res;
396 }
397 
/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 *
 * Distributes the returned byte count (res.count) across the coalesced
 * pages in order, zero-filling past a short read, and completes every
 * request on data->pages.
 */
static void
nfs_readpage_result(struct rpc_task *task)
{
	struct nfs_read_data	*data = (struct nfs_read_data *) task->tk_calldata;
	struct inode		*inode = data->inode;
	unsigned int		count = data->res.count;	/* bytes the server returned */

	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
		task->tk_pid, task->tk_status);

	/* If the jukebox helper took over (requeued the task), the
	 * requests must not be completed yet. */
	if (nfs_async_handle_jukebox(task))
		return;

	nfs_refresh_inode(inode, &data->fattr);
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);
		struct page *page = req->wb_page;
		nfs_list_remove_request(req);

		if (task->tk_status >= 0) {
			if (count < PAGE_CACHE_SIZE) {
				/* Short read: zero the rest of this page;
				 * subsequent pages got no data at all. */
				char *p = kmap(page);
				memset(p + count, 0, PAGE_CACHE_SIZE - count);
				kunmap(page);
				count = 0;
			} else
				count -= PAGE_CACHE_SIZE;
			SetPageUptodate(page);
		} else
			/* Error bit makes a later nfs_readpage() retry the
			 * page synchronously (see file header comment). */
			SetPageError(page);
		flush_dcache_page(page);
		NFS_ClearPageSync(page);
		UnlockPage(page);

		dprintk("NFS: read (%x/%Ld %d@%Ld)\n",
                        req->wb_inode->i_dev,
                        (long long)NFS_FILEID(req->wb_inode),
                        req->wb_bytes,
                        (long long)(page_offset(page) + req->wb_offset));
		nfs_clear_request(req);
		nfs_release_request(req);
		nfs_unlock_request(req);
	}
}
446 
/*
 * Read a page over NFS.
 * We read the page synchronously in the following cases:
 *  -	The NFS rsize is smaller than PAGE_CACHE_SIZE. We could kludge our way
 *	around this by creating several consecutive read requests, but
 *	that's hardly worth it.
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 *
 * Called with the page locked; the page is unlocked by whichever path
 * completes it (sync read, async completion, or the error path below).
 */
int
nfs_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int		error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	/*
	 * Try to flush any pending writes to the file..
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_error;

	/* Fast path: async read when the page is healthy and a single
	 * READ can cover the whole page. */
	if (!PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) {
		error = nfs_readpage_async(file, inode, page);
		goto out;
	}

	error = nfs_readpage_sync(file, inode, page);
	if (error < 0 && IS_SWAPFILE(inode))
		printk("Aiee.. nfs swap-in of page failed!\n");
out:
	return error;

out_error:
	/* Neither read path ran, so clean up the page state ourselves. */
	NFS_ClearPageSync(page);
	UnlockPage(page);
	goto out;
}
491 
nfs_init_readpagecache(void)492 int nfs_init_readpagecache(void)
493 {
494 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
495 					     sizeof(struct nfs_read_data),
496 					     0, SLAB_HWCACHE_ALIGN,
497 					     NULL, NULL);
498 	if (nfs_rdata_cachep == NULL)
499 		return -ENOMEM;
500 
501 	return 0;
502 }
503 
nfs_destroy_readpagecache(void)504 void nfs_destroy_readpagecache(void)
505 {
506 	if (kmem_cache_destroy(nfs_rdata_cachep))
507 		printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
508 }
509