1 /*
2 * linux/fs/nfs/write.c
3 *
4 * Writing file data over NFS.
5 *
6 * We do it like this: When a (user) process wishes to write data to an
7 * NFS file, a write request is allocated that contains the RPC task data
8 * plus some info on the page to be written, and added to the inode's
9 * write chain. If the process writes past the end of the page, an async
10 * RPC call to write the page is scheduled immediately; otherwise, the call
11 * is delayed for a few seconds.
12 *
13 * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
14 *
15 * Write requests are kept on the inode's writeback list. Each entry in
16 * that list references the page (portion) to be written. When the
17 * cache timeout has expired, the RPC task is woken up, and tries to
18 * lock the page. As soon as it manages to do so, the request is moved
19 * from the writeback list to the writelock list.
20 *
21 * Note: we must make sure never to confuse the inode passed in the
22 * write_page request with the one in page->inode. As far as I understand
23 * it, these are different when doing a swap-out.
24 *
25 * To understand everything that goes on here and in the NFS read code,
26 * one should be aware that a page is locked in exactly one of the following
27 * cases:
28 *
29 * - A write request is in progress.
30 * - A user process is in generic_file_write/nfs_update_page
31 * - A user process is in generic_file_read
32 *
33 * Also note that because of the way pages are invalidated in
34 * nfs_revalidate_inode, the following assertions hold:
35 *
36 * - If a page is dirty, there will be no read requests (a page will
37 * not be re-read unless invalidated by nfs_revalidate_inode).
38 * - If the page is not uptodate, there will be no pending write
39 * requests, and no process will be in nfs_update_page.
40 *
41 * FIXME: Interaction with the vmscan routines is not optimal yet.
42 * Either vmscan must be made nfs-savvy, or we need a different page
43 * reclaim concept that supports something like FS-independent
44 * buffer_heads with a b_ops-> field.
45 *
46 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
47 */
48
49 #include <linux/config.h>
50 #include <linux/types.h>
51 #include <linux/slab.h>
52 #include <linux/swap.h>
53 #include <linux/pagemap.h>
54 #include <linux/file.h>
55
56 #include <linux/sunrpc/clnt.h>
57 #include <linux/nfs_fs.h>
58 #include <linux/nfs_mount.h>
59 #include <linux/nfs_flushd.h>
60 #include <linux/nfs_page.h>
61 #include <asm/uaccess.h>
62 #include <linux/smp_lock.h>
63
/* Debug facility selected for this file (presumably consumed by the dprintk() calls below). */
#define NFSDBG_FACILITY		NFSDBG_PAGECACHE
65
66 /*
67 * Local structures
68 *
69 * This is the struct where the WRITE/COMMIT arguments go.
70 */
71 struct nfs_write_data {
72 struct rpc_task task;
73 struct inode *inode;
74 struct rpc_cred *cred;
75 struct nfs_writeargs args; /* argument struct */
76 struct nfs_writeres res; /* result struct */
77 struct nfs_fattr fattr;
78 struct nfs_writeverf verf;
79 struct list_head pages; /* Coalesced requests we wish to flush */
80 struct page *pagevec[NFS_WRITE_MAXIOV];
81 };
82
/*
 * Forward declarations of file-local functions that are used before
 * their definitions.
 */
static struct nfs_page *nfs_update_request(struct file *, struct inode *,
					    struct page *,
					    unsigned int, unsigned int);
static void nfs_strategy(struct inode *inode);
static void nfs_writeback_done(struct rpc_task *);
#ifdef CONFIG_NFS_V3
/* COMMIT completion handler; only declared when NFSv3 support is configured */
static void nfs_commit_done(struct rpc_task *);
#endif
94
/*
 * Hack for future NFS swap support: unless something else has already
 * defined IS_SWAPFILE(), no inode is treated as a swap file.
 */
#ifndef IS_SWAPFILE
#define IS_SWAPFILE(inode)	(0)
#endif
99
100 static kmem_cache_t *nfs_wdata_cachep;
101
nfs_writedata_alloc(void)102 static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
103 {
104 struct nfs_write_data *p;
105 p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NOFS);
106 if (p) {
107 memset(p, 0, sizeof(*p));
108 INIT_LIST_HEAD(&p->pages);
109 p->args.pages = p->pagevec;
110 }
111 return p;
112 }
113
nfs_writedata_free(struct nfs_write_data * p)114 static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
115 {
116 kmem_cache_free(nfs_wdata_cachep, p);
117 }
118
nfs_writedata_release(struct rpc_task * task)119 static void nfs_writedata_release(struct rpc_task *task)
120 {
121 struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
122 nfs_writedata_free(wdata);
123 }
124
125 /*
126 * Write a page synchronously.
127 * Offset is the data offset within the page.
128 */
129 static int
nfs_writepage_sync(struct file * file,struct inode * inode,struct page * page,unsigned int offset,unsigned int count)130 nfs_writepage_sync(struct file *file, struct inode *inode, struct page *page,
131 unsigned int offset, unsigned int count)
132 {
133 struct rpc_cred *cred = NULL;
134 loff_t base;
135 unsigned int wsize = NFS_SERVER(inode)->wsize;
136 int result, refresh = 0, written = 0, flags;
137 u8 *buffer;
138 struct nfs_fattr fattr;
139 struct nfs_writeverf verf;
140
141
142 if (file)
143 cred = get_rpccred(nfs_file_cred(file));
144 if (!cred)
145 cred = get_rpccred(NFS_I(inode)->mm_cred);
146
147 dprintk("NFS: nfs_writepage_sync(%x/%Ld %d@%Ld)\n",
148 inode->i_dev, (long long)NFS_FILEID(inode),
149 count, (long long)(page_offset(page) + offset));
150
151 base = page_offset(page) + offset;
152
153 flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
154
155 do {
156 if (count < wsize && !IS_SWAPFILE(inode))
157 wsize = count;
158
159 result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags,
160 offset, wsize, page, &verf);
161 nfs_write_attributes(inode, &fattr);
162
163 if (result < 0) {
164 /* Must mark the page invalid after I/O error */
165 ClearPageUptodate(page);
166 goto io_error;
167 }
168 if (result != wsize)
169 printk("NFS: short write, wsize=%u, result=%d\n",
170 wsize, result);
171 refresh = 1;
172 buffer += wsize;
173 base += wsize;
174 offset += wsize;
175 written += wsize;
176 count -= wsize;
177 /*
178 * If we've extended the file, update the inode
179 * now so we don't invalidate the cache.
180 */
181 if (base > inode->i_size)
182 inode->i_size = base;
183 } while (count);
184
185 if (PageError(page))
186 ClearPageError(page);
187
188 io_error:
189 if (cred)
190 put_rpccred(cred);
191
192 return written? written : result;
193 }
194
195 static int
nfs_writepage_async(struct file * file,struct inode * inode,struct page * page,unsigned int offset,unsigned int count)196 nfs_writepage_async(struct file *file, struct inode *inode, struct page *page,
197 unsigned int offset, unsigned int count)
198 {
199 struct nfs_page *req;
200 loff_t end;
201 int status;
202
203 req = nfs_update_request(file, inode, page, offset, count);
204 status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
205 if (status < 0)
206 goto out;
207 if (!req->wb_cred)
208 req->wb_cred = get_rpccred(NFS_I(inode)->mm_cred);
209 nfs_unlock_request(req);
210 nfs_strategy(inode);
211 end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
212 if (inode->i_size < end)
213 inode->i_size = end;
214
215 out:
216 return status;
217 }
218
219 /*
220 * Write an mmapped page to the server.
221 */
222 int
nfs_writepage(struct page * page)223 nfs_writepage(struct page *page)
224 {
225 struct inode *inode = page->mapping->host;
226 unsigned long end_index;
227 unsigned offset = PAGE_CACHE_SIZE;
228 int inode_referenced = 0;
229 int err;
230
231 /*
232 * Note: We need to ensure that we have a reference to the inode
233 * if we are to do asynchronous writes. If not, waiting
234 * in nfs_wait_on_request() may deadlock with clear_inode().
235 *
236 * If igrab() fails here, then it is in any case safe to
237 * call nfs_wb_page(), since there will be no pending writes.
238 */
239 if (igrab(inode) != 0)
240 inode_referenced = 1;
241 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
242
243 /* Ensure we've flushed out any previous writes */
244 nfs_wb_page(inode,page);
245
246 /* easy case */
247 if (page->index < end_index)
248 goto do_it;
249 /* things got complicated... */
250 offset = inode->i_size & (PAGE_CACHE_SIZE-1);
251
252 /* OK, are we completely out? */
253 err = -EIO;
254 if (page->index >= end_index+1 || !offset)
255 goto out;
256 do_it:
257 lock_kernel();
258 if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode) &&
259 inode_referenced) {
260 err = nfs_writepage_async(NULL, inode, page, 0, offset);
261 if (err >= 0)
262 err = 0;
263 } else {
264 err = nfs_writepage_sync(NULL, inode, page, 0, offset);
265 if (err == offset)
266 err = 0;
267 }
268 unlock_kernel();
269 out:
270 UnlockPage(page);
271 if (inode_referenced)
272 iput(inode);
273 return err;
274 }
275
276 /*
277 * Check whether the file range we want to write to is locked by
278 * us.
279 */
280 static int
region_locked(struct inode * inode,struct nfs_page * req)281 region_locked(struct inode *inode, struct nfs_page *req)
282 {
283 struct file_lock *fl;
284 loff_t rqstart, rqend;
285
286 /* Don't optimize writes if we don't use NLM */
287 if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
288 return 0;
289
290 rqstart = page_offset(req->wb_page) + req->wb_offset;
291 rqend = rqstart + req->wb_bytes;
292 for (fl = inode->i_flock; fl; fl = fl->fl_next) {
293 if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX)
294 && fl->fl_type == F_WRLCK
295 && fl->fl_start <= rqstart && rqend <= fl->fl_end) {
296 return 1;
297 }
298 }
299
300 return 0;
301 }
302
303 /*
304 * Insert a write request into an inode
305 * Note: we sort the list in order to be able to optimize nfs_find_request()
306 * & co. for the 'write append' case. For 2.5 we may want to consider
307 * some form of hashing so as to perform well on random writes.
308 */
309 static inline void
nfs_inode_add_request(struct inode * inode,struct nfs_page * req)310 nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
311 {
312 struct list_head *pos, *head;
313 unsigned long pg_idx = page_index(req->wb_page);
314
315 if (!list_empty(&req->wb_hash))
316 return;
317 if (!NFS_WBACK_BUSY(req))
318 printk(KERN_ERR "NFS: unlocked request attempted hashed!\n");
319 head = &inode->u.nfs_i.writeback;
320 if (list_empty(head))
321 igrab(inode);
322 list_for_each_prev(pos, head) {
323 struct nfs_page *entry = nfs_inode_wb_entry(pos);
324 if (page_index(entry->wb_page) < pg_idx)
325 break;
326 }
327 inode->u.nfs_i.npages++;
328 list_add(&req->wb_hash, pos);
329 req->wb_count++;
330 }
331
332 /*
333 * Insert a write request into an inode
334 */
335 static inline void
nfs_inode_remove_request(struct nfs_page * req)336 nfs_inode_remove_request(struct nfs_page *req)
337 {
338 struct inode *inode;
339 spin_lock(&nfs_wreq_lock);
340 if (list_empty(&req->wb_hash)) {
341 spin_unlock(&nfs_wreq_lock);
342 return;
343 }
344 if (!NFS_WBACK_BUSY(req))
345 printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
346 inode = req->wb_inode;
347 list_del(&req->wb_hash);
348 INIT_LIST_HEAD(&req->wb_hash);
349 inode->u.nfs_i.npages--;
350 if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
351 printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
352 if (list_empty(&inode->u.nfs_i.writeback)) {
353 spin_unlock(&nfs_wreq_lock);
354 iput(inode);
355 } else
356 spin_unlock(&nfs_wreq_lock);
357 nfs_clear_request(req);
358 nfs_release_request(req);
359 }
360
361 /*
362 * Find a request
363 */
364 static inline struct nfs_page *
_nfs_find_request(struct inode * inode,struct page * page)365 _nfs_find_request(struct inode *inode, struct page *page)
366 {
367 struct list_head *head, *pos;
368 unsigned long pg_idx = page_index(page);
369
370 head = &inode->u.nfs_i.writeback;
371 list_for_each_prev(pos, head) {
372 struct nfs_page *req = nfs_inode_wb_entry(pos);
373 unsigned long found_idx = page_index(req->wb_page);
374
375 if (pg_idx < found_idx)
376 continue;
377 if (pg_idx != found_idx)
378 break;
379 req->wb_count++;
380 return req;
381 }
382 return NULL;
383 }
384
385 static struct nfs_page *
nfs_find_request(struct inode * inode,struct page * page)386 nfs_find_request(struct inode *inode, struct page *page)
387 {
388 struct nfs_page *req;
389
390 spin_lock(&nfs_wreq_lock);
391 req = _nfs_find_request(inode, page);
392 spin_unlock(&nfs_wreq_lock);
393 return req;
394 }
395
396 /*
397 * Add a request to the inode's dirty list.
398 */
399 static inline void
nfs_mark_request_dirty(struct nfs_page * req)400 nfs_mark_request_dirty(struct nfs_page *req)
401 {
402 struct inode *inode = req->wb_inode;
403
404 spin_lock(&nfs_wreq_lock);
405 nfs_list_add_request(req, &inode->u.nfs_i.dirty);
406 inode->u.nfs_i.ndirty++;
407 __nfs_del_lru(req);
408 __nfs_add_lru(&NFS_SERVER(inode)->lru_dirty, req);
409 spin_unlock(&nfs_wreq_lock);
410 mark_inode_dirty(inode);
411 }
412
413 /*
414 * Check if a request is dirty
415 */
416 static inline int
nfs_dirty_request(struct nfs_page * req)417 nfs_dirty_request(struct nfs_page *req)
418 {
419 struct inode *inode = req->wb_inode;
420 return !list_empty(&req->wb_list) && req->wb_list_head == &inode->u.nfs_i.dirty;
421 }
422
#ifdef CONFIG_NFS_V3
/*
 * Add a request to the inode's commit list.
 *
 * Under nfs_wreq_lock the request is put on the inode's commit list, the
 * ncommit counter is bumped, and the request is moved onto the server's
 * lru_commit list.  The inode is then marked dirty.
 */
static inline void
nfs_mark_request_commit(struct nfs_page *req)
{
	struct inode *owner = req->wb_inode;

	spin_lock(&nfs_wreq_lock);
	nfs_list_add_request(req, &owner->u.nfs_i.commit);
	owner->u.nfs_i.ncommit++;
	/* Drop any previous LRU membership before queueing on lru_commit */
	__nfs_del_lru(req);
	__nfs_add_lru(&NFS_SERVER(owner)->lru_commit, req);
	spin_unlock(&nfs_wreq_lock);

	mark_inode_dirty(owner);
}
#endif
441
442 /*
443 * Wait for a request to complete.
444 *
445 * Interruptible by signals only if mounted with intr flag.
446 */
447 static int
nfs_wait_on_requests(struct inode * inode,unsigned long idx_start,unsigned int npages)448 nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
449 {
450 struct list_head *p, *head;
451 unsigned long idx_end;
452 unsigned int res = 0;
453 int error;
454
455 if (npages == 0)
456 idx_end = ~0;
457 else
458 idx_end = idx_start + npages - 1;
459
460 head = &inode->u.nfs_i.writeback;
461 restart:
462 spin_lock(&nfs_wreq_lock);
463 list_for_each_prev(p, head) {
464 unsigned long pg_idx;
465 struct nfs_page *req = nfs_inode_wb_entry(p);
466
467 pg_idx = page_index(req->wb_page);
468 if (pg_idx < idx_start)
469 break;
470 if (pg_idx > idx_end)
471 continue;
472
473 if (!NFS_WBACK_BUSY(req))
474 continue;
475 req->wb_count++;
476 spin_unlock(&nfs_wreq_lock);
477 error = nfs_wait_on_request(req);
478 nfs_release_request(req);
479 if (error < 0)
480 return error;
481 res++;
482 goto restart;
483 }
484 spin_unlock(&nfs_wreq_lock);
485 return res;
486 }
487
488 /**
489 * nfs_scan_lru_dirty_timeout - Scan LRU list for timed out dirty requests
490 * @server: NFS superblock data
491 * @dst: destination list
492 *
493 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
494 * The elements are checked to ensure that they form a contiguous set
495 * of pages, and that they originated from the same file.
496 */
497 int
nfs_scan_lru_dirty_timeout(struct nfs_server * server,struct list_head * dst)498 nfs_scan_lru_dirty_timeout(struct nfs_server *server, struct list_head *dst)
499 {
500 struct inode *inode;
501 int npages;
502
503 npages = nfs_scan_lru_timeout(&server->lru_dirty, dst, server->wpages);
504 if (npages) {
505 inode = nfs_list_entry(dst->next)->wb_inode;
506 inode->u.nfs_i.ndirty -= npages;
507 }
508 return npages;
509 }
510
511 /**
512 * nfs_scan_lru_dirty - Scan LRU list for dirty requests
513 * @server: NFS superblock data
514 * @dst: destination list
515 *
516 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
517 * The elements are checked to ensure that they form a contiguous set
518 * of pages, and that they originated from the same file.
519 */
520 int
nfs_scan_lru_dirty(struct nfs_server * server,struct list_head * dst)521 nfs_scan_lru_dirty(struct nfs_server *server, struct list_head *dst)
522 {
523 struct inode *inode;
524 int npages;
525
526 npages = nfs_scan_lru(&server->lru_dirty, dst, server->wpages);
527 if (npages) {
528 inode = nfs_list_entry(dst->next)->wb_inode;
529 inode->u.nfs_i.ndirty -= npages;
530 }
531 return npages;
532 }
533
534 /*
535 * nfs_scan_dirty - Scan an inode for dirty requests
536 * @inode: NFS inode to scan
537 * @dst: destination list
538 * @idx_start: lower bound of page->index to scan.
539 * @npages: idx_start + npages sets the upper bound to scan.
540 *
541 * Moves requests from the inode's dirty page list.
542 * The requests are *not* checked to ensure that they form a contiguous set.
543 */
544 static int
nfs_scan_dirty(struct inode * inode,struct list_head * dst,unsigned long idx_start,unsigned int npages)545 nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
546 {
547 int res;
548 res = nfs_scan_list(&inode->u.nfs_i.dirty, dst, idx_start, npages);
549 inode->u.nfs_i.ndirty -= res;
550 if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
551 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
552 return res;
553 }
554
555 #ifdef CONFIG_NFS_V3
556 /**
557 * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests
558 * @server: NFS superblock data
559 * @dst: destination list
560 *
561 * Finds the first a timed out request in the NFS commit LRU list and moves it
562 * to the list dst. If such an element is found, we move all other commit
563 * requests that apply to the same inode.
564 * The assumption is that doing everything in a single commit-to-disk is
565 * the cheaper alternative.
566 */
567 int
nfs_scan_lru_commit_timeout(struct nfs_server * server,struct list_head * dst)568 nfs_scan_lru_commit_timeout(struct nfs_server *server, struct list_head *dst)
569 {
570 struct inode *inode;
571 int npages;
572
573 npages = nfs_scan_lru_timeout(&server->lru_commit, dst, 1);
574 if (npages) {
575 inode = nfs_list_entry(dst->next)->wb_inode;
576 npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, 0, 0);
577 inode->u.nfs_i.ncommit -= npages;
578 }
579 return npages;
580 }
581
582
583 /**
584 * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests
585 * @server: NFS superblock data
586 * @dst: destination list
587 *
588 * Finds the first request in the NFS commit LRU list and moves it
589 * to the list dst. If such an element is found, we move all other commit
590 * requests that apply to the same inode.
591 * The assumption is that doing everything in a single commit-to-disk is
592 * the cheaper alternative.
593 */
594 int
nfs_scan_lru_commit(struct nfs_server * server,struct list_head * dst)595 nfs_scan_lru_commit(struct nfs_server *server, struct list_head *dst)
596 {
597 struct inode *inode;
598 int npages;
599
600 npages = nfs_scan_lru(&server->lru_commit, dst, 1);
601 if (npages) {
602 inode = nfs_list_entry(dst->next)->wb_inode;
603 npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, 0, 0);
604 inode->u.nfs_i.ncommit -= npages;
605 }
606 return npages;
607 }
608
609 /*
610 * nfs_scan_commit - Scan an inode for commit requests
611 * @inode: NFS inode to scan
612 * @dst: destination list
613 * @idx_start: lower bound of page->index to scan.
614 * @npages: idx_start + npages sets the upper bound to scan.
615 *
616 * Moves requests from the inode's 'commit' request list.
617 * The requests are *not* checked to ensure that they form a contiguous set.
618 */
619 static int
nfs_scan_commit(struct inode * inode,struct list_head * dst,unsigned long idx_start,unsigned int npages)620 nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
621 {
622 int res;
623 res = nfs_scan_list(&inode->u.nfs_i.commit, dst, idx_start, npages);
624 inode->u.nfs_i.ncommit -= res;
625 if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
626 printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
627 return res;
628 }
629 #endif
630
631
632 /*
633 * Try to update any existing write request, or create one if there is none.
634 * In order to match, the request's credentials must match those of
635 * the calling process.
636 *
637 * Note: Should always be called with the Page Lock held!
638 */
639 static struct nfs_page *
nfs_update_request(struct file * file,struct inode * inode,struct page * page,unsigned int offset,unsigned int bytes)640 nfs_update_request(struct file* file, struct inode *inode, struct page *page,
641 unsigned int offset, unsigned int bytes)
642 {
643 struct nfs_page *req, *new = NULL;
644 unsigned long rqend, end;
645
646 end = offset + bytes;
647
648 for (;;) {
649 /* Loop over all inode entries and see if we find
650 * A request for the page we wish to update
651 */
652 spin_lock(&nfs_wreq_lock);
653 req = _nfs_find_request(inode, page);
654 if (req) {
655 if (!nfs_lock_request_dontget(req)) {
656 int error;
657 spin_unlock(&nfs_wreq_lock);
658 error = nfs_wait_on_request(req);
659 nfs_release_request(req);
660 if (error < 0)
661 return ERR_PTR(error);
662 continue;
663 }
664 spin_unlock(&nfs_wreq_lock);
665 if (new)
666 nfs_release_request(new);
667 break;
668 }
669
670 if (new) {
671 nfs_lock_request_dontget(new);
672 nfs_inode_add_request(inode, new);
673 spin_unlock(&nfs_wreq_lock);
674 nfs_mark_request_dirty(new);
675 return new;
676 }
677 spin_unlock(&nfs_wreq_lock);
678
679 new = nfs_create_request(nfs_file_cred(file), inode, page, offset, bytes);
680 if (IS_ERR(new))
681 return new;
682 if (file) {
683 new->wb_file = file;
684 get_file(file);
685 }
686 /* If the region is locked, adjust the timeout */
687 if (region_locked(inode, new))
688 new->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
689 else
690 new->wb_timeout = jiffies + NFS_WRITEBACK_DELAY;
691 }
692
693 /* We have a request for our page.
694 * If the creds don't match, or the
695 * page addresses don't match,
696 * tell the caller to wait on the conflicting
697 * request.
698 */
699 rqend = req->wb_offset + req->wb_bytes;
700 if (req->wb_file != file
701 || req->wb_page != page
702 || !nfs_dirty_request(req)
703 || offset > rqend || end < req->wb_offset) {
704 nfs_unlock_request(req);
705 return ERR_PTR(-EBUSY);
706 }
707
708 /* Okay, the request matches. Update the region */
709 if (offset < req->wb_offset) {
710 req->wb_offset = offset;
711 req->wb_bytes = rqend - req->wb_offset;
712 }
713
714 if (end > rqend)
715 req->wb_bytes = end - req->wb_offset;
716
717 return req;
718 }
719
720 /*
721 * This is the strategy routine for NFS.
722 * It is called by nfs_updatepage whenever the user wrote up to the end
723 * of a page.
724 *
725 * We always try to submit a set of requests in parallel so that the
726 * server's write code can gather writes. This is mainly for the benefit
727 * of NFSv2.
728 *
729 * We never submit more requests than we think the remote can handle.
730 * For UDP sockets, we make sure we don't exceed the congestion window;
731 * for TCP, we limit the number of requests to 8.
732 *
733 * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
734 * should be sent out in one go. This is for the benefit of NFSv2 servers
735 * that perform write gathering.
736 *
737 * FIXME: Different servers may have different sweet spots.
738 * Record the average congestion window in server struct?
739 */
740 #define NFS_STRATEGY_PAGES 8
741 static void
nfs_strategy(struct inode * inode)742 nfs_strategy(struct inode *inode)
743 {
744 unsigned int dirty, wpages;
745
746 dirty = inode->u.nfs_i.ndirty;
747 wpages = NFS_SERVER(inode)->wpages;
748 #ifdef CONFIG_NFS_V3
749 if (NFS_PROTO(inode)->version == 2) {
750 if (dirty >= NFS_STRATEGY_PAGES * wpages)
751 nfs_flush_file(inode, 0, 0, 0);
752 } else if (dirty >= wpages)
753 nfs_flush_file(inode, 0, 0, 0);
754 #else
755 if (dirty >= NFS_STRATEGY_PAGES * wpages)
756 nfs_flush_file(inode, 0, 0, 0);
757 #endif
758 }
759
760 int
nfs_flush_incompatible(struct file * file,struct page * page)761 nfs_flush_incompatible(struct file *file, struct page *page)
762 {
763 struct rpc_cred *cred = nfs_file_cred(file);
764 struct inode *inode = page->mapping->host;
765 struct nfs_page *req;
766 int status = 0;
767 /*
768 * Look for a request corresponding to this page. If there
769 * is one, and it belongs to another file, we flush it out
770 * before we try to copy anything into the page. Do this
771 * due to the lack of an ACCESS-type call in NFSv2.
772 * Also do the same if we find a request from an existing
773 * dropped page.
774 */
775 req = nfs_find_request(inode,page);
776 if (req) {
777 if (req->wb_file != file || req->wb_cred != cred || req->wb_page != page)
778 status = nfs_wb_page(inode, page);
779 nfs_release_request(req);
780 }
781 return (status < 0) ? status : 0;
782 }
783
784 /*
785 * Update and possibly write a cached page of an NFS file.
786 *
787 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
788 * things with a page scheduled for an RPC call (e.g. invalidate it).
789 */
790 int
nfs_updatepage(struct file * file,struct page * page,unsigned int offset,unsigned int count)791 nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count)
792 {
793 struct dentry *dentry = file->f_dentry;
794 struct inode *inode = page->mapping->host;
795 struct nfs_page *req;
796 loff_t end;
797 int status = 0;
798
799 dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
800 dentry->d_parent->d_name.name, dentry->d_name.name,
801 count, (long long)(page_offset(page) +offset));
802
803 /*
804 * If wsize is smaller than page size, update and write
805 * page synchronously.
806 */
807 if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode)) {
808 status = nfs_writepage_sync(file, inode, page, offset, count);
809 if (status > 0) {
810 if (offset == 0 && status == PAGE_CACHE_SIZE)
811 SetPageUptodate(page);
812 return 0;
813 }
814 return status;
815 }
816
817 /*
818 * Try to find an NFS request corresponding to this page
819 * and update it.
820 * If the existing request cannot be updated, we must flush
821 * it out now.
822 */
823 do {
824 req = nfs_update_request(file, inode, page, offset, count);
825 status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
826 if (status != -EBUSY)
827 break;
828 /* Request could not be updated. Flush it out and try again */
829 status = nfs_wb_page(inode, page);
830 } while (status >= 0);
831 if (status < 0)
832 goto done;
833
834 status = 0;
835 end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
836 if (inode->i_size < end)
837 inode->i_size = end;
838
839 /* If we wrote past the end of the page.
840 * Call the strategy routine so it can send out a bunch
841 * of requests.
842 */
843 if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE) {
844 SetPageUptodate(page);
845 nfs_unlock_request(req);
846 nfs_strategy(inode);
847 } else
848 nfs_unlock_request(req);
849 done:
850 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
851 status, (long long)inode->i_size);
852 if (status < 0)
853 ClearPageUptodate(page);
854 return status;
855 }
856
857 /*
858 * Set up the argument/result storage required for the RPC call.
859 */
860 static void
nfs_write_rpcsetup(struct list_head * head,struct nfs_write_data * data)861 nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data)
862 {
863 struct nfs_page *req;
864 struct page **pages;
865 unsigned int count;
866
867 /* Set up the RPC argument and reply structs
868 * NB: take care not to mess about with data->commit et al. */
869
870 pages = data->args.pages;
871 count = 0;
872 while (!list_empty(head)) {
873 struct nfs_page *req = nfs_list_entry(head->next);
874 nfs_list_remove_request(req);
875 nfs_list_add_request(req, &data->pages);
876 *pages++ = req->wb_page;
877 count += req->wb_bytes;
878 }
879 req = nfs_list_entry(data->pages.next);
880 data->inode = req->wb_inode;
881 data->cred = req->wb_cred;
882 data->args.fh = NFS_FH(req->wb_inode);
883 data->args.offset = page_offset(req->wb_page) + req->wb_offset;
884 data->args.pgbase = req->wb_offset;
885 data->args.count = count;
886 data->res.fattr = &data->fattr;
887 data->res.count = count;
888 data->res.verf = &data->verf;
889 }
890
891
892 /*
893 * Create an RPC task for the given write request and kick it.
894 * The page must have been locked by the caller.
895 *
896 * It may happen that the page we're passed is not marked dirty.
897 * This is the case if nfs_updatepage detects a conflicting request
898 * that has been written but not committed.
899 */
900 static int
nfs_flush_one(struct list_head * head,struct inode * inode,int how)901 nfs_flush_one(struct list_head *head, struct inode *inode, int how)
902 {
903 struct rpc_clnt *clnt = NFS_CLIENT(inode);
904 struct nfs_write_data *data;
905 struct rpc_task *task;
906 struct rpc_message msg;
907 int flags,
908 nfsvers = NFS_PROTO(inode)->version,
909 async = !(how & FLUSH_SYNC),
910 stable = (how & FLUSH_STABLE);
911 sigset_t oldset;
912
913
914 data = nfs_writedata_alloc();
915 if (!data)
916 goto out_bad;
917 task = &data->task;
918
919 /* Set the initial flags for the task. */
920 flags = (async) ? RPC_TASK_ASYNC : 0;
921
922 /* Set up the argument struct */
923 nfs_write_rpcsetup(head, data);
924 if (nfsvers < 3)
925 data->args.stable = NFS_FILE_SYNC;
926 else if (stable) {
927 if (!inode->u.nfs_i.ncommit)
928 data->args.stable = NFS_FILE_SYNC;
929 else
930 data->args.stable = NFS_DATA_SYNC;
931 } else
932 data->args.stable = NFS_UNSTABLE;
933
934 /* Finalize the task. */
935 rpc_init_task(task, clnt, nfs_writeback_done, flags);
936 task->tk_calldata = data;
937 /* Release requests */
938 task->tk_release = nfs_writedata_release;
939
940 #ifdef CONFIG_NFS_V3
941 msg.rpc_proc = (nfsvers == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE;
942 #else
943 msg.rpc_proc = NFSPROC_WRITE;
944 #endif
945 msg.rpc_argp = &data->args;
946 msg.rpc_resp = &data->res;
947 msg.rpc_cred = data->cred;
948
949 dprintk("NFS: %4d initiated write call (req %x/%Ld count %u)\n",
950 task->tk_pid,
951 inode->i_dev,
952 (long long)NFS_FILEID(inode),
953 data->args.count);
954
955 rpc_clnt_sigmask(clnt, &oldset);
956 rpc_call_setup(task, &msg, 0);
957 lock_kernel();
958 rpc_execute(task);
959 unlock_kernel();
960 rpc_clnt_sigunmask(clnt, &oldset);
961 return 0;
962 out_bad:
963 while (!list_empty(head)) {
964 struct nfs_page *req = nfs_list_entry(head->next);
965 nfs_list_remove_request(req);
966 nfs_mark_request_dirty(req);
967 nfs_unlock_request(req);
968 }
969 return -ENOMEM;
970 }
971
972 int
nfs_flush_list(struct list_head * head,int wpages,int how)973 nfs_flush_list(struct list_head *head, int wpages, int how)
974 {
975 LIST_HEAD(one_request);
976 struct nfs_page *req;
977 int error = 0;
978 unsigned int pages = 0;
979
980 while (!list_empty(head)) {
981 pages += nfs_coalesce_requests(head, &one_request, wpages);
982 req = nfs_list_entry(one_request.next);
983 error = nfs_flush_one(&one_request, req->wb_inode, how);
984 if (error < 0)
985 break;
986 }
987 if (error >= 0)
988 return pages;
989
990 while (!list_empty(head)) {
991 req = nfs_list_entry(head->next);
992 nfs_list_remove_request(req);
993 nfs_mark_request_dirty(req);
994 nfs_unlock_request(req);
995 }
996 return error;
997 }
998
999
1000 /*
1001 * This function is called when the WRITE call is complete.
1002 */
1003 static void
nfs_writeback_done(struct rpc_task * task)1004 nfs_writeback_done(struct rpc_task *task)
1005 {
1006 struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
1007 struct nfs_writeargs *argp = &data->args;
1008 struct nfs_writeres *resp = &data->res;
1009 struct inode *inode = data->inode;
1010 struct nfs_page *req;
1011 struct page *page;
1012
1013 dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
1014 task->tk_pid, task->tk_status);
1015
1016 if (nfs_async_handle_jukebox(task))
1017 return;
1018
1019 /* We can't handle that yet but we check for it nevertheless */
1020 if (resp->count < argp->count && task->tk_status >= 0) {
1021 static unsigned long complain;
1022 if (time_before(complain, jiffies)) {
1023 printk(KERN_WARNING
1024 "NFS: Server wrote less than requested.\n");
1025 complain = jiffies + 300 * HZ;
1026 }
1027 /* Can't do anything about it right now except throw
1028 * an error. */
1029 task->tk_status = -EIO;
1030 }
1031 #ifdef CONFIG_NFS_V3
1032 if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
1033 /* We tried a write call, but the server did not
1034 * commit data to stable storage even though we
1035 * requested it.
1036 * Note: There is a known bug in Tru64 < 5.0 in which
1037 * the server reports NFS_DATA_SYNC, but performs
1038 * NFS_FILE_SYNC. We therefore implement this checking
1039 * as a dprintk() in order to avoid filling syslog.
1040 */
1041 static unsigned long complain;
1042
1043 if (time_before(complain, jiffies)) {
1044 dprintk("NFS: faulty NFSv3 server %s:"
1045 " (committed = %d) != (stable = %d)\n",
1046 NFS_SERVER(inode)->hostname,
1047 resp->verf->committed, argp->stable);
1048 complain = jiffies + 300 * HZ;
1049 }
1050 }
1051 #endif
1052
1053 /*
1054 * Update attributes as result of writeback.
1055 * FIXME: There is an inherent race with invalidate_inode_pages and
1056 * writebacks since the page->count is kept > 1 for as long
1057 * as the page has a write request pending.
1058 */
1059 nfs_write_attributes(inode, resp->fattr);
1060 while (!list_empty(&data->pages)) {
1061 req = nfs_list_entry(data->pages.next);
1062 nfs_list_remove_request(req);
1063 page = req->wb_page;
1064
1065 dprintk("NFS: write (%x/%Ld %d@%Ld)",
1066 req->wb_inode->i_dev,
1067 (long long)NFS_FILEID(req->wb_inode),
1068 req->wb_bytes,
1069 (long long)(page_offset(page) + req->wb_offset));
1070
1071 if (task->tk_status < 0) {
1072 ClearPageUptodate(page);
1073 SetPageError(page);
1074 if (req->wb_file)
1075 req->wb_file->f_error = task->tk_status;
1076 nfs_inode_remove_request(req);
1077 dprintk(", error = %d\n", task->tk_status);
1078 goto next;
1079 }
1080
1081 #ifdef CONFIG_NFS_V3
1082 if (argp->stable != NFS_UNSTABLE || resp->verf->committed == NFS_FILE_SYNC) {
1083 nfs_inode_remove_request(req);
1084 dprintk(" OK\n");
1085 goto next;
1086 }
1087 memcpy(&req->wb_verf, resp->verf, sizeof(req->wb_verf));
1088 req->wb_timeout = jiffies + NFS_COMMIT_DELAY;
1089 nfs_mark_request_commit(req);
1090 dprintk(" marked for commit\n");
1091 #else
1092 nfs_inode_remove_request(req);
1093 #endif
1094 next:
1095 nfs_unlock_request(req);
1096 }
1097 }
1098
1099
1100 #ifdef CONFIG_NFS_V3
1101 /*
1102 * Set up the argument/result storage required for the RPC call.
1103 */
1104 static void
nfs_commit_rpcsetup(struct list_head * head,struct nfs_write_data * data)1105 nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
1106 {
1107 struct nfs_page *first, *last;
1108 struct inode *inode;
1109 loff_t start, end, len;
1110
1111 /* Set up the RPC argument and reply structs
1112 * NB: take care not to mess about with data->commit et al. */
1113
1114 list_splice(head, &data->pages);
1115 INIT_LIST_HEAD(head);
1116 first = nfs_list_entry(data->pages.next);
1117 last = nfs_list_entry(data->pages.prev);
1118 inode = first->wb_inode;
1119
1120 /*
1121 * Determine the offset range of requests in the COMMIT call.
1122 * We rely on the fact that data->pages is an ordered list...
1123 */
1124 start = page_offset(first->wb_page) + first->wb_offset;
1125 end = page_offset(last->wb_page) + (last->wb_offset + last->wb_bytes);
1126 len = end - start;
1127 /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
1128 if (end >= inode->i_size || len < 0 || len > (~((u32)0) >> 1))
1129 len = 0;
1130
1131 data->inode = inode;
1132 data->cred = first->wb_cred;
1133 data->args.fh = NFS_FH(inode);
1134 data->args.offset = start;
1135 data->res.count = data->args.count = (u32)len;
1136 data->res.fattr = &data->fattr;
1137 data->res.verf = &data->verf;
1138 }
1139
1140 /*
1141 * Commit dirty pages
1142 */
1143 int
nfs_commit_list(struct list_head * head,int how)1144 nfs_commit_list(struct list_head *head, int how)
1145 {
1146 struct rpc_message msg;
1147 struct rpc_clnt *clnt;
1148 struct nfs_write_data *data;
1149 struct rpc_task *task;
1150 struct nfs_page *req;
1151 int flags,
1152 async = !(how & FLUSH_SYNC);
1153 sigset_t oldset;
1154
1155 data = nfs_writedata_alloc();
1156
1157 if (!data)
1158 goto out_bad;
1159 task = &data->task;
1160
1161 flags = (async) ? RPC_TASK_ASYNC : 0;
1162
1163 /* Set up the argument struct */
1164 nfs_commit_rpcsetup(head, data);
1165 req = nfs_list_entry(data->pages.next);
1166 clnt = NFS_CLIENT(req->wb_inode);
1167
1168 rpc_init_task(task, clnt, nfs_commit_done, flags);
1169 task->tk_calldata = data;
1170 /* Release requests */
1171 task->tk_release = nfs_writedata_release;
1172
1173 msg.rpc_proc = NFS3PROC_COMMIT;
1174 msg.rpc_argp = &data->args;
1175 msg.rpc_resp = &data->res;
1176 msg.rpc_cred = data->cred;
1177
1178 dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
1179 rpc_clnt_sigmask(clnt, &oldset);
1180 rpc_call_setup(task, &msg, 0);
1181 lock_kernel();
1182 rpc_execute(task);
1183 unlock_kernel();
1184 rpc_clnt_sigunmask(clnt, &oldset);
1185 return 0;
1186 out_bad:
1187 while (!list_empty(head)) {
1188 req = nfs_list_entry(head->next);
1189 nfs_list_remove_request(req);
1190 nfs_mark_request_commit(req);
1191 nfs_unlock_request(req);
1192 }
1193 return -ENOMEM;
1194 }
1195
1196 /*
1197 * COMMIT call returned
1198 */
1199 static void
nfs_commit_done(struct rpc_task * task)1200 nfs_commit_done(struct rpc_task *task)
1201 {
1202 struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
1203 struct nfs_writeres *resp = &data->res;
1204 struct nfs_page *req;
1205 struct inode *inode = data->inode;
1206
1207 dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1208 task->tk_pid, task->tk_status);
1209
1210 if (nfs_async_handle_jukebox(task))
1211 return;
1212
1213 nfs_write_attributes(inode, resp->fattr);
1214 while (!list_empty(&data->pages)) {
1215 req = nfs_list_entry(data->pages.next);
1216 nfs_list_remove_request(req);
1217
1218 dprintk("NFS: commit (%x/%Ld %d@%Ld)",
1219 req->wb_inode->i_dev,
1220 (long long)NFS_FILEID(req->wb_inode),
1221 req->wb_bytes,
1222 (long long)(page_offset(req->wb_page) + req->wb_offset));
1223 if (task->tk_status < 0) {
1224 if (req->wb_file)
1225 req->wb_file->f_error = task->tk_status;
1226 nfs_inode_remove_request(req);
1227 dprintk(", error = %d\n", task->tk_status);
1228 goto next;
1229 }
1230
1231 /* Okay, COMMIT succeeded, apparently. Check the verifier
1232 * returned by the server against all stored verfs. */
1233 if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1234 /* We have a match */
1235 nfs_inode_remove_request(req);
1236 dprintk(" OK\n");
1237 goto next;
1238 }
1239 /* We have a mismatch. Write the page again */
1240 dprintk(" mismatch\n");
1241 nfs_mark_request_dirty(req);
1242 next:
1243 nfs_unlock_request(req);
1244 }
1245 }
1246 #endif
1247
nfs_flush_file(struct inode * inode,unsigned long idx_start,unsigned int npages,int how)1248 int nfs_flush_file(struct inode *inode, unsigned long idx_start,
1249 unsigned int npages, int how)
1250 {
1251 LIST_HEAD(head);
1252 int res,
1253 error = 0;
1254
1255 spin_lock(&nfs_wreq_lock);
1256 res = nfs_scan_dirty(inode, &head, idx_start, npages);
1257 spin_unlock(&nfs_wreq_lock);
1258 if (res)
1259 error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
1260 if (error < 0)
1261 return error;
1262 return res;
1263 }
1264
1265 #ifdef CONFIG_NFS_V3
nfs_commit_file(struct inode * inode,int how)1266 int nfs_commit_file(struct inode *inode, int how)
1267 {
1268 LIST_HEAD(head);
1269 int res,
1270 error = 0;
1271
1272 spin_lock(&nfs_wreq_lock);
1273 res = nfs_scan_commit(inode, &head, 0, 0);
1274 spin_unlock(&nfs_wreq_lock);
1275 if (res)
1276 error = nfs_commit_list(&head, how);
1277 if (error < 0)
1278 return error;
1279 return res;
1280 }
1281 #endif
1282
nfs_sync_file(struct inode * inode,unsigned long idx_start,unsigned int npages,int how)1283 int nfs_sync_file(struct inode *inode, unsigned long idx_start,
1284 unsigned int npages, int how)
1285 {
1286 int error,
1287 wait;
1288
1289 wait = how & FLUSH_WAIT;
1290 how &= ~FLUSH_WAIT;
1291
1292 do {
1293 error = 0;
1294 if (wait)
1295 error = nfs_wait_on_requests(inode, idx_start, npages);
1296 if (error == 0)
1297 error = nfs_flush_file(inode, idx_start, npages, how);
1298 #ifdef CONFIG_NFS_V3
1299 if (error == 0)
1300 error = nfs_commit_file(inode, how);
1301 #endif
1302 } while (error > 0);
1303 return error;
1304 }
1305
nfs_init_writepagecache(void)1306 int nfs_init_writepagecache(void)
1307 {
1308 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1309 sizeof(struct nfs_write_data),
1310 0, SLAB_HWCACHE_ALIGN,
1311 NULL, NULL);
1312 if (nfs_wdata_cachep == NULL)
1313 return -ENOMEM;
1314
1315 return 0;
1316 }
1317
nfs_destroy_writepagecache(void)1318 void nfs_destroy_writepagecache(void)
1319 {
1320 if (kmem_cache_destroy(nfs_wdata_cachep))
1321 printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
1322 }
1323
1324