1 /*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7 */
8
9 #include "fuse_i.h"
10
11 #include <linux/pagemap.h>
12 #include <linux/slab.h>
13 #include <linux/kernel.h>
14 #include <linux/sched.h>
15 #include <linux/module.h>
16 #include <linux/compat.h>
17 #include <linux/swap.h>
18
19 static const struct file_operations fuse_direct_io_file_operations;
20
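/*
 * Send FUSE_OPEN or FUSE_OPENDIR to the userspace server and collect
 * the reply (file handle and open flags) in @outargp.
 */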
21 static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
22 int opcode, struct fuse_open_out *outargp)
23 {
24 struct fuse_open_in inarg;
25 struct fuse_req *req;
26 int err;
27
28 req = fuse_get_req(fc);
29 if (IS_ERR(req))
30 return PTR_ERR(req);
31
32 memset(&inarg, 0, sizeof(inarg));
33 inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
34 if (!fc->atomic_o_trunc)
35 inarg.flags &= ~O_TRUNC;
36 req->in.h.opcode = opcode;
37 req->in.h.nodeid = nodeid;
38 req->in.numargs = 1;
39 req->in.args[0].size = sizeof(inarg);
40 req->in.args[0].value = &inarg;
41 req->out.numargs = 1;
42 req->out.args[0].size = sizeof(*outargp);
43 req->out.args[0].value = outargp;
44 fuse_request_send(fc, req);
45 err = req->out.h.error;
46 fuse_put_request(fc, req);
47
48 return err;
49 }
50
51 struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
52 {
53 struct fuse_file *ff;
54
55 ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
56 if (unlikely(!ff))
57 return NULL;
58
59 ff->fc = fc;
60 ff->reserved_req = fuse_request_alloc();
61 if (unlikely(!ff->reserved_req)) {
62 kfree(ff);
63 return NULL;
64 }
65
66 INIT_LIST_HEAD(&ff->write_entry);
67 atomic_set(&ff->count, 0);
68 RB_CLEAR_NODE(&ff->polled_node);
69 init_waitqueue_head(&ff->poll_wait);
70
71 spin_lock(&fc->lock);
72 ff->kh = ++fc->khctr;
73 spin_unlock(&fc->lock);
74
75 return ff;
76 }
77
78 void fuse_file_free(struct fuse_file *ff)
79 {
80 fuse_request_free(ff->reserved_req);
81 kfree(ff);
82 }
83
84 struct fuse_file *fuse_file_get(struct fuse_file *ff)
85 {
86 atomic_inc(&ff->count);
87 return ff;
88 }
89
90 static void fuse_release_async(struct work_struct *work)
91 {
92 struct fuse_req *req;
93 struct fuse_conn *fc;
94 struct path path;
95
96 req = container_of(work, struct fuse_req, misc.release.work);
97 path = req->misc.release.path;
98 fc = get_fuse_conn(path.dentry->d_inode);
99
100 fuse_put_request(fc, req);
101 path_put(&path);
102 }
103
104 static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
105 {
106 if (fc->destroy_req) {
107 /*
108 * If this is a fuseblk mount, then it's possible that
109 * releasing the path will result in releasing the
110 * super block and sending the DESTROY request. If
111 * the server is single threaded, this would hang.
112 * For this reason do the path_put() in a separate
113 * thread.
114 */
115 atomic_inc(&req->count);
116 INIT_WORK(&req->misc.release.work, fuse_release_async);
117 schedule_work(&req->misc.release.work);
118 } else {
119 path_put(&req->misc.release.path);
120 }
121 }
122
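/*
 * Drop a reference to the fuse_file.  When the last reference goes
 * away, the RELEASE request prepared in ff->reserved_req is sent,
 * synchronously or in the background, and the fuse_file is freed.
 */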
123 static void fuse_file_put(struct fuse_file *ff, bool sync)
124 {
125 if (atomic_dec_and_test(&ff->count)) {
126 struct fuse_req *req = ff->reserved_req;
127
128 if (sync) {
129 fuse_request_send(ff->fc, req);
130 path_put(&req->misc.release.path);
131 fuse_put_request(ff->fc, req);
132 } else {
133 req->end = fuse_release_end;
134 fuse_request_send_background(ff->fc, req);
135 }
136 kfree(ff);
137 }
138 }
139
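/*
 * Allocate a fuse_file, send the OPEN (or OPENDIR) request and store
 * the returned handle in file->private_data.
 */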
140 int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
141 bool isdir)
142 {
143 struct fuse_open_out outarg;
144 struct fuse_file *ff;
145 int err;
146 int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
147
148 ff = fuse_file_alloc(fc);
149 if (!ff)
150 return -ENOMEM;
151
152 err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
153 if (err) {
154 fuse_file_free(ff);
155 return err;
156 }
157
158 if (isdir)
159 outarg.open_flags &= ~FOPEN_DIRECT_IO;
160
161 ff->fh = outarg.fh;
162 ff->nodeid = nodeid;
163 ff->open_flags = outarg.open_flags;
164 file->private_data = fuse_file_get(ff);
165
166 return 0;
167 }
168 EXPORT_SYMBOL_GPL(fuse_do_open);
169
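/*
 * Apply the FOPEN_* flags returned by the server: switch to direct I/O
 * operations, drop the page cache, mark the file nonseekable, and reset
 * i_size if the open truncated the file.
 */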
170 void fuse_finish_open(struct inode *inode, struct file *file)
171 {
172 struct fuse_file *ff = file->private_data;
173 struct fuse_conn *fc = get_fuse_conn(inode);
174
175 if (ff->open_flags & FOPEN_DIRECT_IO)
176 file->f_op = &fuse_direct_io_file_operations;
177 if (!(ff->open_flags & FOPEN_KEEP_CACHE))
178 invalidate_inode_pages2(inode->i_mapping);
179 if (ff->open_flags & FOPEN_NONSEEKABLE)
180 nonseekable_open(inode, file);
181 if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
182 struct fuse_inode *fi = get_fuse_inode(inode);
183
184 spin_lock(&fc->lock);
185 fi->attr_version = ++fc->attr_version;
186 i_size_write(inode, 0);
187 spin_unlock(&fc->lock);
188 fuse_invalidate_attr(inode);
189 }
190 }
191
192 int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
193 {
194 struct fuse_conn *fc = get_fuse_conn(inode);
195 int err;
196
197 err = generic_file_open(inode, file);
198 if (err)
199 return err;
200
201 err = fuse_do_open(fc, get_node_id(inode), file, isdir);
202 if (err)
203 return err;
204
205 fuse_finish_open(inode, file);
206
207 return 0;
208 }
209
210 static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
211 {
212 struct fuse_conn *fc = ff->fc;
213 struct fuse_req *req = ff->reserved_req;
214 struct fuse_release_in *inarg = &req->misc.release.in;
215
216 spin_lock(&fc->lock);
217 list_del(&ff->write_entry);
218 if (!RB_EMPTY_NODE(&ff->polled_node))
219 rb_erase(&ff->polled_node, &fc->polled_files);
220 spin_unlock(&fc->lock);
221
222 wake_up_interruptible_all(&ff->poll_wait);
223
224 inarg->fh = ff->fh;
225 inarg->flags = flags;
226 req->in.h.opcode = opcode;
227 req->in.h.nodeid = ff->nodeid;
228 req->in.numargs = 1;
229 req->in.args[0].size = sizeof(struct fuse_release_in);
230 req->in.args[0].value = inarg;
231 }
232
233 void fuse_release_common(struct file *file, int opcode)
234 {
235 struct fuse_file *ff;
236 struct fuse_req *req;
237
238 ff = file->private_data;
239 if (unlikely(!ff))
240 return;
241
242 req = ff->reserved_req;
243 fuse_prepare_release(ff, file->f_flags, opcode);
244
245 if (ff->flock) {
246 struct fuse_release_in *inarg = &req->misc.release.in;
247 inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
248 inarg->lock_owner = fuse_lock_owner_id(ff->fc,
249 (fl_owner_t) file);
250 }
251 /* Hold vfsmount and dentry until release is finished */
252 path_get(&file->f_path);
253 req->misc.release.path = file->f_path;
254
255 /*
256 * Normally this will send the RELEASE request, however if
257 * some asynchronous READ or WRITE requests are outstanding,
258 * the sending will be delayed.
259 *
260 * Make the release synchronous if this is a fuseblk mount;
261 * synchronous RELEASE is allowed (and desirable) in this case
262 * because the server can be trusted not to screw up.
263 */
264 fuse_file_put(ff, ff->fc->destroy_req != NULL);
265 }
266
267 static int fuse_open(struct inode *inode, struct file *file)
268 {
269 return fuse_open_common(inode, file, false);
270 }
271
272 static int fuse_release(struct inode *inode, struct file *file)
273 {
274 fuse_release_common(file, FUSE_RELEASE);
275
276 /* return value is ignored by VFS */
277 return 0;
278 }
279
280 void fuse_sync_release(struct fuse_file *ff, int flags)
281 {
282 WARN_ON(atomic_read(&ff->count) > 1);
283 fuse_prepare_release(ff, flags, FUSE_RELEASE);
284 ff->reserved_req->force = 1;
285 fuse_request_send(ff->fc, ff->reserved_req);
286 fuse_put_request(ff->fc, ff->reserved_req);
287 kfree(ff);
288 }
289 EXPORT_SYMBOL_GPL(fuse_sync_release);
290
291 /*
292 * Scramble the ID space with XTEA, so that the value of the files_struct
293 * pointer is not exposed to userspace.
294 */
295 u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
296 {
297 u32 *k = fc->scramble_key;
298 u64 v = (unsigned long) id;
299 u32 v0 = v;
300 u32 v1 = v >> 32;
301 u32 sum = 0;
302 int i;
303
304 for (i = 0; i < 32; i++) {
305 v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
306 sum += 0x9E3779B9;
307 v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
308 }
309
310 return (u64) v0 + ((u64) v1 << 32);
311 }
312
313 /*
314 * Check if page is under writeback
315 *
316 * This is currently done by walking the list of writepage requests
317 * for the inode, which can be pretty inefficient.
318 */
319 static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
320 {
321 struct fuse_conn *fc = get_fuse_conn(inode);
322 struct fuse_inode *fi = get_fuse_inode(inode);
323 struct fuse_req *req;
324 bool found = false;
325
326 spin_lock(&fc->lock);
327 list_for_each_entry(req, &fi->writepages, writepages_entry) {
328 pgoff_t curr_index;
329
330 BUG_ON(req->inode != inode);
331 curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
332 if (curr_index == index) {
333 found = true;
334 break;
335 }
336 }
337 spin_unlock(&fc->lock);
338
339 return found;
340 }
341
342 /*
343 * Wait for page writeback to be completed.
344 *
345 * Since fuse doesn't rely on the VM writeback tracking, this has to
346 * use some other means.
347 */
348 static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
349 {
350 struct fuse_inode *fi = get_fuse_inode(inode);
351
352 wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
353 return 0;
354 }
355
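/*
 * Implement ->flush(): send a FUSE_FLUSH request, unless the server has
 * previously indicated that it does not support it.
 */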
356 static int fuse_flush(struct file *file, fl_owner_t id)
357 {
358 struct inode *inode = file->f_path.dentry->d_inode;
359 struct fuse_conn *fc = get_fuse_conn(inode);
360 struct fuse_file *ff = file->private_data;
361 struct fuse_req *req;
362 struct fuse_flush_in inarg;
363 int err;
364
365 if (is_bad_inode(inode))
366 return -EIO;
367
368 if (fc->no_flush)
369 return 0;
370
371 req = fuse_get_req_nofail(fc, file);
372 memset(&inarg, 0, sizeof(inarg));
373 inarg.fh = ff->fh;
374 inarg.lock_owner = fuse_lock_owner_id(fc, id);
375 req->in.h.opcode = FUSE_FLUSH;
376 req->in.h.nodeid = get_node_id(inode);
377 req->in.numargs = 1;
378 req->in.args[0].size = sizeof(inarg);
379 req->in.args[0].value = &inarg;
380 req->force = 1;
381 fuse_request_send(fc, req);
382 err = req->out.h.error;
383 fuse_put_request(fc, req);
384 if (err == -ENOSYS) {
385 fc->no_flush = 1;
386 err = 0;
387 }
388 return err;
389 }
390
391 /*
392 * Wait for all pending writepages on the inode to finish.
393 *
394 * This is currently done by blocking further writes with FUSE_NOWRITE
395 * and waiting for all sent writes to complete.
396 *
397 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
398 * could conflict with truncation.
399 */
400 static void fuse_sync_writes(struct inode *inode)
401 {
402 fuse_set_nowrite(inode);
403 fuse_release_nowrite(inode);
404 }
405
406 int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
407 int datasync, int isdir)
408 {
409 struct inode *inode = file->f_mapping->host;
410 struct fuse_conn *fc = get_fuse_conn(inode);
411 struct fuse_file *ff = file->private_data;
412 struct fuse_req *req;
413 struct fuse_fsync_in inarg;
414 int err;
415
416 if (is_bad_inode(inode))
417 return -EIO;
418
419 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
420 if (err)
421 return err;
422
423 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
424 return 0;
425
426 mutex_lock(&inode->i_mutex);
427
428 /*
429 * Start writeback against all dirty pages of the inode, then
430 * wait for all outstanding writes, before sending the FSYNC
431 * request.
432 */
433 err = write_inode_now(inode, 0);
434 if (err)
435 goto out;
436
437 fuse_sync_writes(inode);
438
439 req = fuse_get_req(fc);
440 if (IS_ERR(req)) {
441 err = PTR_ERR(req);
442 goto out;
443 }
444
445 memset(&inarg, 0, sizeof(inarg));
446 inarg.fh = ff->fh;
447 inarg.fsync_flags = datasync ? 1 : 0;
448 req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
449 req->in.h.nodeid = get_node_id(inode);
450 req->in.numargs = 1;
451 req->in.args[0].size = sizeof(inarg);
452 req->in.args[0].value = &inarg;
453 fuse_request_send(fc, req);
454 err = req->out.h.error;
455 fuse_put_request(fc, req);
456 if (err == -ENOSYS) {
457 if (isdir)
458 fc->no_fsyncdir = 1;
459 else
460 fc->no_fsync = 1;
461 err = 0;
462 }
463 out:
464 mutex_unlock(&inode->i_mutex);
465 return err;
466 }
467
468 static int fuse_fsync(struct file *file, loff_t start, loff_t end,
469 int datasync)
470 {
471 return fuse_fsync_common(file, start, end, datasync, 0);
472 }
473
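/*
 * Fill in a read request for @count bytes at @pos; the opcode is either
 * FUSE_READ or FUSE_READDIR.
 */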
474 void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
475 size_t count, int opcode)
476 {
477 struct fuse_read_in *inarg = &req->misc.read.in;
478 struct fuse_file *ff = file->private_data;
479
480 inarg->fh = ff->fh;
481 inarg->offset = pos;
482 inarg->size = count;
483 inarg->flags = file->f_flags;
484 req->in.h.opcode = opcode;
485 req->in.h.nodeid = ff->nodeid;
486 req->in.numargs = 1;
487 req->in.args[0].size = sizeof(struct fuse_read_in);
488 req->in.args[0].value = inarg;
489 req->out.argvar = 1;
490 req->out.numargs = 1;
491 req->out.args[0].size = count;
492 }
493
494 static size_t fuse_send_read(struct fuse_req *req, struct file *file,
495 loff_t pos, size_t count, fl_owner_t owner)
496 {
497 struct fuse_file *ff = file->private_data;
498 struct fuse_conn *fc = ff->fc;
499
500 fuse_read_fill(req, file, pos, count, FUSE_READ);
501 if (owner != NULL) {
502 struct fuse_read_in *inarg = &req->misc.read.in;
503
504 inarg->read_flags |= FUSE_READ_LOCKOWNER;
505 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
506 }
507 fuse_request_send(fc, req);
508 return req->out.args[0].size;
509 }
510
511 static void fuse_read_update_size(struct inode *inode, loff_t size,
512 u64 attr_ver)
513 {
514 struct fuse_conn *fc = get_fuse_conn(inode);
515 struct fuse_inode *fi = get_fuse_inode(inode);
516
517 spin_lock(&fc->lock);
518 if (attr_ver == fi->attr_version && size < inode->i_size &&
519 !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
520 fi->attr_version = ++fc->attr_version;
521 i_size_write(inode, size);
522 }
523 spin_unlock(&fc->lock);
524 }
525
526 static int fuse_readpage(struct file *file, struct page *page)
527 {
528 struct inode *inode = page->mapping->host;
529 struct fuse_conn *fc = get_fuse_conn(inode);
530 struct fuse_req *req;
531 size_t num_read;
532 loff_t pos = page_offset(page);
533 size_t count = PAGE_CACHE_SIZE;
534 u64 attr_ver;
535 int err;
536
537 err = -EIO;
538 if (is_bad_inode(inode))
539 goto out;
540
541 /*
542 * Page writeback can extend beyond the lifetime of the
543 * page-cache page, so make sure we read a properly synced
544 * page.
545 */
546 fuse_wait_on_page_writeback(inode, page->index);
547
548 req = fuse_get_req(fc);
549 err = PTR_ERR(req);
550 if (IS_ERR(req))
551 goto out;
552
553 attr_ver = fuse_get_attr_version(fc);
554
555 req->out.page_zeroing = 1;
556 req->out.argpages = 1;
557 req->num_pages = 1;
558 req->pages[0] = page;
559 num_read = fuse_send_read(req, file, pos, count, NULL);
560 err = req->out.h.error;
561 fuse_put_request(fc, req);
562
563 if (!err) {
564 /*
565 * Short read means EOF. If file size is larger, truncate it
566 */
567 if (num_read < count)
568 fuse_read_update_size(inode, pos + num_read, attr_ver);
569
570 SetPageUptodate(page);
571 }
572
573 fuse_invalidate_attr(inode); /* atime changed */
574 out:
575 unlock_page(page);
576 return err;
577 }
578
579 static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
580 {
581 int i;
582 size_t count = req->misc.read.in.size;
583 size_t num_read = req->out.args[0].size;
584 struct address_space *mapping = NULL;
585
586 for (i = 0; mapping == NULL && i < req->num_pages; i++)
587 mapping = req->pages[i]->mapping;
588
589 if (mapping) {
590 struct inode *inode = mapping->host;
591
592 /*
593 * Short read means EOF. If file size is larger, truncate it
594 */
595 if (!req->out.h.error && num_read < count) {
596 loff_t pos;
597
598 pos = page_offset(req->pages[0]) + num_read;
599 fuse_read_update_size(inode, pos,
600 req->misc.read.attr_ver);
601 }
602 fuse_invalidate_attr(inode); /* atime changed */
603 }
604
605 for (i = 0; i < req->num_pages; i++) {
606 struct page *page = req->pages[i];
607 if (!req->out.h.error)
608 SetPageUptodate(page);
609 else
610 SetPageError(page);
611 unlock_page(page);
612 page_cache_release(page);
613 }
614 if (req->ff)
615 fuse_file_put(req->ff, false);
616 }
617
618 static void fuse_send_readpages(struct fuse_req *req, struct file *file)
619 {
620 struct fuse_file *ff = file->private_data;
621 struct fuse_conn *fc = ff->fc;
622 loff_t pos = page_offset(req->pages[0]);
623 size_t count = req->num_pages << PAGE_CACHE_SHIFT;
624
625 req->out.argpages = 1;
626 req->out.page_zeroing = 1;
627 req->out.page_replace = 1;
628 fuse_read_fill(req, file, pos, count, FUSE_READ);
629 req->misc.read.attr_ver = fuse_get_attr_version(fc);
630 if (fc->async_read) {
631 req->ff = fuse_file_get(ff);
632 req->end = fuse_readpages_end;
633 fuse_request_send_background(fc, req);
634 } else {
635 fuse_request_send(fc, req);
636 fuse_readpages_end(fc, req);
637 fuse_put_request(fc, req);
638 }
639 }
640
641 struct fuse_fill_data {
642 struct fuse_req *req;
643 struct file *file;
644 struct inode *inode;
645 };
646
647 static int fuse_readpages_fill(void *_data, struct page *page)
648 {
649 struct fuse_fill_data *data = _data;
650 struct fuse_req *req = data->req;
651 struct inode *inode = data->inode;
652 struct fuse_conn *fc = get_fuse_conn(inode);
653
654 fuse_wait_on_page_writeback(inode, page->index);
655
656 if (req->num_pages &&
657 (req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
658 (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
659 req->pages[req->num_pages - 1]->index + 1 != page->index)) {
660 fuse_send_readpages(req, data->file);
661 data->req = req = fuse_get_req(fc);
662 if (IS_ERR(req)) {
663 unlock_page(page);
664 return PTR_ERR(req);
665 }
666 }
667 page_cache_get(page);
668 req->pages[req->num_pages] = page;
669 req->num_pages++;
670 return 0;
671 }
672
673 static int fuse_readpages(struct file *file, struct address_space *mapping,
674 struct list_head *pages, unsigned nr_pages)
675 {
676 struct inode *inode = mapping->host;
677 struct fuse_conn *fc = get_fuse_conn(inode);
678 struct fuse_fill_data data;
679 int err;
680
681 err = -EIO;
682 if (is_bad_inode(inode))
683 goto out;
684
685 data.file = file;
686 data.inode = inode;
687 data.req = fuse_get_req(fc);
688 err = PTR_ERR(data.req);
689 if (IS_ERR(data.req))
690 goto out;
691
692 err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
693 if (!err) {
694 if (data.req->num_pages)
695 fuse_send_readpages(data.req, file);
696 else
697 fuse_put_request(fc, data.req);
698 }
699 out:
700 return err;
701 }
702
703 static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
704 unsigned long nr_segs, loff_t pos)
705 {
706 struct inode *inode = iocb->ki_filp->f_mapping->host;
707
708 if (pos + iov_length(iov, nr_segs) > i_size_read(inode)) {
709 int err;
710 /*
711 * If trying to read past EOF, make sure the i_size
712 * attribute is up-to-date.
713 */
714 err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
715 if (err)
716 return err;
717 }
718
719 return generic_file_aio_read(iocb, iov, nr_segs, pos);
720 }
721
722 static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
723 loff_t pos, size_t count)
724 {
725 struct fuse_write_in *inarg = &req->misc.write.in;
726 struct fuse_write_out *outarg = &req->misc.write.out;
727
728 inarg->fh = ff->fh;
729 inarg->offset = pos;
730 inarg->size = count;
731 req->in.h.opcode = FUSE_WRITE;
732 req->in.h.nodeid = ff->nodeid;
733 req->in.numargs = 2;
734 if (ff->fc->minor < 9)
735 req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
736 else
737 req->in.args[0].size = sizeof(struct fuse_write_in);
738 req->in.args[0].value = inarg;
739 req->in.args[1].size = count;
740 req->out.numargs = 1;
741 req->out.args[0].size = sizeof(struct fuse_write_out);
742 req->out.args[0].value = outarg;
743 }
744
745 static size_t fuse_send_write(struct fuse_req *req, struct file *file,
746 loff_t pos, size_t count, fl_owner_t owner)
747 {
748 struct fuse_file *ff = file->private_data;
749 struct fuse_conn *fc = ff->fc;
750 struct fuse_write_in *inarg = &req->misc.write.in;
751
752 fuse_write_fill(req, ff, pos, count);
753 inarg->flags = file->f_flags;
754 if (owner != NULL) {
755 inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
756 inarg->lock_owner = fuse_lock_owner_id(fc, owner);
757 }
758 fuse_request_send(fc, req);
759 return req->misc.write.out.size;
760 }
761
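/*
 * Extend i_size after a successful write that went past the current end
 * of the file, bumping the attribute version accordingly.
 */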
762 void fuse_write_update_size(struct inode *inode, loff_t pos)
763 {
764 struct fuse_conn *fc = get_fuse_conn(inode);
765 struct fuse_inode *fi = get_fuse_inode(inode);
766
767 spin_lock(&fc->lock);
768 fi->attr_version = ++fc->attr_version;
769 if (pos > inode->i_size)
770 i_size_write(inode, pos);
771 spin_unlock(&fc->lock);
772 }
773
774 static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
775 struct inode *inode, loff_t pos,
776 size_t count)
777 {
778 size_t res;
779 unsigned offset;
780 unsigned i;
781
782 for (i = 0; i < req->num_pages; i++)
783 fuse_wait_on_page_writeback(inode, req->pages[i]->index);
784
785 res = fuse_send_write(req, file, pos, count, NULL);
786
787 offset = req->page_offset;
788 count = res;
789 for (i = 0; i < req->num_pages; i++) {
790 struct page *page = req->pages[i];
791
792 if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
793 SetPageUptodate(page);
794
795 if (count > PAGE_CACHE_SIZE - offset)
796 count -= PAGE_CACHE_SIZE - offset;
797 else
798 count = 0;
799 offset = 0;
800
801 unlock_page(page);
802 page_cache_release(page);
803 }
804
805 return res;
806 }
807
808 static ssize_t fuse_fill_write_pages(struct fuse_req *req,
809 struct address_space *mapping,
810 struct iov_iter *ii, loff_t pos)
811 {
812 struct fuse_conn *fc = get_fuse_conn(mapping->host);
813 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
814 size_t count = 0;
815 int err;
816
817 req->in.argpages = 1;
818 req->page_offset = offset;
819
820 do {
821 size_t tmp;
822 struct page *page;
823 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
824 size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
825 iov_iter_count(ii));
826
827 bytes = min_t(size_t, bytes, fc->max_write - count);
828
829 again:
830 err = -EFAULT;
831 if (iov_iter_fault_in_readable(ii, bytes))
832 break;
833
834 err = -ENOMEM;
835 page = grab_cache_page_write_begin(mapping, index, 0);
836 if (!page)
837 break;
838
839 if (mapping_writably_mapped(mapping))
840 flush_dcache_page(page);
841
842 pagefault_disable();
843 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
844 pagefault_enable();
845 flush_dcache_page(page);
846
847 mark_page_accessed(page);
848
849 if (!tmp) {
850 unlock_page(page);
851 page_cache_release(page);
852 bytes = min(bytes, iov_iter_single_seg_count(ii));
853 goto again;
854 }
855
856 err = 0;
857 req->pages[req->num_pages] = page;
858 req->num_pages++;
859
860 iov_iter_advance(ii, tmp);
861 count += tmp;
862 pos += tmp;
863 offset += tmp;
864 if (offset == PAGE_CACHE_SIZE)
865 offset = 0;
866
867 if (!fc->big_writes)
868 break;
869 } while (iov_iter_count(ii) && count < fc->max_write &&
870 req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
871
872 return count > 0 ? count : err;
873 }
874
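/*
 * Buffered write: copy data from the iterator into page-cache pages and
 * send it to the server in WRITE requests of at most max_write bytes.
 */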
875 static ssize_t fuse_perform_write(struct file *file,
876 struct address_space *mapping,
877 struct iov_iter *ii, loff_t pos)
878 {
879 struct inode *inode = mapping->host;
880 struct fuse_conn *fc = get_fuse_conn(inode);
881 struct fuse_inode *fi = get_fuse_inode(inode);
882 int err = 0;
883 ssize_t res = 0;
884
885 if (is_bad_inode(inode))
886 return -EIO;
887
888 if (inode->i_size < pos + iov_iter_count(ii))
889 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
890
891 do {
892 struct fuse_req *req;
893 ssize_t count;
894
895 req = fuse_get_req(fc);
896 if (IS_ERR(req)) {
897 err = PTR_ERR(req);
898 break;
899 }
900
901 count = fuse_fill_write_pages(req, mapping, ii, pos);
902 if (count <= 0) {
903 err = count;
904 } else {
905 size_t num_written;
906
907 num_written = fuse_send_write_pages(req, file, inode,
908 pos, count);
909 err = req->out.h.error;
910 if (!err) {
911 res += num_written;
912 pos += num_written;
913
914 /* break out of the loop on short write */
915 if (num_written != count)
916 err = -EIO;
917 }
918 }
919 fuse_put_request(fc, req);
920 } while (!err && iov_iter_count(ii));
921
922 if (res > 0)
923 fuse_write_update_size(inode, pos);
924
925 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
926 fuse_invalidate_attr(inode);
927
928 return res > 0 ? res : err;
929 }
930
931 static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
932 unsigned long nr_segs, loff_t pos)
933 {
934 struct file *file = iocb->ki_filp;
935 struct address_space *mapping = file->f_mapping;
936 size_t count = 0;
937 size_t ocount = 0;
938 ssize_t written = 0;
939 ssize_t written_buffered = 0;
940 struct inode *inode = mapping->host;
941 ssize_t err;
942 struct iov_iter i;
943 loff_t endbyte = 0;
944
945 WARN_ON(iocb->ki_pos != pos);
946
947 ocount = 0;
948 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
949 if (err)
950 return err;
951
952 count = ocount;
953
954 mutex_lock(&inode->i_mutex);
955 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
956
957 /* We can write back this queue in page reclaim */
958 current->backing_dev_info = mapping->backing_dev_info;
959
960 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
961 if (err)
962 goto out;
963
964 if (count == 0)
965 goto out;
966
967 err = file_remove_suid(file);
968 if (err)
969 goto out;
970
971 file_update_time(file);
972
973 if (file->f_flags & O_DIRECT) {
974 written = generic_file_direct_write(iocb, iov, &nr_segs,
975 pos, &iocb->ki_pos,
976 count, ocount);
977 if (written < 0 || written == count)
978 goto out;
979
980 pos += written;
981 count -= written;
982
983 iov_iter_init(&i, iov, nr_segs, count, written);
984 written_buffered = fuse_perform_write(file, mapping, &i, pos);
985 if (written_buffered < 0) {
986 err = written_buffered;
987 goto out;
988 }
989 endbyte = pos + written_buffered - 1;
990
991 err = filemap_write_and_wait_range(file->f_mapping, pos,
992 endbyte);
993 if (err)
994 goto out;
995
996 invalidate_mapping_pages(file->f_mapping,
997 pos >> PAGE_CACHE_SHIFT,
998 endbyte >> PAGE_CACHE_SHIFT);
999
1000 written += written_buffered;
1001 iocb->ki_pos = pos + written_buffered;
1002 } else {
1003 iov_iter_init(&i, iov, nr_segs, count, 0);
1004 written = fuse_perform_write(file, mapping, &i, pos);
1005 if (written >= 0)
1006 iocb->ki_pos = pos + written;
1007 }
1008 out:
1009 current->backing_dev_info = NULL;
1010 mutex_unlock(&inode->i_mutex);
1011
1012 return written ? written : err;
1013 }
1014
1015 static void fuse_release_user_pages(struct fuse_req *req, int write)
1016 {
1017 unsigned i;
1018
1019 for (i = 0; i < req->num_pages; i++) {
1020 struct page *page = req->pages[i];
1021 if (write)
1022 set_page_dirty_lock(page);
1023 put_page(page);
1024 }
1025 }
1026
1027 static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
1028 size_t *nbytesp, int write)
1029 {
1030 size_t nbytes = *nbytesp;
1031 unsigned long user_addr = (unsigned long) buf;
1032 unsigned offset = user_addr & ~PAGE_MASK;
1033 int npages;
1034
1035 /* Special case for kernel I/O: can copy directly into the buffer */
1036 if (segment_eq(get_fs(), KERNEL_DS)) {
1037 if (write)
1038 req->in.args[1].value = (void *) user_addr;
1039 else
1040 req->out.args[0].value = (void *) user_addr;
1041
1042 return 0;
1043 }
1044
1045 nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
1046 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
1047 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
1048 npages = get_user_pages_fast(user_addr, npages, !write, req->pages);
1049 if (npages < 0)
1050 return npages;
1051
1052 req->num_pages = npages;
1053 req->page_offset = offset;
1054
1055 if (write)
1056 req->in.argpages = 1;
1057 else
1058 req->out.argpages = 1;
1059
1060 nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
1061 *nbytesp = min(*nbytesp, nbytes);
1062
1063 return 0;
1064 }
1065
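/*
 * Perform direct (uncached) I/O on a user buffer, splitting it into
 * requests of at most max_read/max_write bytes each.
 */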
1066 ssize_t fuse_direct_io(struct file *file, const char __user *buf,
1067 size_t count, loff_t *ppos, int write)
1068 {
1069 struct fuse_file *ff = file->private_data;
1070 struct fuse_conn *fc = ff->fc;
1071 size_t nmax = write ? fc->max_write : fc->max_read;
1072 loff_t pos = *ppos;
1073 ssize_t res = 0;
1074 struct fuse_req *req;
1075
1076 req = fuse_get_req(fc);
1077 if (IS_ERR(req))
1078 return PTR_ERR(req);
1079
1080 while (count) {
1081 size_t nres;
1082 fl_owner_t owner = current->files;
1083 size_t nbytes = min(count, nmax);
1084 int err = fuse_get_user_pages(req, buf, &nbytes, write);
1085 if (err) {
1086 res = err;
1087 break;
1088 }
1089
1090 if (write)
1091 nres = fuse_send_write(req, file, pos, nbytes, owner);
1092 else
1093 nres = fuse_send_read(req, file, pos, nbytes, owner);
1094
1095 fuse_release_user_pages(req, !write);
1096 if (req->out.h.error) {
1097 if (!res)
1098 res = req->out.h.error;
1099 break;
1100 } else if (nres > nbytes) {
1101 res = -EIO;
1102 break;
1103 }
1104 count -= nres;
1105 res += nres;
1106 pos += nres;
1107 buf += nres;
1108 if (nres != nbytes)
1109 break;
1110 if (count) {
1111 fuse_put_request(fc, req);
1112 req = fuse_get_req(fc);
1113 if (IS_ERR(req))
1114 break;
1115 }
1116 }
1117 if (!IS_ERR(req))
1118 fuse_put_request(fc, req);
1119 if (res > 0)
1120 *ppos = pos;
1121
1122 return res;
1123 }
1124 EXPORT_SYMBOL_GPL(fuse_direct_io);
1125
1126 static ssize_t fuse_direct_read(struct file *file, char __user *buf,
1127 size_t count, loff_t *ppos)
1128 {
1129 ssize_t res;
1130 struct inode *inode = file->f_path.dentry->d_inode;
1131
1132 if (is_bad_inode(inode))
1133 return -EIO;
1134
1135 res = fuse_direct_io(file, buf, count, ppos, 0);
1136
1137 fuse_invalidate_attr(inode);
1138
1139 return res;
1140 }
1141
1142 static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
1143 size_t count, loff_t *ppos)
1144 {
1145 struct inode *inode = file->f_path.dentry->d_inode;
1146 ssize_t res;
1147
1148 res = generic_write_checks(file, ppos, &count, 0);
1149 if (!res) {
1150 res = fuse_direct_io(file, buf, count, ppos, 1);
1151 if (res > 0)
1152 fuse_write_update_size(inode, *ppos);
1153 }
1154
1155 fuse_invalidate_attr(inode);
1156
1157 return res;
1158 }
1159
1160 static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
1161 size_t count, loff_t *ppos)
1162 {
1163 struct inode *inode = file->f_path.dentry->d_inode;
1164 ssize_t res;
1165
1166 if (is_bad_inode(inode))
1167 return -EIO;
1168
1169 /* Don't allow parallel writes to the same file */
1170 mutex_lock(&inode->i_mutex);
1171 res = __fuse_direct_write(file, buf, count, ppos);
1172 mutex_unlock(&inode->i_mutex);
1173
1174 return res;
1175 }
1176
1177 static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
1178 {
1179 __free_page(req->pages[0]);
1180 fuse_file_put(req->ff, false);
1181 }
1182
1183 static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
1184 {
1185 struct inode *inode = req->inode;
1186 struct fuse_inode *fi = get_fuse_inode(inode);
1187 struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
1188
1189 list_del(&req->writepages_entry);
1190 dec_bdi_stat(bdi, BDI_WRITEBACK);
1191 dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
1192 bdi_writeout_inc(bdi);
1193 wake_up(&fi->page_waitq);
1194 }
1195
1196 /* Called under fc->lock, may release and reacquire it */
1197 static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
1198 __releases(fc->lock)
1199 __acquires(fc->lock)
1200 {
1201 struct fuse_inode *fi = get_fuse_inode(req->inode);
1202 loff_t size = i_size_read(req->inode);
1203 struct fuse_write_in *inarg = &req->misc.write.in;
1204
1205 if (!fc->connected)
1206 goto out_free;
1207
1208 if (inarg->offset + PAGE_CACHE_SIZE <= size) {
1209 inarg->size = PAGE_CACHE_SIZE;
1210 } else if (inarg->offset < size) {
1211 inarg->size = size & (PAGE_CACHE_SIZE - 1);
1212 } else {
1213 /* Got truncated off completely */
1214 goto out_free;
1215 }
1216
1217 req->in.args[1].size = inarg->size;
1218 fi->writectr++;
1219 fuse_request_send_background_locked(fc, req);
1220 return;
1221
1222 out_free:
1223 fuse_writepage_finish(fc, req);
1224 spin_unlock(&fc->lock);
1225 fuse_writepage_free(fc, req);
1226 fuse_put_request(fc, req);
1227 spin_lock(&fc->lock);
1228 }
1229
1230 /*
1231 * If fi->writectr is positive (no truncate or fsync going on) send
1232 * all queued writepage requests.
1233 *
1234 * Called with fc->lock
1235 */
1236 void fuse_flush_writepages(struct inode *inode)
1237 __releases(fc->lock)
1238 __acquires(fc->lock)
1239 {
1240 struct fuse_conn *fc = get_fuse_conn(inode);
1241 struct fuse_inode *fi = get_fuse_inode(inode);
1242 struct fuse_req *req;
1243
1244 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
1245 req = list_entry(fi->queued_writes.next, struct fuse_req, list);
1246 list_del_init(&req->list);
1247 fuse_send_writepage(fc, req);
1248 }
1249 }
1250
1251 static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
1252 {
1253 struct inode *inode = req->inode;
1254 struct fuse_inode *fi = get_fuse_inode(inode);
1255
1256 mapping_set_error(inode->i_mapping, req->out.h.error);
1257 spin_lock(&fc->lock);
1258 fi->writectr--;
1259 fuse_writepage_finish(fc, req);
1260 spin_unlock(&fc->lock);
1261 fuse_writepage_free(fc, req);
1262 }
1263
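/*
 * Write back a single locked page.  The contents are copied to a
 * temporary page, so writeback can be ended immediately instead of
 * waiting for the userspace filesystem to complete the request.
 */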
1264 static int fuse_writepage_locked(struct page *page)
1265 {
1266 struct address_space *mapping = page->mapping;
1267 struct inode *inode = mapping->host;
1268 struct fuse_conn *fc = get_fuse_conn(inode);
1269 struct fuse_inode *fi = get_fuse_inode(inode);
1270 struct fuse_req *req;
1271 struct fuse_file *ff;
1272 struct page *tmp_page;
1273
1274 set_page_writeback(page);
1275
1276 req = fuse_request_alloc_nofs();
1277 if (!req)
1278 goto err;
1279
1280 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1281 if (!tmp_page)
1282 goto err_free;
1283
1284 spin_lock(&fc->lock);
1285 BUG_ON(list_empty(&fi->write_files));
1286 ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
1287 req->ff = fuse_file_get(ff);
1288 spin_unlock(&fc->lock);
1289
1290 fuse_write_fill(req, ff, page_offset(page), 0);
1291
1292 copy_highpage(tmp_page, page);
1293 req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
1294 req->in.argpages = 1;
1295 req->num_pages = 1;
1296 req->pages[0] = tmp_page;
1297 req->page_offset = 0;
1298 req->end = fuse_writepage_end;
1299 req->inode = inode;
1300
1301 inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
1302 inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
1303
1304 spin_lock(&fc->lock);
1305 list_add(&req->writepages_entry, &fi->writepages);
1306 list_add_tail(&req->list, &fi->queued_writes);
1307 fuse_flush_writepages(inode);
1308 spin_unlock(&fc->lock);
1309
1310 end_page_writeback(page);
1311
1312 return 0;
1313
1314 err_free:
1315 fuse_request_free(req);
1316 err:
1317 end_page_writeback(page);
1318 return -ENOMEM;
1319 }
1320
1321 static int fuse_writepage(struct page *page, struct writeback_control *wbc)
1322 {
1323 int err;
1324
1325 err = fuse_writepage_locked(page);
1326 unlock_page(page);
1327
1328 return err;
1329 }
1330
1331 static int fuse_launder_page(struct page *page)
1332 {
1333 int err = 0;
1334 if (clear_page_dirty_for_io(page)) {
1335 struct inode *inode = page->mapping->host;
1336 err = fuse_writepage_locked(page);
1337 if (!err)
1338 fuse_wait_on_page_writeback(inode, page->index);
1339 }
1340 return err;
1341 }
1342
1343 /*
1344 * Write back dirty pages now, because there may not be any suitable
1345 * open files later
1346 */
1347 static void fuse_vma_close(struct vm_area_struct *vma)
1348 {
1349 filemap_write_and_wait(vma->vm_file->f_mapping);
1350 }
1351
1352 /*
1353 * Wait for writeback against this page to complete before allowing it
1354 * to be marked dirty again, and hence written back again, possibly
1355 * before the previous writepage completed.
1356 *
1357 * Block here, instead of in ->writepage(), so that the userspace fs
1358 * can only block processes actually operating on the filesystem.
1359 *
1360 * Otherwise unprivileged userspace fs would be able to block
1361 * unrelated:
1362 *
1363 * - page migration
1364 * - sync(2)
1365 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
1366 */
1367 static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1368 {
1369 struct page *page = vmf->page;
1370 /*
1371 * Don't use page->mapping as it may become NULL from a
1372 * concurrent truncate.
1373 */
1374 struct inode *inode = vma->vm_file->f_mapping->host;
1375
1376 fuse_wait_on_page_writeback(inode, page->index);
1377 return 0;
1378 }
1379
1380 static const struct vm_operations_struct fuse_file_vm_ops = {
1381 .close = fuse_vma_close,
1382 .fault = filemap_fault,
1383 .page_mkwrite = fuse_page_mkwrite,
1384 };
1385
1386 static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
1387 {
1388 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
1389 struct inode *inode = file->f_dentry->d_inode;
1390 struct fuse_conn *fc = get_fuse_conn(inode);
1391 struct fuse_inode *fi = get_fuse_inode(inode);
1392 struct fuse_file *ff = file->private_data;
1393 /*
1394 * file may be written through mmap, so chain it onto the
1395 * inode's write_files list
1396 */
1397 spin_lock(&fc->lock);
1398 if (list_empty(&ff->write_entry))
1399 list_add(&ff->write_entry, &fi->write_files);
1400 spin_unlock(&fc->lock);
1401 }
1402 file_accessed(file);
1403 vma->vm_ops = &fuse_file_vm_ops;
1404 return 0;
1405 }
1406
1407 static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
1408 {
1409 /* Can't provide the coherency needed for MAP_SHARED */
1410 if (vma->vm_flags & VM_MAYSHARE)
1411 return -ENODEV;
1412
1413 invalidate_inode_pages2(file->f_mapping);
1414
1415 return generic_file_mmap(file, vma);
1416 }
1417
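/*
 * Translate a fuse_file_lock reply from the server into a VFS
 * struct file_lock, rejecting out-of-range values.
 */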
1418 static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
1419 struct file_lock *fl)
1420 {
1421 switch (ffl->type) {
1422 case F_UNLCK:
1423 break;
1424
1425 case F_RDLCK:
1426 case F_WRLCK:
1427 if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX ||
1428 ffl->end < ffl->start)
1429 return -EIO;
1430
1431 fl->fl_start = ffl->start;
1432 fl->fl_end = ffl->end;
1433 fl->fl_pid = ffl->pid;
1434 break;
1435
1436 default:
1437 return -EIO;
1438 }
1439 fl->fl_type = ffl->type;
1440 return 0;
1441 }
1442
1443 static void fuse_lk_fill(struct fuse_req *req, struct file *file,
1444 const struct file_lock *fl, int opcode, pid_t pid,
1445 int flock)
1446 {
1447 struct inode *inode = file->f_path.dentry->d_inode;
1448 struct fuse_conn *fc = get_fuse_conn(inode);
1449 struct fuse_file *ff = file->private_data;
1450 struct fuse_lk_in *arg = &req->misc.lk_in;
1451
1452 arg->fh = ff->fh;
1453 arg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
1454 arg->lk.start = fl->fl_start;
1455 arg->lk.end = fl->fl_end;
1456 arg->lk.type = fl->fl_type;
1457 arg->lk.pid = pid;
1458 if (flock)
1459 arg->lk_flags |= FUSE_LK_FLOCK;
1460 req->in.h.opcode = opcode;
1461 req->in.h.nodeid = get_node_id(inode);
1462 req->in.numargs = 1;
1463 req->in.args[0].size = sizeof(*arg);
1464 req->in.args[0].value = arg;
1465 }
1466
1467 static int fuse_getlk(struct file *file, struct file_lock *fl)
1468 {
1469 struct inode *inode = file->f_path.dentry->d_inode;
1470 struct fuse_conn *fc = get_fuse_conn(inode);
1471 struct fuse_req *req;
1472 struct fuse_lk_out outarg;
1473 int err;
1474
1475 req = fuse_get_req(fc);
1476 if (IS_ERR(req))
1477 return PTR_ERR(req);
1478
1479 fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0);
1480 req->out.numargs = 1;
1481 req->out.args[0].size = sizeof(outarg);
1482 req->out.args[0].value = &outarg;
1483 fuse_request_send(fc, req);
1484 err = req->out.h.error;
1485 fuse_put_request(fc, req);
1486 if (!err)
1487 err = convert_fuse_file_lock(&outarg.lk, fl);
1488
1489 return err;
1490 }
1491
1492 static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1493 {
1494 struct inode *inode = file->f_path.dentry->d_inode;
1495 struct fuse_conn *fc = get_fuse_conn(inode);
1496 struct fuse_req *req;
1497 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
1498 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
1499 int err;
1500
1501 if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
1502 /* NLM needs asynchronous locks, which we don't support yet */
1503 return -ENOLCK;
1504 }
1505
1506 /* Unlock on close is handled by the flush method */
1507 if (fl->fl_flags & FL_CLOSE)
1508 return 0;
1509
1510 req = fuse_get_req(fc);
1511 if (IS_ERR(req))
1512 return PTR_ERR(req);
1513
1514 fuse_lk_fill(req, file, fl, opcode, pid, flock);
1515 fuse_request_send(fc, req);
1516 err = req->out.h.error;
1517 /* locking is restartable */
1518 if (err == -EINTR)
1519 err = -ERESTARTSYS;
1520 fuse_put_request(fc, req);
1521 return err;
1522 }
1523
1524 static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
1525 {
1526 struct inode *inode = file->f_path.dentry->d_inode;
1527 struct fuse_conn *fc = get_fuse_conn(inode);
1528 int err;
1529
1530 if (cmd == F_CANCELLK) {
1531 err = 0;
1532 } else if (cmd == F_GETLK) {
1533 if (fc->no_lock) {
1534 posix_test_lock(file, fl);
1535 err = 0;
1536 } else
1537 err = fuse_getlk(file, fl);
1538 } else {
1539 if (fc->no_lock)
1540 err = posix_lock_file(file, fl, NULL);
1541 else
1542 err = fuse_setlk(file, fl, 0);
1543 }
1544 return err;
1545 }
1546
1547 static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl)
1548 {
1549 struct inode *inode = file->f_path.dentry->d_inode;
1550 struct fuse_conn *fc = get_fuse_conn(inode);
1551 int err;
1552
1553 if (fc->no_flock) {
1554 err = flock_lock_file_wait(file, fl);
1555 } else {
1556 struct fuse_file *ff = file->private_data;
1557
1558 /* emulate flock with POSIX locks */
1559 fl->fl_owner = (fl_owner_t) file;
1560 ff->flock = true;
1561 err = fuse_setlk(file, fl, 1);
1562 }
1563
1564 return err;
1565 }
1566
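/*
 * Map a file block to a device block via FUSE_BMAP; only meaningful for
 * block-device backed (fuseblk) mounts.
 */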
1567 static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
1568 {
1569 struct inode *inode = mapping->host;
1570 struct fuse_conn *fc = get_fuse_conn(inode);
1571 struct fuse_req *req;
1572 struct fuse_bmap_in inarg;
1573 struct fuse_bmap_out outarg;
1574 int err;
1575
1576 if (!inode->i_sb->s_bdev || fc->no_bmap)
1577 return 0;
1578
1579 req = fuse_get_req(fc);
1580 if (IS_ERR(req))
1581 return 0;
1582
1583 memset(&inarg, 0, sizeof(inarg));
1584 inarg.block = block;
1585 inarg.blocksize = inode->i_sb->s_blocksize;
1586 req->in.h.opcode = FUSE_BMAP;
1587 req->in.h.nodeid = get_node_id(inode);
1588 req->in.numargs = 1;
1589 req->in.args[0].size = sizeof(inarg);
1590 req->in.args[0].value = &inarg;
1591 req->out.numargs = 1;
1592 req->out.args[0].size = sizeof(outarg);
1593 req->out.args[0].value = &outarg;
1594 fuse_request_send(fc, req);
1595 err = req->out.h.error;
1596 fuse_put_request(fc, req);
1597 if (err == -ENOSYS)
1598 fc->no_bmap = 1;
1599
1600 return err ? 0 : outarg.block;
1601 }
1602
1603 static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1604 {
1605 loff_t retval;
1606 struct inode *inode = file->f_path.dentry->d_inode;
1607
1608 /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
1609 if (origin == SEEK_CUR || origin == SEEK_SET)
1610 return generic_file_llseek(file, offset, origin);
1611
1612 mutex_lock(&inode->i_mutex);
1613 retval = fuse_update_attributes(inode, NULL, file, NULL);
1614 if (!retval)
1615 retval = generic_file_llseek(file, offset, origin);
1616 mutex_unlock(&inode->i_mutex);
1617
1618 return retval;
1619 }
1620
1621 static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
1622 unsigned int nr_segs, size_t bytes, bool to_user)
1623 {
1624 struct iov_iter ii;
1625 int page_idx = 0;
1626
1627 if (!bytes)
1628 return 0;
1629
1630 iov_iter_init(&ii, iov, nr_segs, bytes, 0);
1631
1632 while (iov_iter_count(&ii)) {
1633 struct page *page = pages[page_idx++];
1634 size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
1635 void *kaddr;
1636
1637 kaddr = kmap(page);
1638
1639 while (todo) {
1640 char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
1641 size_t iov_len = ii.iov->iov_len - ii.iov_offset;
1642 size_t copy = min(todo, iov_len);
1643 size_t left;
1644
1645 if (!to_user)
1646 left = copy_from_user(kaddr, uaddr, copy);
1647 else
1648 left = copy_to_user(uaddr, kaddr, copy);
1649
1650 if (unlikely(left))
1651 return -EFAULT;
1652
1653 iov_iter_advance(&ii, copy);
1654 todo -= copy;
1655 kaddr += copy;
1656 }
1657
1658 kunmap(page);
1659 }
1660
1661 return 0;
1662 }
1663
1664 /*
1665 * CUSE servers compiled on 32bit broke on 64bit kernels because the
1666 * ABI was defined to be 'struct iovec' which is different on 32bit
1667 * and 64bit. Fortunately we can determine which structure the server
1668 * used from the size of the reply.
1669 */
1670 static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
1671 size_t transferred, unsigned count,
1672 bool is_compat)
1673 {
1674 #ifdef CONFIG_COMPAT
1675 if (count * sizeof(struct compat_iovec) == transferred) {
1676 struct compat_iovec *ciov = src;
1677 unsigned i;
1678
1679 /*
1680 * With this interface a 32bit server cannot support
1681 * non-compat (i.e. ones coming from 64bit apps) ioctl
1682 * requests
1683 */
1684 if (!is_compat)
1685 return -EINVAL;
1686
1687 for (i = 0; i < count; i++) {
1688 dst[i].iov_base = compat_ptr(ciov[i].iov_base);
1689 dst[i].iov_len = ciov[i].iov_len;
1690 }
1691 return 0;
1692 }
1693 #endif
1694
1695 if (count * sizeof(struct iovec) != transferred)
1696 return -EIO;
1697
1698 memcpy(dst, src, transferred);
1699 return 0;
1700 }
1701
1702 /* Make sure iov_length() won't overflow */
1703 static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
1704 {
1705 size_t n;
1706 u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
1707
1708 for (n = 0; n < count; n++, iov++) {
1709 if (iov->iov_len > (size_t) max)
1710 return -ENOMEM;
1711 max -= iov->iov_len;
1712 }
1713 return 0;
1714 }
1715
1716 static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
1717 void *src, size_t transferred, unsigned count,
1718 bool is_compat)
1719 {
1720 unsigned i;
1721 struct fuse_ioctl_iovec *fiov = src;
1722
1723 if (fc->minor < 16) {
1724 return fuse_copy_ioctl_iovec_old(dst, src, transferred,
1725 count, is_compat);
1726 }
1727
1728 if (count * sizeof(struct fuse_ioctl_iovec) != transferred)
1729 return -EIO;
1730
1731 for (i = 0; i < count; i++) {
1732 /* Did the server supply an inappropriate value? */
1733 if (fiov[i].base != (unsigned long) fiov[i].base ||
1734 fiov[i].len != (unsigned long) fiov[i].len)
1735 return -EIO;
1736
1737 dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base;
1738 dst[i].iov_len = (size_t) fiov[i].len;
1739
1740 #ifdef CONFIG_COMPAT
1741 if (is_compat &&
1742 (ptr_to_compat(dst[i].iov_base) != fiov[i].base ||
1743 (compat_size_t) dst[i].iov_len != fiov[i].len))
1744 return -EIO;
1745 #endif
1746 }
1747
1748 return 0;
1749 }
1750
1751
1752 /*
1753 * For ioctls, there is no generic way to determine how much memory
1754 * needs to be read and/or written. Furthermore, ioctls are allowed
1755 * to dereference the passed pointer, so the parameter requires deep
1756 * copying but FUSE has no idea whatsoever about what to copy in or
1757 * out.
1758 *
1759 * This is solved by allowing FUSE server to retry ioctl with
1760 * necessary in/out iovecs. Let's assume the ioctl implementation
1761 * needs to read in the following structure.
1762 *
1763 * struct a {
1764 * char *buf;
1765 * size_t buflen;
1766 * }
1767 *
1768 * On the first callout to FUSE server, inarg->in_size and
1769 * inarg->out_size will be zero; then, the server completes the ioctl
1770 * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and
1771 * the actual iov array to
1772 *
1773 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } }
1774 *
1775 * which tells FUSE to copy in the requested area and retry the ioctl.
1776 * On the second round, the server has access to the structure and
1777 * from that it can tell what to look for next, so on the invocation,
1778 * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to
1779 *
1780 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) },
1781 * { .iov_base = a.buf, .iov_len = a.buflen } }
1782 *
1783 * FUSE will copy both struct a and the pointed buffer from the
1784 * process doing the ioctl and retry ioctl with both struct a and the
1785 * buffer.
1786 *
1787 * This time, FUSE server has everything it needs and completes ioctl
1788 * without FUSE_IOCTL_RETRY which finishes the ioctl call.
1789 *
1790 * Copying data out works the same way.
1791 *
1792 * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel
1793 * automatically initializes in and out iovs by decoding @cmd with
1794 * _IOC_* macros and the server is not allowed to request RETRY. This
1795 * limits ioctl data transfers to well-formed ioctls and is the forced
1796 * behavior for all FUSE servers.
1797 */
1798 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1799 unsigned int flags)
1800 {
1801 struct fuse_file *ff = file->private_data;
1802 struct fuse_conn *fc = ff->fc;
1803 struct fuse_ioctl_in inarg = {
1804 .fh = ff->fh,
1805 .cmd = cmd,
1806 .arg = arg,
1807 .flags = flags
1808 };
1809 struct fuse_ioctl_out outarg;
1810 struct fuse_req *req = NULL;
1811 struct page **pages = NULL;
1812 struct iovec *iov_page = NULL;
1813 struct iovec *in_iov = NULL, *out_iov = NULL;
1814 unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
1815 size_t in_size, out_size, transferred;
1816 int err;
1817
1818 #if BITS_PER_LONG == 32
1819 inarg.flags |= FUSE_IOCTL_32BIT;
1820 #else
1821 if (flags & FUSE_IOCTL_COMPAT)
1822 inarg.flags |= FUSE_IOCTL_32BIT;
1823 #endif
1824
1825 /* assume all the iovs returned by the client always fit in a page */
1826 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
1827
1828 err = -ENOMEM;
1829 pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
1830 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
1831 if (!pages || !iov_page)
1832 goto out;
1833
1834 /*
1835 * If restricted, initialize IO parameters as encoded in @cmd.
1836 * RETRY from server is not allowed.
1837 */
1838 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
1839 struct iovec *iov = iov_page;
1840
1841 iov->iov_base = (void __user *)arg;
1842 iov->iov_len = _IOC_SIZE(cmd);
1843
1844 if (_IOC_DIR(cmd) & _IOC_WRITE) {
1845 in_iov = iov;
1846 in_iovs = 1;
1847 }
1848
1849 if (_IOC_DIR(cmd) & _IOC_READ) {
1850 out_iov = iov;
1851 out_iovs = 1;
1852 }
1853 }
1854
1855 retry:
1856 inarg.in_size = in_size = iov_length(in_iov, in_iovs);
1857 inarg.out_size = out_size = iov_length(out_iov, out_iovs);
1858
1859 /*
1860 * Out data can be used either for actual out data or iovs,
1861 * make sure there always is at least one page.
1862 */
1863 out_size = max_t(size_t, out_size, PAGE_SIZE);
1864 max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE);
1865
1866 /* make sure there are enough buffer pages and init request with them */
1867 err = -ENOMEM;
1868 if (max_pages > FUSE_MAX_PAGES_PER_REQ)
1869 goto out;
1870 while (num_pages < max_pages) {
1871 pages[num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
1872 if (!pages[num_pages])
1873 goto out;
1874 num_pages++;
1875 }
1876
1877 req = fuse_get_req(fc);
1878 if (IS_ERR(req)) {
1879 err = PTR_ERR(req);
1880 req = NULL;
1881 goto out;
1882 }
1883 memcpy(req->pages, pages, sizeof(req->pages[0]) * num_pages);
1884 req->num_pages = num_pages;
1885
1886 /* okay, let's send it to the client */
1887 req->in.h.opcode = FUSE_IOCTL;
1888 req->in.h.nodeid = ff->nodeid;
1889 req->in.numargs = 1;
1890 req->in.args[0].size = sizeof(inarg);
1891 req->in.args[0].value = &inarg;
1892 if (in_size) {
1893 req->in.numargs++;
1894 req->in.args[1].size = in_size;
1895 req->in.argpages = 1;
1896
1897 err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
1898 false);
1899 if (err)
1900 goto out;
1901 }
1902
1903 req->out.numargs = 2;
1904 req->out.args[0].size = sizeof(outarg);
1905 req->out.args[0].value = &outarg;
1906 req->out.args[1].size = out_size;
1907 req->out.argpages = 1;
1908 req->out.argvar = 1;
1909
1910 fuse_request_send(fc, req);
1911 err = req->out.h.error;
1912 transferred = req->out.args[1].size;
1913 fuse_put_request(fc, req);
1914 req = NULL;
1915 if (err)
1916 goto out;
1917
1918 /* did it ask for retry? */
1919 if (outarg.flags & FUSE_IOCTL_RETRY) {
1920 void *vaddr;
1921
1922 /* no retry if in restricted mode */
1923 err = -EIO;
1924 if (!(flags & FUSE_IOCTL_UNRESTRICTED))
1925 goto out;
1926
1927 in_iovs = outarg.in_iovs;
1928 out_iovs = outarg.out_iovs;
1929
1930 /*
1931 	 * Make sure the counts are within bounds; the separate
1932 	 * checks protect against overflow.
1933 */
1934 err = -ENOMEM;
1935 if (in_iovs > FUSE_IOCTL_MAX_IOV ||
1936 out_iovs > FUSE_IOCTL_MAX_IOV ||
1937 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
1938 goto out;
1939
1940 vaddr = kmap_atomic(pages[0]);
1941 err = fuse_copy_ioctl_iovec(fc, iov_page, vaddr,
1942 transferred, in_iovs + out_iovs,
1943 (flags & FUSE_IOCTL_COMPAT) != 0);
1944 kunmap_atomic(vaddr);
1945 if (err)
1946 goto out;
1947
1948 in_iov = iov_page;
1949 out_iov = in_iov + in_iovs;
1950
1951 err = fuse_verify_ioctl_iov(in_iov, in_iovs);
1952 if (err)
1953 goto out;
1954
1955 err = fuse_verify_ioctl_iov(out_iov, out_iovs);
1956 if (err)
1957 goto out;
1958
1959 goto retry;
1960 }
1961
1962 err = -EIO;
1963 if (transferred > inarg.out_size)
1964 goto out;
1965
1966 err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
1967 out:
1968 if (req)
1969 fuse_put_request(fc, req);
1970 free_page((unsigned long) iov_page);
1971 while (num_pages)
1972 __free_page(pages[--num_pages]);
1973 kfree(pages);
1974
1975 return err ? err : outarg.result;
1976 }
1977 EXPORT_SYMBOL_GPL(fuse_do_ioctl);
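/*
 * Userspace side of the retry handshake documented above fuse_do_ioctl(),
 * sketched with libfuse's low-level reply helpers.  struct a and the helper
 * itself are hypothetical (taken from the example in the comment); only
 * fuse_reply_ioctl_retry() and fuse_reply_ioctl() are real libfuse calls,
 * and their exact signatures should be checked against the libfuse version
 * in use.
 */
#include <string.h>
#include <sys/uio.h>
#define FUSE_USE_VERSION 30
#include <fuse_lowlevel.h>

struct a {				/* the structure from the example above */
	char	*buf;
	size_t	 buflen;
};

static void handle_a_ioctl(fuse_req_t req, void *arg,
			   const void *in_buf, size_t in_bufsz)
{
	struct a a;

	if (in_bufsz < sizeof(a)) {
		/* First callout: ask the kernel to copy in struct a itself. */
		struct iovec in = { .iov_base = arg, .iov_len = sizeof(a) };

		fuse_reply_ioctl_retry(req, &in, 1, NULL, 0);
		return;
	}

	memcpy(&a, in_buf, sizeof(a));
	if (in_bufsz < sizeof(a) + a.buflen) {
		/* Second callout: also ask for the buffer a.buf points to. */
		struct iovec in[2] = {
			{ .iov_base = arg,   .iov_len = sizeof(a) },
			{ .iov_base = a.buf, .iov_len = a.buflen  },
		};

		fuse_reply_ioctl_retry(req, in, 2, NULL, 0);
		return;
	}

	/* Third callout: everything is here; finish without FUSE_IOCTL_RETRY. */
	fuse_reply_ioctl(req, 0, NULL, 0);
}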
1978
1979 long fuse_ioctl_common(struct file *file, unsigned int cmd,
1980 unsigned long arg, unsigned int flags)
1981 {
1982 struct inode *inode = file->f_dentry->d_inode;
1983 struct fuse_conn *fc = get_fuse_conn(inode);
1984
1985 if (!fuse_allow_task(fc, current))
1986 return -EACCES;
1987
1988 if (is_bad_inode(inode))
1989 return -EIO;
1990
1991 return fuse_do_ioctl(file, cmd, arg, flags);
1992 }
1993
1994 static long fuse_file_ioctl(struct file *file, unsigned int cmd,
1995 unsigned long arg)
1996 {
1997 return fuse_ioctl_common(file, cmd, arg, 0);
1998 }
1999
2000 static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
2001 unsigned long arg)
2002 {
2003 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
2004 }
2005
2006 /*
2007 * All files that have been polled are linked into the RB tree
2008 * fuse_conn->polled_files, which is indexed by kh.  Walk the tree and
2009 * find the matching one.
2010 */
2011 static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh,
2012 struct rb_node **parent_out)
2013 {
2014 struct rb_node **link = &fc->polled_files.rb_node;
2015 struct rb_node *last = NULL;
2016
2017 while (*link) {
2018 struct fuse_file *ff;
2019
2020 last = *link;
2021 ff = rb_entry(last, struct fuse_file, polled_node);
2022
2023 if (kh < ff->kh)
2024 link = &last->rb_left;
2025 else if (kh > ff->kh)
2026 link = &last->rb_right;
2027 else
2028 return link;
2029 }
2030
2031 if (parent_out)
2032 *parent_out = last;
2033 return link;
2034 }
2035
2036 /*
2037 * The file is about to be polled. Make sure it's on the polled_files
2038 * RB tree.  Note that once a file has been added to the polled_files
2039 * tree it is not removed until the file is released, because a file
2040 * polled once is likely to be polled again.
2041 */
2042 static void fuse_register_polled_file(struct fuse_conn *fc,
2043 struct fuse_file *ff)
2044 {
2045 spin_lock(&fc->lock);
2046 if (RB_EMPTY_NODE(&ff->polled_node)) {
2047 struct rb_node **link, *parent;
2048
2049 link = fuse_find_polled_node(fc, ff->kh, &parent);
2050 BUG_ON(*link);
2051 rb_link_node(&ff->polled_node, parent, link);
2052 rb_insert_color(&ff->polled_node, &fc->polled_files);
2053 }
2054 spin_unlock(&fc->lock);
2055 }
2056
2057 unsigned fuse_file_poll(struct file *file, poll_table *wait)
2058 {
2059 struct fuse_file *ff = file->private_data;
2060 struct fuse_conn *fc = ff->fc;
2061 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
2062 struct fuse_poll_out outarg;
2063 struct fuse_req *req;
2064 int err;
2065
2066 if (fc->no_poll)
2067 return DEFAULT_POLLMASK;
2068
2069 poll_wait(file, &ff->poll_wait, wait);
2070
2071 /*
2072 * Ask for notification iff there's someone waiting for it.
2073 * The client may ignore the flag and always notify.
2074 */
2075 if (waitqueue_active(&ff->poll_wait)) {
2076 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY;
2077 fuse_register_polled_file(fc, ff);
2078 }
2079
2080 req = fuse_get_req(fc);
2081 if (IS_ERR(req))
2082 return POLLERR;
2083
2084 req->in.h.opcode = FUSE_POLL;
2085 req->in.h.nodeid = ff->nodeid;
2086 req->in.numargs = 1;
2087 req->in.args[0].size = sizeof(inarg);
2088 req->in.args[0].value = &inarg;
2089 req->out.numargs = 1;
2090 req->out.args[0].size = sizeof(outarg);
2091 req->out.args[0].value = &outarg;
2092 fuse_request_send(fc, req);
2093 err = req->out.h.error;
2094 fuse_put_request(fc, req);
2095
2096 if (!err)
2097 return outarg.revents;
2098 if (err == -ENOSYS) {
2099 fc->no_poll = 1;
2100 return DEFAULT_POLLMASK;
2101 }
2102 return POLLERR;
2103 }
2104 EXPORT_SYMBOL_GPL(fuse_file_poll);
2105
2106 /*
2107 * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
2108 * wakes up the poll waiters.
2109 */
2110 int fuse_notify_poll_wakeup(struct fuse_conn *fc,
2111 struct fuse_notify_poll_wakeup_out *outarg)
2112 {
2113 u64 kh = outarg->kh;
2114 struct rb_node **link;
2115
2116 spin_lock(&fc->lock);
2117
2118 link = fuse_find_polled_node(fc, kh, NULL);
2119 if (*link) {
2120 struct fuse_file *ff;
2121
2122 ff = rb_entry(*link, struct fuse_file, polled_node);
2123 wake_up_interruptible_sync(&ff->poll_wait);
2124 }
2125
2126 spin_unlock(&fc->lock);
2127 return 0;
2128 }
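/*
 * Userspace counterpart of the poll path above, sketched with libfuse's
 * low-level helpers: the poll handler keeps the poll handle when the kernel
 * asked to be notified (FUSE_POLL_SCHEDULE_NOTIFY) and replies with the
 * current readiness; a later fuse_lowlevel_notify_poll() becomes a
 * FUSE_NOTIFY_POLL message and ends up in fuse_notify_poll_wakeup().  The
 * handler bodies and the single saved handle are hypothetical
 * simplifications; fuse_reply_poll(), fuse_pollhandle_destroy() and
 * fuse_lowlevel_notify_poll() are the real libfuse calls.
 */
#define FUSE_USE_VERSION 30
#include <fuse_lowlevel.h>

static struct fuse_pollhandle *saved_ph;	/* one handle, one file */

/* Called for FUSE_POLL requests. */
static void my_poll(fuse_req_t req, struct fuse_pollhandle *ph)
{
	if (ph) {
		/* Keep the newest handle so we can notify later. */
		if (saved_ph)
			fuse_pollhandle_destroy(saved_ph);
		saved_ph = ph;
	}
	fuse_reply_poll(req, 0);	/* nothing readable yet */
}

/* Call when data arrives; wakes the waiters via FUSE_NOTIFY_POLL. */
static void my_data_ready(void)
{
	if (saved_ph)
		fuse_lowlevel_notify_poll(saved_ph);
}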
2129
2130 static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
2131 unsigned long nr_segs, loff_t *ppos, int rw)
2132 {
2133 const struct iovec *vector = iov;
2134 ssize_t ret = 0;
2135
2136 while (nr_segs > 0) {
2137 void __user *base;
2138 size_t len;
2139 ssize_t nr;
2140
2141 base = vector->iov_base;
2142 len = vector->iov_len;
2143 vector++;
2144 nr_segs--;
2145
2146 if (rw == WRITE)
2147 nr = __fuse_direct_write(filp, base, len, ppos);
2148 else
2149 nr = fuse_direct_read(filp, base, len, ppos);
2150
2151 if (nr < 0) {
2152 if (!ret)
2153 ret = nr;
2154 break;
2155 }
2156 ret += nr;
2157 if (nr != len)
2158 break;
2159 }
2160
2161 return ret;
2162 }
2163
2164
2165 static ssize_t
2166 fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2167 loff_t offset, unsigned long nr_segs)
2168 {
2169 ssize_t ret = 0;
2170 struct file *file = NULL;
2171 loff_t pos = 0;
2172
2173 file = iocb->ki_filp;
2174 pos = offset;
2175
2176 ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw);
2177
2178 return ret;
2179 }
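/*
 * What drives the path above from userspace: opening a file on a FUSE
 * mount with O_DIRECT makes reads and writes bypass the page cache and go
 * through ->direct_IO(), i.e. fuse_direct_IO() and fuse_loop_dio().  A
 * minimal sketch; the mount path is hypothetical.
 */
#define _GNU_SOURCE		/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/mnt/fuse/file", O_RDONLY | O_DIRECT);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = read(fd, buf, sizeof(buf));	/* served via fuse_direct_IO() */
	if (n < 0)
		perror("read");
	else
		printf("read %zd bytes\n", n);
	close(fd);
	return 0;
}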
2180
2181 static const struct file_operations fuse_file_operations = {
2182 .llseek = fuse_file_llseek,
2183 .read = do_sync_read,
2184 .aio_read = fuse_file_aio_read,
2185 .write = do_sync_write,
2186 .aio_write = fuse_file_aio_write,
2187 .mmap = fuse_file_mmap,
2188 .open = fuse_open,
2189 .flush = fuse_flush,
2190 .release = fuse_release,
2191 .fsync = fuse_fsync,
2192 .lock = fuse_file_lock,
2193 .flock = fuse_file_flock,
2194 .splice_read = generic_file_splice_read,
2195 .unlocked_ioctl = fuse_file_ioctl,
2196 .compat_ioctl = fuse_file_compat_ioctl,
2197 .poll = fuse_file_poll,
2198 };
2199
2200 static const struct file_operations fuse_direct_io_file_operations = {
2201 .llseek = fuse_file_llseek,
2202 .read = fuse_direct_read,
2203 .write = fuse_direct_write,
2204 .mmap = fuse_direct_mmap,
2205 .open = fuse_open,
2206 .flush = fuse_flush,
2207 .release = fuse_release,
2208 .fsync = fuse_fsync,
2209 .lock = fuse_file_lock,
2210 .flock = fuse_file_flock,
2211 .unlocked_ioctl = fuse_file_ioctl,
2212 .compat_ioctl = fuse_file_compat_ioctl,
2213 .poll = fuse_file_poll,
2214 /* no splice_read */
2215 };
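/*
 * fuse_direct_io_file_operations above is installed instead of
 * fuse_file_operations when the server sets FOPEN_DIRECT_IO in its open
 * reply.  With the high-level libfuse API that is a single flag in the
 * open handler; a minimal sketch (the handler itself is hypothetical):
 */
#define FUSE_USE_VERSION 26
#include <fuse.h>

static int my_open(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	fi->direct_io = 1;	/* reply with FOPEN_DIRECT_IO */
	return 0;
}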
2216
2217 static const struct address_space_operations fuse_file_aops = {
2218 .readpage = fuse_readpage,
2219 .writepage = fuse_writepage,
2220 .launder_page = fuse_launder_page,
2221 .readpages = fuse_readpages,
2222 .set_page_dirty = __set_page_dirty_nobuffers,
2223 .bmap = fuse_bmap,
2224 .direct_IO = fuse_direct_IO,
2225 };
2226
2227 void fuse_init_file_inode(struct inode *inode)
2228 {
2229 inode->i_fop = &fuse_file_operations;
2230 inode->i_data.a_ops = &fuse_file_aops;
2231 }
2232