1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 #include "fuse_i.h"
10 
11 #include <linux/pagemap.h>
12 #include <linux/file.h>
13 #include <linux/fs_context.h>
14 #include <linux/sched.h>
15 #include <linux/namei.h>
16 #include <linux/slab.h>
17 #include <linux/xattr.h>
18 #include <linux/iversion.h>
19 #include <linux/posix_acl.h>
20 #include <linux/security.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 
fuse_advise_use_readdirplus(struct inode * dir)24 static void fuse_advise_use_readdirplus(struct inode *dir)
25 {
26 	struct fuse_inode *fi = get_fuse_inode(dir);
27 
28 	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
29 }
30 
31 #if BITS_PER_LONG >= 64
/*
 * BITS_PER_LONG >= 64: the timeout value is stored directly in the
 * d_fsdata pointer of the dentry.
 */
static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
{
	void *encoded = (void *) time;

	entry->d_fsdata = encoded;
}
36 
fuse_dentry_time(const struct dentry * entry)37 static inline u64 fuse_dentry_time(const struct dentry *entry)
38 {
39 	return (u64)entry->d_fsdata;
40 }
41 
42 #else
43 union fuse_dentry {
44 	u64 time;
45 	struct rcu_head rcu;
46 };
47 
/*
 * BITS_PER_LONG < 64: store the timeout value in the union fuse_dentry
 * that d_fsdata points to.
 */
static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	union fuse_dentry *fd = dentry->d_fsdata;

	fd->time = time;
}
52 
fuse_dentry_time(const struct dentry * entry)53 static inline u64 fuse_dentry_time(const struct dentry *entry)
54 {
55 	return ((union fuse_dentry *) entry->d_fsdata)->time;
56 }
57 #endif
58 
/*
 * Store @time as the dentry timeout and keep DCACHE_OP_DELETE in sync
 * with it: the flag is wanted only when @time is zero and the
 * connection has delete_stale set.
 */
static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	bool want_delete = !time && fc->delete_stale;
	bool has_delete = dentry->d_flags & DCACHE_OP_DELETE;

	/*
	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
	 * The unlocked flag test above may race; that does not matter, either
	 * way it's just an optimization.
	 */
	if (want_delete != has_delete) {
		spin_lock(&dentry->d_lock);
		if (want_delete)
			dentry->d_flags |= DCACHE_OP_DELETE;
		else
			dentry->d_flags &= ~DCACHE_OP_DELETE;
		spin_unlock(&dentry->d_lock);
	}

	__fuse_dentry_settime(dentry, time);
}
79 
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until which the dentry/attributes are valid is stored
 * in dentry->d_fsdata and fuse_inode->i_time respectively.
 */
85 
86 /*
87  * Calculate the time in jiffies until a dentry/attributes are valid
88  */
time_to_jiffies(u64 sec,u32 nsec)89 static u64 time_to_jiffies(u64 sec, u32 nsec)
90 {
91 	if (sec || nsec) {
92 		struct timespec64 ts = {
93 			sec,
94 			min_t(u32, nsec, NSEC_PER_SEC - 1)
95 		};
96 
97 		return get_jiffies_64() + timespec64_to_jiffies(&ts);
98 	} else
99 		return 0;
100 }
101 
102 /*
103  * Set dentry and possibly attribute timeouts from the lookup/mk*
104  * replies
105  */
fuse_change_entry_timeout(struct dentry * entry,struct fuse_entry_out * o)106 void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
107 {
108 	fuse_dentry_settime(entry,
109 		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
110 }
111 
attr_timeout(struct fuse_attr_out * o)112 static u64 attr_timeout(struct fuse_attr_out *o)
113 {
114 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
115 }
116 
entry_attr_timeout(struct fuse_entry_out * o)117 u64 entry_attr_timeout(struct fuse_entry_out *o)
118 {
119 	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
120 }
121 
/* Add the bits in @mask to the inval_mask of the inode's fuse_inode */
void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	set_mask_bits(&fi->inval_mask, 0, mask);
}
126 
127 /*
128  * Mark the attributes as stale, so that at the next call to
129  * ->getattr() they will be fetched from userspace
130  */
fuse_invalidate_attr(struct inode * inode)131 void fuse_invalidate_attr(struct inode *inode)
132 {
133 	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
134 }
135 
fuse_dir_changed(struct inode * dir)136 static void fuse_dir_changed(struct inode *dir)
137 {
138 	fuse_invalidate_attr(dir);
139 	inode_maybe_inc_iversion(dir, false);
140 }
141 
142 /**
143  * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
144  * atime is not used.
145  */
fuse_invalidate_atime(struct inode * inode)146 void fuse_invalidate_atime(struct inode *inode)
147 {
148 	if (!IS_RDONLY(inode))
149 		fuse_invalidate_attr_mask(inode, STATX_ATIME);
150 }
151 
152 /*
153  * Just mark the entry as stale, so that a next attempt to look it up
154  * will result in a new lookup call to userspace
155  *
156  * This is called when a dentry is about to become negative and the
157  * timeout is unknown (unlink, rmdir, rename and in some cases
158  * lookup)
159  */
fuse_invalidate_entry_cache(struct dentry * entry)160 void fuse_invalidate_entry_cache(struct dentry *entry)
161 {
162 	fuse_dentry_settime(entry, 0);
163 }
164 
/*
 * Like fuse_invalidate_entry_cache(), but first ask the VFS to
 * invalidate the dentry via d_invalidate()
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_dentry_settime(entry, 0);
}
174 
/*
 * Fill in @args for a FUSE_LOOKUP of @name in the directory @nodeid.
 *
 * @outarg is zeroed and registered as the single output argument.
 * @fc is not used by this function.
 */
static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
			     u64 nodeid, const struct qstr *name,
			     struct fuse_entry_out *outarg)
{
	memset(outarg, 0, sizeof(*outarg));

	args->opcode = FUSE_LOOKUP;
	args->nodeid = nodeid;

	/* One input: name->len + 1 bytes starting at name->name */
	args->in_numargs = 1;
	args->in_args[0].value = name->name;
	args->in_args[0].size = name->len + 1;

	/* One output: the entry reply */
	args->out_numargs = 1;
	args->out_args[0].value = outarg;
	args->out_args[0].size = sizeof(*outarg);
}
189 
190 /*
191  * Check whether the dentry is still valid
192  *
193  * If the entry validity timeout has expired and the dentry is
194  * positive, try to redo the lookup.  If the lookup results in a
195  * different inode, then let the VFS invalidate the dentry and redo
196  * the lookup once more.  If the lookup results in the same inode,
197  * then refresh the attributes, timeouts and mark the dentry valid.
198  */
fuse_dentry_revalidate(struct dentry * entry,unsigned int flags)199 static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
200 {
201 	struct inode *inode;
202 	struct dentry *parent;
203 	struct fuse_mount *fm;
204 	struct fuse_inode *fi;
205 	int ret;
206 
207 	inode = d_inode_rcu(entry);
208 	if (inode && fuse_is_bad(inode))
209 		goto invalid;
210 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
211 		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) {
212 		struct fuse_entry_out outarg;
213 		FUSE_ARGS(args);
214 		struct fuse_forget_link *forget;
215 		u64 attr_version;
216 
217 		/* For negative dentries, always do a fresh lookup */
218 		if (!inode)
219 			goto invalid;
220 
221 		ret = -ECHILD;
222 		if (flags & LOOKUP_RCU)
223 			goto out;
224 
225 		fm = get_fuse_mount(inode);
226 
227 		forget = fuse_alloc_forget();
228 		ret = -ENOMEM;
229 		if (!forget)
230 			goto out;
231 
232 		attr_version = fuse_get_attr_version(fm->fc);
233 
234 		parent = dget_parent(entry);
235 		fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)),
236 				 &entry->d_name, &outarg);
237 		ret = fuse_simple_request(fm, &args);
238 		dput(parent);
239 		/* Zero nodeid is same as -ENOENT */
240 		if (!ret && !outarg.nodeid)
241 			ret = -ENOENT;
242 		if (!ret) {
243 			fi = get_fuse_inode(inode);
244 			if (outarg.nodeid != get_node_id(inode) ||
245 			    (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) {
246 				fuse_queue_forget(fm->fc, forget,
247 						  outarg.nodeid, 1);
248 				goto invalid;
249 			}
250 			spin_lock(&fi->lock);
251 			fi->nlookup++;
252 			spin_unlock(&fi->lock);
253 		}
254 		kfree(forget);
255 		if (ret == -ENOMEM)
256 			goto out;
257 		if (ret || fuse_invalid_attr(&outarg.attr) ||
258 		    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
259 			goto invalid;
260 
261 		forget_all_cached_acls(inode);
262 		fuse_change_attributes(inode, &outarg.attr,
263 				       entry_attr_timeout(&outarg),
264 				       attr_version);
265 		fuse_change_entry_timeout(entry, &outarg);
266 	} else if (inode) {
267 		fi = get_fuse_inode(inode);
268 		if (flags & LOOKUP_RCU) {
269 			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
270 				return -ECHILD;
271 		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
272 			parent = dget_parent(entry);
273 			fuse_advise_use_readdirplus(d_inode(parent));
274 			dput(parent);
275 		}
276 	}
277 	ret = 1;
278 out:
279 	return ret;
280 
281 invalid:
282 	ret = 0;
283 	goto out;
284 }
285 
286 #if BITS_PER_LONG < 64
fuse_dentry_init(struct dentry * dentry)287 static int fuse_dentry_init(struct dentry *dentry)
288 {
289 	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
290 				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
291 
292 	return dentry->d_fsdata ? 0 : -ENOMEM;
293 }
fuse_dentry_release(struct dentry * dentry)294 static void fuse_dentry_release(struct dentry *dentry)
295 {
296 	union fuse_dentry *fd = dentry->d_fsdata;
297 
298 	kfree_rcu(fd, rcu);
299 }
300 #endif
301 
fuse_dentry_delete(const struct dentry * dentry)302 static int fuse_dentry_delete(const struct dentry *dentry)
303 {
304 	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
305 }
306 
307 /*
308  * Create a fuse_mount object with a new superblock (with path->dentry
309  * as the root), and return that mount so it can be auto-mounted on
310  * @path.
311  */
fuse_dentry_automount(struct path * path)312 static struct vfsmount *fuse_dentry_automount(struct path *path)
313 {
314 	struct fs_context *fsc;
315 	struct vfsmount *mnt;
316 	struct fuse_inode *mp_fi = get_fuse_inode(d_inode(path->dentry));
317 
318 	fsc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry);
319 	if (IS_ERR(fsc))
320 		return ERR_CAST(fsc);
321 
322 	/* Pass the FUSE inode of the mount for fuse_get_tree_submount() */
323 	fsc->fs_private = mp_fi;
324 
325 	/* Create the submount */
326 	mnt = fc_mount(fsc);
327 	if (!IS_ERR(mnt))
328 		mntget(mnt);
329 
330 	put_fs_context(fsc);
331 	return mnt;
332 }
333 
334 const struct dentry_operations fuse_dentry_operations = {
335 	.d_revalidate	= fuse_dentry_revalidate,
336 	.d_delete	= fuse_dentry_delete,
337 #if BITS_PER_LONG < 64
338 	.d_init		= fuse_dentry_init,
339 	.d_release	= fuse_dentry_release,
340 #endif
341 	.d_automount	= fuse_dentry_automount,
342 };
343 
344 const struct dentry_operations fuse_root_dentry_operations = {
345 #if BITS_PER_LONG < 64
346 	.d_init		= fuse_dentry_init,
347 	.d_release	= fuse_dentry_release,
348 #endif
349 };
350 
/*
 * Returns 1 if the file type bits of mode @m denote a regular file,
 * directory, symlink, character or block device, FIFO or socket, and
 * 0 otherwise.
 */
int fuse_valid_type(int m)
{
	if (S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m))
		return 1;

	if (S_ISCHR(m) || S_ISBLK(m))
		return 1;

	if (S_ISFIFO(m) || S_ISSOCK(m))
		return 1;

	return 0;
}
356 
fuse_invalid_attr(struct fuse_attr * attr)357 bool fuse_invalid_attr(struct fuse_attr *attr)
358 {
359 	return !fuse_valid_type(attr->mode) ||
360 		attr->size > LLONG_MAX;
361 }
362 
/*
 * Send a FUSE_LOOKUP for @name in directory @nodeid and turn the reply
 * into an inode.
 *
 * @outarg receives the raw reply.  *@inode is set to the inode obtained
 * from fuse_iget(), or left NULL.
 *
 * Returns 0 on success.  A successful reply with a zero nodeid also
 * returns 0, with *@inode == NULL (negative entry whose timeout in
 * @outarg is valid).  Otherwise a negative errno: -ENAMETOOLONG if the
 * name exceeds FUSE_NAME_MAX, -ENOMEM, -EIO for invalid attributes in
 * the reply, or the error of the request itself.
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	u64 attr_version;
	int err;

	*inode = NULL;
	err = -ENAMETOOLONG;
	if (name->len > FUSE_NAME_MAX)
		goto out;

	/* Allocated up front so a forget can be queued if fuse_iget() fails */
	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out;

	attr_version = fuse_get_attr_version(fm->fc);

	fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
	err = fuse_simple_request(fm, &args);
	/* Zero nodeid is same as -ENOENT, but with valid timeout */
	if (err || !outarg->nodeid)
		goto out_put_forget;

	/* From here on the nodeid is known to be nonzero */
	err = -EIO;
	if (fuse_invalid_attr(&outarg->attr))
		goto out_put_forget;

	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
			   &outarg->attr, entry_attr_timeout(outarg),
			   attr_version);
	err = -ENOMEM;
	if (!*inode) {
		/* fuse_queue_forget() takes over the forget link */
		fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
		goto out;
	}
	err = 0;

 out_put_forget:
	kfree(forget);
 out:
	return err;
}
412 
fuse_lookup(struct inode * dir,struct dentry * entry,unsigned int flags)413 static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
414 				  unsigned int flags)
415 {
416 	int err;
417 	struct fuse_entry_out outarg;
418 	struct inode *inode;
419 	struct dentry *newent;
420 	bool outarg_valid = true;
421 	bool locked;
422 
423 	if (fuse_is_bad(dir))
424 		return ERR_PTR(-EIO);
425 
426 	locked = fuse_lock_inode(dir);
427 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
428 			       &outarg, &inode);
429 	fuse_unlock_inode(dir, locked);
430 	if (err == -ENOENT) {
431 		outarg_valid = false;
432 		err = 0;
433 	}
434 	if (err)
435 		goto out_err;
436 
437 	err = -EIO;
438 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
439 		goto out_iput;
440 
441 	newent = d_splice_alias(inode, entry);
442 	err = PTR_ERR(newent);
443 	if (IS_ERR(newent))
444 		goto out_err;
445 
446 	entry = newent ? newent : entry;
447 	if (outarg_valid)
448 		fuse_change_entry_timeout(entry, &outarg);
449 	else
450 		fuse_invalidate_entry_cache(entry);
451 
452 	if (inode)
453 		fuse_advise_use_readdirplus(dir);
454 	return newent;
455 
456  out_iput:
457 	iput(inode);
458  out_err:
459 	return ERR_PTR(err);
460 }
461 
/*
 * Build the security context argument for a create-type request.
 *
 * Asks the LSM for the initial security context of @entry and packs it
 * into a newly allocated buffer: a struct fuse_secctx_header, followed
 * (if a context exists) by one struct fuse_secctx, the NUL-terminated
 * xattr name and the context bytes.
 *
 * On success returns 0 and stores the buffer in *@security_ctx and its
 * length in *@security_ctxlen; the caller frees the buffer with
 * kfree().  -EOPNOTSUPP from the LSM hook is treated as "no context".
 * Otherwise returns a negative errno.
 */
static int get_security_context(struct dentry *entry, umode_t mode,
				void **security_ctx, u32 *security_ctxlen)
{
	struct fuse_secctx_header *header;
	struct fuse_secctx *fctx;
	u32 total_len = sizeof(*header);
	const char *name;
	size_t namelen;
	void *ctx = NULL;
	void *cursor;
	u32 ctxlen;
	int nr_ctx = 0;
	int err;

	err = security_dentry_init_security(entry, mode, &entry->d_name,
					    &name, &ctx, &ctxlen);
	if (err == -EOPNOTSUPP) {
		/* No LSM is supporting this security hook. Ignore error */
		ctxlen = 0;
		ctx = NULL;
	} else if (err) {
		goto out_err;
	}

	if (ctxlen) {
		nr_ctx = 1;
		namelen = strlen(name) + 1;
		err = -EIO;
		if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX))
			goto out_err;
		total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen);
	}

	err = -ENOMEM;
	cursor = kzalloc(total_len, GFP_KERNEL);
	if (!cursor)
		goto out_err;

	header = cursor;
	header->nr_secctx = nr_ctx;
	header->size = total_len;
	cursor += sizeof(*header);

	if (nr_ctx) {
		/* Record layout: fuse_secctx, then name, then context */
		fctx = cursor;
		fctx->size = ctxlen;
		cursor += sizeof(*fctx);

		strcpy(cursor, name);
		cursor += namelen;

		memcpy(cursor, ctx, ctxlen);
	}

	*security_ctx = header;
	*security_ctxlen = total_len;
	err = 0;
out_err:
	/* The LSM-provided context is freed on every path */
	kfree(ctx);
	return err;
}
517 
518 /*
519  * Atomic create+open operation
520  *
521  * If the filesystem doesn't support this, then fall back to separate
522  * 'mknod' + 'open' requests.
523  */
fuse_create_open(struct inode * dir,struct dentry * entry,struct file * file,unsigned int flags,umode_t mode)524 static int fuse_create_open(struct inode *dir, struct dentry *entry,
525 			    struct file *file, unsigned int flags,
526 			    umode_t mode)
527 {
528 	int err;
529 	struct inode *inode;
530 	struct fuse_mount *fm = get_fuse_mount(dir);
531 	FUSE_ARGS(args);
532 	struct fuse_forget_link *forget;
533 	struct fuse_create_in inarg;
534 	struct fuse_open_out outopen;
535 	struct fuse_entry_out outentry;
536 	struct fuse_inode *fi;
537 	struct fuse_file *ff;
538 	void *security_ctx = NULL;
539 	u32 security_ctxlen;
540 	bool trunc = flags & O_TRUNC;
541 
542 	/* Userspace expects S_IFREG in create mode */
543 	BUG_ON((mode & S_IFMT) != S_IFREG);
544 
545 	forget = fuse_alloc_forget();
546 	err = -ENOMEM;
547 	if (!forget)
548 		goto out_err;
549 
550 	err = -ENOMEM;
551 	ff = fuse_file_alloc(fm);
552 	if (!ff)
553 		goto out_put_forget_req;
554 
555 	if (!fm->fc->dont_mask)
556 		mode &= ~current_umask();
557 
558 	flags &= ~O_NOCTTY;
559 	memset(&inarg, 0, sizeof(inarg));
560 	memset(&outentry, 0, sizeof(outentry));
561 	inarg.flags = flags;
562 	inarg.mode = mode;
563 	inarg.umask = current_umask();
564 
565 	if (fm->fc->handle_killpriv_v2 && trunc &&
566 	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
567 		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
568 	}
569 
570 	args.opcode = FUSE_CREATE;
571 	args.nodeid = get_node_id(dir);
572 	args.in_numargs = 2;
573 	args.in_args[0].size = sizeof(inarg);
574 	args.in_args[0].value = &inarg;
575 	args.in_args[1].size = entry->d_name.len + 1;
576 	args.in_args[1].value = entry->d_name.name;
577 	args.out_numargs = 2;
578 	args.out_args[0].size = sizeof(outentry);
579 	args.out_args[0].value = &outentry;
580 	args.out_args[1].size = sizeof(outopen);
581 	args.out_args[1].value = &outopen;
582 
583 	if (fm->fc->init_security) {
584 		err = get_security_context(entry, mode, &security_ctx,
585 					   &security_ctxlen);
586 		if (err)
587 			goto out_put_forget_req;
588 
589 		args.in_numargs = 3;
590 		args.in_args[2].size = security_ctxlen;
591 		args.in_args[2].value = security_ctx;
592 	}
593 
594 	err = fuse_simple_request(fm, &args);
595 	kfree(security_ctx);
596 	if (err)
597 		goto out_free_ff;
598 
599 	err = -EIO;
600 	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) ||
601 	    fuse_invalid_attr(&outentry.attr))
602 		goto out_free_ff;
603 
604 	ff->fh = outopen.fh;
605 	ff->nodeid = outentry.nodeid;
606 	ff->open_flags = outopen.open_flags;
607 	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
608 			  &outentry.attr, entry_attr_timeout(&outentry), 0);
609 	if (!inode) {
610 		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
611 		fuse_sync_release(NULL, ff, flags);
612 		fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1);
613 		err = -ENOMEM;
614 		goto out_err;
615 	}
616 	kfree(forget);
617 	d_instantiate(entry, inode);
618 	fuse_change_entry_timeout(entry, &outentry);
619 	fuse_dir_changed(dir);
620 	err = finish_open(file, entry, generic_file_open);
621 	if (err) {
622 		fi = get_fuse_inode(inode);
623 		fuse_sync_release(fi, ff, flags);
624 	} else {
625 		file->private_data = ff;
626 		fuse_finish_open(inode, file);
627 		if (fm->fc->atomic_o_trunc && trunc)
628 			truncate_pagecache(inode, 0);
629 		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
630 			invalidate_inode_pages2(inode->i_mapping);
631 	}
632 	return err;
633 
634 out_free_ff:
635 	fuse_file_free(ff);
636 out_put_forget_req:
637 	kfree(forget);
638 out_err:
639 	return err;
640 }
641 
642 static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *,
643 		      umode_t, dev_t);
fuse_atomic_open(struct inode * dir,struct dentry * entry,struct file * file,unsigned flags,umode_t mode)644 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
645 			    struct file *file, unsigned flags,
646 			    umode_t mode)
647 {
648 	int err;
649 	struct fuse_conn *fc = get_fuse_conn(dir);
650 	struct dentry *res = NULL;
651 
652 	if (fuse_is_bad(dir))
653 		return -EIO;
654 
655 	if (d_in_lookup(entry)) {
656 		res = fuse_lookup(dir, entry, 0);
657 		if (IS_ERR(res))
658 			return PTR_ERR(res);
659 
660 		if (res)
661 			entry = res;
662 	}
663 
664 	if (!(flags & O_CREAT) || d_really_is_positive(entry))
665 		goto no_open;
666 
667 	/* Only creates */
668 	file->f_mode |= FMODE_CREATED;
669 
670 	if (fc->no_create)
671 		goto mknod;
672 
673 	err = fuse_create_open(dir, entry, file, flags, mode);
674 	if (err == -ENOSYS) {
675 		fc->no_create = 1;
676 		goto mknod;
677 	}
678 out_dput:
679 	dput(res);
680 	return err;
681 
682 mknod:
683 	err = fuse_mknod(&init_user_ns, dir, entry, mode, 0);
684 	if (err)
685 		goto out_dput;
686 no_open:
687 	return finish_no_open(file, res);
688 }
689 
690 /*
691  * Code shared between mknod, mkdir, symlink and link
692  */
create_new_entry(struct fuse_mount * fm,struct fuse_args * args,struct inode * dir,struct dentry * entry,umode_t mode)693 static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
694 			    struct inode *dir, struct dentry *entry,
695 			    umode_t mode)
696 {
697 	struct fuse_entry_out outarg;
698 	struct inode *inode;
699 	struct dentry *d;
700 	int err;
701 	struct fuse_forget_link *forget;
702 	void *security_ctx = NULL;
703 	u32 security_ctxlen;
704 
705 	if (fuse_is_bad(dir))
706 		return -EIO;
707 
708 	forget = fuse_alloc_forget();
709 	if (!forget)
710 		return -ENOMEM;
711 
712 	memset(&outarg, 0, sizeof(outarg));
713 	args->nodeid = get_node_id(dir);
714 	args->out_numargs = 1;
715 	args->out_args[0].size = sizeof(outarg);
716 	args->out_args[0].value = &outarg;
717 
718 	if (fm->fc->init_security && args->opcode != FUSE_LINK) {
719 		err = get_security_context(entry, mode, &security_ctx,
720 					   &security_ctxlen);
721 		if (err)
722 			goto out_put_forget_req;
723 
724 		BUG_ON(args->in_numargs != 2);
725 
726 		args->in_numargs = 3;
727 		args->in_args[2].size = security_ctxlen;
728 		args->in_args[2].value = security_ctx;
729 	}
730 
731 	err = fuse_simple_request(fm, args);
732 	kfree(security_ctx);
733 	if (err)
734 		goto out_put_forget_req;
735 
736 	err = -EIO;
737 	if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr))
738 		goto out_put_forget_req;
739 
740 	if ((outarg.attr.mode ^ mode) & S_IFMT)
741 		goto out_put_forget_req;
742 
743 	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
744 			  &outarg.attr, entry_attr_timeout(&outarg), 0);
745 	if (!inode) {
746 		fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
747 		return -ENOMEM;
748 	}
749 	kfree(forget);
750 
751 	d_drop(entry);
752 	d = d_splice_alias(inode, entry);
753 	if (IS_ERR(d))
754 		return PTR_ERR(d);
755 
756 	if (d) {
757 		fuse_change_entry_timeout(d, &outarg);
758 		dput(d);
759 	} else {
760 		fuse_change_entry_timeout(entry, &outarg);
761 	}
762 	fuse_dir_changed(dir);
763 	return 0;
764 
765  out_put_forget_req:
766 	kfree(forget);
767 	return err;
768 }
769 
/*
 * ->mknod: send FUSE_MKNOD for @entry in @dir.
 *
 * Unless the connection has dont_mask set, the umask is applied to
 * @mode here; the umask is also passed along in the request.
 * @mnt_userns is not used.
 */
static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
		      struct dentry *entry, umode_t mode, dev_t rdev)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	struct fuse_mknod_in inarg;
	FUSE_ARGS(args);

	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.umask = current_umask();
	inarg.rdev = new_encode_dev(rdev);
	inarg.mode = mode;

	args.opcode = FUSE_MKNOD;
	args.in_numargs = 2;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[1].value = entry->d_name.name;
	args.in_args[1].size = entry->d_name.len + 1;

	return create_new_entry(fm, &args, dir, entry, mode);
}
792 
/*
 * ->create: implemented as fuse_mknod() with a device number of 0.
 * @mnt_userns and @excl are not used.
 */
static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
		       struct dentry *entry, umode_t mode, bool excl)
{
	const dev_t no_dev = 0;

	return fuse_mknod(&init_user_ns, dir, entry, mode, no_dev);
}
798 
/*
 * ->mkdir: send FUSE_MKDIR for @entry in @dir.
 *
 * Unless the connection has dont_mask set, the umask is applied to
 * @mode here.  S_IFDIR is what create_new_entry() gets as the mode.
 * @mnt_userns is not used.
 */
static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
		      struct dentry *entry, umode_t mode)
{
	struct fuse_mount *fm = get_fuse_mount(dir);
	struct fuse_mkdir_in inarg;
	FUSE_ARGS(args);

	if (!fm->fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.umask = current_umask();
	inarg.mode = mode;

	args.opcode = FUSE_MKDIR;
	args.in_numargs = 2;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[1].value = entry->d_name.name;
	args.in_args[1].size = entry->d_name.len + 1;

	return create_new_entry(fm, &args, dir, entry, S_IFDIR);
}
820 
fuse_symlink(struct user_namespace * mnt_userns,struct inode * dir,struct dentry * entry,const char * link)821 static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
822 			struct dentry *entry, const char *link)
823 {
824 	struct fuse_mount *fm = get_fuse_mount(dir);
825 	unsigned len = strlen(link) + 1;
826 	FUSE_ARGS(args);
827 
828 	args.opcode = FUSE_SYMLINK;
829 	args.in_numargs = 2;
830 	args.in_args[0].size = entry->d_name.len + 1;
831 	args.in_args[0].value = entry->d_name.name;
832 	args.in_args[1].size = len;
833 	args.in_args[1].value = link;
834 	return create_new_entry(fm, &args, dir, entry, S_IFLNK);
835 }
836 
fuse_flush_time_update(struct inode * inode)837 void fuse_flush_time_update(struct inode *inode)
838 {
839 	int err = sync_inode_metadata(inode, 1);
840 
841 	mapping_set_error(inode->i_mapping, err);
842 }
843 
fuse_update_ctime_in_cache(struct inode * inode)844 static void fuse_update_ctime_in_cache(struct inode *inode)
845 {
846 	if (!IS_NOCMTIME(inode)) {
847 		inode->i_ctime = current_time(inode);
848 		mark_inode_dirty_sync(inode);
849 		fuse_flush_time_update(inode);
850 	}
851 }
852 
fuse_update_ctime(struct inode * inode)853 void fuse_update_ctime(struct inode *inode)
854 {
855 	fuse_invalidate_attr_mask(inode, STATX_CTIME);
856 	fuse_update_ctime_in_cache(inode);
857 }
858 
fuse_entry_unlinked(struct dentry * entry)859 static void fuse_entry_unlinked(struct dentry *entry)
860 {
861 	struct inode *inode = d_inode(entry);
862 	struct fuse_conn *fc = get_fuse_conn(inode);
863 	struct fuse_inode *fi = get_fuse_inode(inode);
864 
865 	spin_lock(&fi->lock);
866 	fi->attr_version = atomic64_inc_return(&fc->attr_version);
867 	/*
868 	 * If i_nlink == 0 then unlink doesn't make sense, yet this can
869 	 * happen if userspace filesystem is careless.  It would be
870 	 * difficult to enforce correct nlink usage so just ignore this
871 	 * condition here
872 	 */
873 	if (S_ISDIR(inode->i_mode))
874 		clear_nlink(inode);
875 	else if (inode->i_nlink > 0)
876 		drop_nlink(inode);
877 	spin_unlock(&fi->lock);
878 	fuse_invalidate_entry_cache(entry);
879 	fuse_update_ctime(inode);
880 }
881 
fuse_unlink(struct inode * dir,struct dentry * entry)882 static int fuse_unlink(struct inode *dir, struct dentry *entry)
883 {
884 	int err;
885 	struct fuse_mount *fm = get_fuse_mount(dir);
886 	FUSE_ARGS(args);
887 
888 	if (fuse_is_bad(dir))
889 		return -EIO;
890 
891 	args.opcode = FUSE_UNLINK;
892 	args.nodeid = get_node_id(dir);
893 	args.in_numargs = 1;
894 	args.in_args[0].size = entry->d_name.len + 1;
895 	args.in_args[0].value = entry->d_name.name;
896 	err = fuse_simple_request(fm, &args);
897 	if (!err) {
898 		fuse_dir_changed(dir);
899 		fuse_entry_unlinked(entry);
900 	} else if (err == -EINTR)
901 		fuse_invalidate_entry(entry);
902 	return err;
903 }
904 
fuse_rmdir(struct inode * dir,struct dentry * entry)905 static int fuse_rmdir(struct inode *dir, struct dentry *entry)
906 {
907 	int err;
908 	struct fuse_mount *fm = get_fuse_mount(dir);
909 	FUSE_ARGS(args);
910 
911 	if (fuse_is_bad(dir))
912 		return -EIO;
913 
914 	args.opcode = FUSE_RMDIR;
915 	args.nodeid = get_node_id(dir);
916 	args.in_numargs = 1;
917 	args.in_args[0].size = entry->d_name.len + 1;
918 	args.in_args[0].value = entry->d_name.name;
919 	err = fuse_simple_request(fm, &args);
920 	if (!err) {
921 		fuse_dir_changed(dir);
922 		fuse_entry_unlinked(entry);
923 	} else if (err == -EINTR)
924 		fuse_invalidate_entry(entry);
925 	return err;
926 }
927 
/*
 * Send a rename request (@opcode) and update the cached state.
 *
 * @argsize is the size of the request header actually zeroed and sent;
 * the local buffer is a struct fuse_rename2_in in either case.
 * Returns 0 or a negative errno.
 */
static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
			      struct inode *newdir, struct dentry *newent,
			      unsigned int flags, int opcode, size_t argsize)
{
	struct fuse_mount *fm = get_fuse_mount(olddir);
	struct fuse_rename2_in inarg;
	FUSE_ARGS(args);
	int err;

	memset(&inarg, 0, argsize);
	inarg.newdir = get_node_id(newdir);
	inarg.flags = flags;

	args.opcode = opcode;
	args.nodeid = get_node_id(olddir);
	args.in_numargs = 3;
	args.in_args[0].value = &inarg;
	args.in_args[0].size = argsize;
	args.in_args[1].value = oldent->d_name.name;
	args.in_args[1].size = oldent->d_name.len + 1;
	args.in_args[2].value = newent->d_name.name;
	args.in_args[2].size = newent->d_name.len + 1;

	err = fuse_simple_request(fm, &args);
	if (err == -EINTR) {
		/*
		 * If request was interrupted, DEITY only knows if the
		 * rename actually took place.  If the invalidation
		 * fails (e.g. some process has CWD under the renamed
		 * directory), then there can be inconsistency between
		 * the dcache and the real filesystem.  Tough luck.
		 */
		fuse_invalidate_entry(oldent);
		if (d_really_is_positive(newent))
			fuse_invalidate_entry(newent);
		return err;
	}
	if (err)
		return err;

	/* ctime changes */
	fuse_update_ctime(d_inode(oldent));
	if (flags & RENAME_EXCHANGE)
		fuse_update_ctime(d_inode(newent));

	fuse_dir_changed(olddir);
	if (olddir != newdir)
		fuse_dir_changed(newdir);

	/* newent will end up negative */
	if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
		fuse_entry_unlinked(newent);

	return 0;
}
977 
fuse_rename2(struct user_namespace * mnt_userns,struct inode * olddir,struct dentry * oldent,struct inode * newdir,struct dentry * newent,unsigned int flags)978 static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir,
979 			struct dentry *oldent, struct inode *newdir,
980 			struct dentry *newent, unsigned int flags)
981 {
982 	struct fuse_conn *fc = get_fuse_conn(olddir);
983 	int err;
984 
985 	if (fuse_is_bad(olddir))
986 		return -EIO;
987 
988 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
989 		return -EINVAL;
990 
991 	if (flags) {
992 		if (fc->no_rename2 || fc->minor < 23)
993 			return -EINVAL;
994 
995 		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
996 					 FUSE_RENAME2,
997 					 sizeof(struct fuse_rename2_in));
998 		if (err == -ENOSYS) {
999 			fc->no_rename2 = 1;
1000 			err = -EINVAL;
1001 		}
1002 	} else {
1003 		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
1004 					 FUSE_RENAME,
1005 					 sizeof(struct fuse_rename_in));
1006 	}
1007 
1008 	return err;
1009 }
1010 
fuse_link(struct dentry * entry,struct inode * newdir,struct dentry * newent)1011 static int fuse_link(struct dentry *entry, struct inode *newdir,
1012 		     struct dentry *newent)
1013 {
1014 	int err;
1015 	struct fuse_link_in inarg;
1016 	struct inode *inode = d_inode(entry);
1017 	struct fuse_mount *fm = get_fuse_mount(inode);
1018 	FUSE_ARGS(args);
1019 
1020 	memset(&inarg, 0, sizeof(inarg));
1021 	inarg.oldnodeid = get_node_id(inode);
1022 	args.opcode = FUSE_LINK;
1023 	args.in_numargs = 2;
1024 	args.in_args[0].size = sizeof(inarg);
1025 	args.in_args[0].value = &inarg;
1026 	args.in_args[1].size = newent->d_name.len + 1;
1027 	args.in_args[1].value = newent->d_name.name;
1028 	err = create_new_entry(fm, &args, newdir, newent, inode->i_mode);
1029 	if (!err)
1030 		fuse_update_ctime_in_cache(inode);
1031 	else if (err == -EINTR)
1032 		fuse_invalidate_attr(inode);
1033 
1034 	return err;
1035 }
1036 
fuse_fillattr(struct inode * inode,struct fuse_attr * attr,struct kstat * stat)1037 static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
1038 			  struct kstat *stat)
1039 {
1040 	unsigned int blkbits;
1041 	struct fuse_conn *fc = get_fuse_conn(inode);
1042 
1043 	stat->dev = inode->i_sb->s_dev;
1044 	stat->ino = attr->ino;
1045 	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
1046 	stat->nlink = attr->nlink;
1047 	stat->uid = make_kuid(fc->user_ns, attr->uid);
1048 	stat->gid = make_kgid(fc->user_ns, attr->gid);
1049 	stat->rdev = inode->i_rdev;
1050 	stat->atime.tv_sec = attr->atime;
1051 	stat->atime.tv_nsec = attr->atimensec;
1052 	stat->mtime.tv_sec = attr->mtime;
1053 	stat->mtime.tv_nsec = attr->mtimensec;
1054 	stat->ctime.tv_sec = attr->ctime;
1055 	stat->ctime.tv_nsec = attr->ctimensec;
1056 	stat->size = attr->size;
1057 	stat->blocks = attr->blocks;
1058 
1059 	if (attr->blksize != 0)
1060 		blkbits = ilog2(attr->blksize);
1061 	else
1062 		blkbits = inode->i_sb->s_blocksize_bits;
1063 
1064 	stat->blksize = 1 << blkbits;
1065 }
1066 
fuse_do_getattr(struct inode * inode,struct kstat * stat,struct file * file)1067 static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
1068 			   struct file *file)
1069 {
1070 	int err;
1071 	struct fuse_getattr_in inarg;
1072 	struct fuse_attr_out outarg;
1073 	struct fuse_mount *fm = get_fuse_mount(inode);
1074 	FUSE_ARGS(args);
1075 	u64 attr_version;
1076 
1077 	attr_version = fuse_get_attr_version(fm->fc);
1078 
1079 	memset(&inarg, 0, sizeof(inarg));
1080 	memset(&outarg, 0, sizeof(outarg));
1081 	/* Directories have separate file-handle space */
1082 	if (file && S_ISREG(inode->i_mode)) {
1083 		struct fuse_file *ff = file->private_data;
1084 
1085 		inarg.getattr_flags |= FUSE_GETATTR_FH;
1086 		inarg.fh = ff->fh;
1087 	}
1088 	args.opcode = FUSE_GETATTR;
1089 	args.nodeid = get_node_id(inode);
1090 	args.in_numargs = 1;
1091 	args.in_args[0].size = sizeof(inarg);
1092 	args.in_args[0].value = &inarg;
1093 	args.out_numargs = 1;
1094 	args.out_args[0].size = sizeof(outarg);
1095 	args.out_args[0].value = &outarg;
1096 	err = fuse_simple_request(fm, &args);
1097 	if (!err) {
1098 		if (fuse_invalid_attr(&outarg.attr) ||
1099 		    inode_wrong_type(inode, outarg.attr.mode)) {
1100 			fuse_make_bad(inode);
1101 			err = -EIO;
1102 		} else {
1103 			fuse_change_attributes(inode, &outarg.attr,
1104 					       attr_timeout(&outarg),
1105 					       attr_version);
1106 			if (stat)
1107 				fuse_fillattr(inode, &outarg.attr, stat);
1108 		}
1109 	}
1110 	return err;
1111 }
1112 
/*
 * Refresh the attributes of @inode from the server if necessary and
 * optionally fill @stat.
 *
 * A refresh is forced by AT_STATX_FORCE_SYNC and suppressed by
 * AT_STATX_DONT_SYNC.  Without either flag a refresh happens when a requested
 * attribute is marked invalid and is not covered by the cache mask, or when
 * fi->i_time lies before the current jiffies.
 *
 * When no refresh is done and @stat is non-NULL, @stat is filled from the
 * in-core inode, with mode and ino replaced by fi->orig_i_mode and
 * fi->orig_ino.
 *
 * Returns 0, or the error from fuse_do_getattr().
 */
static int fuse_update_get_attr(struct inode *inode, struct file *file,
				struct kstat *stat, u32 request_mask,
				unsigned int flags)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	u32 inval_mask = READ_ONCE(fi->inval_mask);
	u32 cache_mask = fuse_get_cache_mask(inode);
	bool sync;

	if (flags & AT_STATX_FORCE_SYNC)
		sync = true;
	else if (flags & AT_STATX_DONT_SYNC)
		sync = false;
	else
		sync = (request_mask & inval_mask & ~cache_mask) ||
		       time_before64(fi->i_time, get_jiffies_64());

	if (sync) {
		forget_all_cached_acls(inode);
		return fuse_do_getattr(inode, stat, file);
	}

	if (stat) {
		generic_fillattr(&init_user_ns, inode, stat);
		stat->mode = fi->orig_i_mode;
		stat->ino = fi->orig_ino;
	}

	return 0;
}
1143 
/*
 * Refresh the cached attributes of @inode if needed, without filling a kstat.
 *
 * Thin wrapper around fuse_update_get_attr() that passes a NULL stat and no
 * flags; @mask is passed through as the request mask.  Returns whatever
 * fuse_update_get_attr() returns.
 */
int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask)
{
	return fuse_update_get_attr(inode, file, NULL, mask, 0);
}
1148 
/*
 * Invalidate the dentry called @name below the directory inode with node id
 * @parent_nodeid, and optionally delete it.
 *
 * The parent inode is looked up with fuse_ilookup() and locked, then the
 * named child is searched for in the dcache with d_lookup().  If it is found,
 * the parent is passed to fuse_dir_changed() and the entry to
 * fuse_invalidate_entry().  When @child_nodeid is non-zero and the entry is
 * positive, the entry is additionally removed with d_delete(), provided that
 * its node id matches, it is not a mountpoint and (for a directory) it is
 * empty.
 *
 * Returns 0 on success, -ENOENT if the parent inode, an alias of it, the
 * named entry or a child with the given node id is not found, -ENOTDIR if
 * the parent is not a directory, -EBUSY if the child is a mountpoint and
 * -ENOTEMPTY if it is a non-empty directory.
 */
int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = fuse_ilookup(fc, parent_nodeid, NULL);
	if (!parent)
		return -ENOENT;

	inode_lock_nested(parent, I_MUTEX_PARENT);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	/* The hash is computed in place in the caller's qstr before lookup */
	name->hash = full_name_hash(dir, name->name, name->len);
	entry = d_lookup(dir, name);
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_dir_changed(parent);
	fuse_invalidate_entry(entry);

	/* A non-zero child_nodeid additionally asks for the entry's removal */
	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		inode_lock(d_inode(entry));
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		/* d_delete() is only called after the child inode is unlocked */
		inode_unlock(d_inode(entry));
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	inode_unlock(parent);
	iput(parent);
	return err;
}
1214 
1215 /*
1216  * Calling into a user-controlled filesystem gives the filesystem
1217  * daemon ptrace-like capabilities over the current process.  This
1218  * means, that the filesystem daemon is able to record the exact
1219  * filesystem operations performed, and can also control the behavior
1220  * of the requester process in otherwise impossible ways.  For example
1221  * it can delay the operation for arbitrary length of time allowing
1222  * DoS against the requester.
1223  *
1224  * For this reason only those processes can call into the filesystem,
1225  * for which the owner of the mount has ptrace privilege.  This
1226  * excludes processes started by other users, suid or sgid processes.
1227  */
fuse_allow_current_process(struct fuse_conn * fc)1228 int fuse_allow_current_process(struct fuse_conn *fc)
1229 {
1230 	const struct cred *cred;
1231 
1232 	if (fc->allow_other)
1233 		return current_in_userns(fc->user_ns);
1234 
1235 	cred = current_cred();
1236 	if (uid_eq(cred->euid, fc->user_id) &&
1237 	    uid_eq(cred->suid, fc->user_id) &&
1238 	    uid_eq(cred->uid,  fc->user_id) &&
1239 	    gid_eq(cred->egid, fc->group_id) &&
1240 	    gid_eq(cred->sgid, fc->group_id) &&
1241 	    gid_eq(cred->gid,  fc->group_id))
1242 		return 1;
1243 
1244 	return 0;
1245 }
1246 
fuse_access(struct inode * inode,int mask)1247 static int fuse_access(struct inode *inode, int mask)
1248 {
1249 	struct fuse_mount *fm = get_fuse_mount(inode);
1250 	FUSE_ARGS(args);
1251 	struct fuse_access_in inarg;
1252 	int err;
1253 
1254 	BUG_ON(mask & MAY_NOT_BLOCK);
1255 
1256 	if (fm->fc->no_access)
1257 		return 0;
1258 
1259 	memset(&inarg, 0, sizeof(inarg));
1260 	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1261 	args.opcode = FUSE_ACCESS;
1262 	args.nodeid = get_node_id(inode);
1263 	args.in_numargs = 1;
1264 	args.in_args[0].size = sizeof(inarg);
1265 	args.in_args[0].value = &inarg;
1266 	err = fuse_simple_request(fm, &args);
1267 	if (err == -ENOSYS) {
1268 		fm->fc->no_access = 1;
1269 		err = 0;
1270 	}
1271 	return err;
1272 }
1273 
fuse_perm_getattr(struct inode * inode,int mask)1274 static int fuse_perm_getattr(struct inode *inode, int mask)
1275 {
1276 	if (mask & MAY_NOT_BLOCK)
1277 		return -ECHILD;
1278 
1279 	forget_all_cached_acls(inode);
1280 	return fuse_do_getattr(inode, NULL, NULL);
1281 }
1282 
1283 /*
1284  * Check permission.  The two basic access models of FUSE are:
1285  *
1286  * 1) Local access checking ('default_permissions' mount option) based
1287  * on file mode.  This is the plain old disk filesystem permission
1288  * modell.
1289  *
1290  * 2) "Remote" access checking, where server is responsible for
1291  * checking permission in each inode operation.  An exception to this
1292  * is if ->permission() was invoked from sys_access() in which case an
1293  * access request is sent.  Execute permission is still checked
1294  * locally based on file mode.
1295  */
fuse_permission(struct user_namespace * mnt_userns,struct inode * inode,int mask)1296 static int fuse_permission(struct user_namespace *mnt_userns,
1297 			   struct inode *inode, int mask)
1298 {
1299 	struct fuse_conn *fc = get_fuse_conn(inode);
1300 	bool refreshed = false;
1301 	int err = 0;
1302 
1303 	if (fuse_is_bad(inode))
1304 		return -EIO;
1305 
1306 	if (!fuse_allow_current_process(fc))
1307 		return -EACCES;
1308 
1309 	/*
1310 	 * If attributes are needed, refresh them before proceeding
1311 	 */
1312 	if (fc->default_permissions ||
1313 	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1314 		struct fuse_inode *fi = get_fuse_inode(inode);
1315 		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1316 
1317 		if (perm_mask & READ_ONCE(fi->inval_mask) ||
1318 		    time_before64(fi->i_time, get_jiffies_64())) {
1319 			refreshed = true;
1320 
1321 			err = fuse_perm_getattr(inode, mask);
1322 			if (err)
1323 				return err;
1324 		}
1325 	}
1326 
1327 	if (fc->default_permissions) {
1328 		err = generic_permission(&init_user_ns, inode, mask);
1329 
1330 		/* If permission is denied, try to refresh file
1331 		   attributes.  This is also needed, because the root
1332 		   node will at first have no permissions */
1333 		if (err == -EACCES && !refreshed) {
1334 			err = fuse_perm_getattr(inode, mask);
1335 			if (!err)
1336 				err = generic_permission(&init_user_ns,
1337 							 inode, mask);
1338 		}
1339 
1340 		/* Note: the opposite of the above test does not
1341 		   exist.  So if permissions are revoked this won't be
1342 		   noticed immediately, only after the attribute
1343 		   timeout has expired */
1344 	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1345 		err = fuse_access(inode, mask);
1346 	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1347 		if (!(inode->i_mode & S_IXUGO)) {
1348 			if (refreshed)
1349 				return -EACCES;
1350 
1351 			err = fuse_perm_getattr(inode, mask);
1352 			if (!err && !(inode->i_mode & S_IXUGO))
1353 				return -EACCES;
1354 		}
1355 	}
1356 	return err;
1357 }
1358 
fuse_readlink_page(struct inode * inode,struct page * page)1359 static int fuse_readlink_page(struct inode *inode, struct page *page)
1360 {
1361 	struct fuse_mount *fm = get_fuse_mount(inode);
1362 	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
1363 	struct fuse_args_pages ap = {
1364 		.num_pages = 1,
1365 		.pages = &page,
1366 		.descs = &desc,
1367 	};
1368 	char *link;
1369 	ssize_t res;
1370 
1371 	ap.args.opcode = FUSE_READLINK;
1372 	ap.args.nodeid = get_node_id(inode);
1373 	ap.args.out_pages = true;
1374 	ap.args.out_argvar = true;
1375 	ap.args.page_zeroing = true;
1376 	ap.args.out_numargs = 1;
1377 	ap.args.out_args[0].size = desc.length;
1378 	res = fuse_simple_request(fm, &ap.args);
1379 
1380 	fuse_invalidate_atime(inode);
1381 
1382 	if (res < 0)
1383 		return res;
1384 
1385 	if (WARN_ON(res >= PAGE_SIZE))
1386 		return -EIO;
1387 
1388 	link = page_address(page);
1389 	link[res] = '\0';
1390 
1391 	return 0;
1392 }
1393 
fuse_get_link(struct dentry * dentry,struct inode * inode,struct delayed_call * callback)1394 static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
1395 				 struct delayed_call *callback)
1396 {
1397 	struct fuse_conn *fc = get_fuse_conn(inode);
1398 	struct page *page;
1399 	int err;
1400 
1401 	err = -EIO;
1402 	if (fuse_is_bad(inode))
1403 		goto out_err;
1404 
1405 	if (fc->cache_symlinks)
1406 		return page_get_link(dentry, inode, callback);
1407 
1408 	err = -ECHILD;
1409 	if (!dentry)
1410 		goto out_err;
1411 
1412 	page = alloc_page(GFP_KERNEL);
1413 	err = -ENOMEM;
1414 	if (!page)
1415 		goto out_err;
1416 
1417 	err = fuse_readlink_page(inode, page);
1418 	if (err) {
1419 		__free_page(page);
1420 		goto out_err;
1421 	}
1422 
1423 	set_delayed_call(callback, page_put_link, page);
1424 
1425 	return page_address(page);
1426 
1427 out_err:
1428 	return ERR_PTR(err);
1429 }
1430 
/*
 * ->open() for directories: forwards to fuse_open_common() with its last
 * argument set to true (presumably the "is a directory" flag - confirm
 * against fuse_open_common()) and returns its result.
 */
static int fuse_dir_open(struct inode *inode, struct file *file)
{
	return fuse_open_common(inode, file, true);
}
1435 
/*
 * ->release() for directories: forwards to fuse_release_common() with its
 * last argument set to true (presumably the "is a directory" flag - confirm
 * against fuse_release_common()).  Always returns 0.
 */
static int fuse_dir_release(struct inode *inode, struct file *file)
{
	fuse_release_common(file, true);

	return 0;
}
1442 
/*
 * ->fsync() for directories.
 *
 * Issues FUSE_FSYNCDIR through fuse_fsync_common() with the directory inode
 * locked.  Once the server has answered -ENOSYS, fc->no_fsyncdir is set and
 * this and all later calls return 0 without sending a request.
 *
 * Returns -EIO for a bad inode, otherwise 0 or the error from
 * fuse_fsync_common().
 */
static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	struct inode *dir = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(dir);
	int ret;

	if (fuse_is_bad(dir))
		return -EIO;

	if (fc->no_fsyncdir)
		return 0;

	inode_lock(dir);
	ret = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
	if (ret == -ENOSYS) {
		/* Not implemented by the server: treat as success from now on */
		fc->no_fsyncdir = 1;
		ret = 0;
	}
	inode_unlock(dir);

	return ret;
}
1466 
fuse_dir_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1467 static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
1468 			    unsigned long arg)
1469 {
1470 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1471 
1472 	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
1473 	if (fc->minor < 18)
1474 		return -ENOTTY;
1475 
1476 	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
1477 }
1478 
fuse_dir_compat_ioctl(struct file * file,unsigned int cmd,unsigned long arg)1479 static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
1480 				   unsigned long arg)
1481 {
1482 	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
1483 
1484 	if (fc->minor < 18)
1485 		return -ENOTTY;
1486 
1487 	return fuse_ioctl_common(file, cmd, arg,
1488 				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
1489 }
1490 
update_mtime(unsigned ivalid,bool trust_local_mtime)1491 static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
1492 {
1493 	/* Always update if mtime is explicitly set  */
1494 	if (ivalid & ATTR_MTIME_SET)
1495 		return true;
1496 
1497 	/* Or if kernel i_mtime is the official one */
1498 	if (trust_local_mtime)
1499 		return true;
1500 
1501 	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
1502 	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
1503 		return false;
1504 
1505 	/* In all other cases update */
1506 	return true;
1507 }
1508 
iattr_to_fattr(struct fuse_conn * fc,struct iattr * iattr,struct fuse_setattr_in * arg,bool trust_local_cmtime)1509 static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
1510 			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
1511 {
1512 	unsigned ivalid = iattr->ia_valid;
1513 
1514 	if (ivalid & ATTR_MODE)
1515 		arg->valid |= FATTR_MODE,   arg->mode = iattr->ia_mode;
1516 	if (ivalid & ATTR_UID)
1517 		arg->valid |= FATTR_UID,    arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
1518 	if (ivalid & ATTR_GID)
1519 		arg->valid |= FATTR_GID,    arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
1520 	if (ivalid & ATTR_SIZE)
1521 		arg->valid |= FATTR_SIZE,   arg->size = iattr->ia_size;
1522 	if (ivalid & ATTR_ATIME) {
1523 		arg->valid |= FATTR_ATIME;
1524 		arg->atime = iattr->ia_atime.tv_sec;
1525 		arg->atimensec = iattr->ia_atime.tv_nsec;
1526 		if (!(ivalid & ATTR_ATIME_SET))
1527 			arg->valid |= FATTR_ATIME_NOW;
1528 	}
1529 	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
1530 		arg->valid |= FATTR_MTIME;
1531 		arg->mtime = iattr->ia_mtime.tv_sec;
1532 		arg->mtimensec = iattr->ia_mtime.tv_nsec;
1533 		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
1534 			arg->valid |= FATTR_MTIME_NOW;
1535 	}
1536 	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
1537 		arg->valid |= FATTR_CTIME;
1538 		arg->ctime = iattr->ia_ctime.tv_sec;
1539 		arg->ctimensec = iattr->ia_ctime.tv_nsec;
1540 	}
1541 }
1542 
/*
 * Prevent concurrent writepages on inode
 *
 * This is done by adding a negative bias to the inode write counter
 * and waiting for all pending writes to finish.
 *
 * The caller must hold the inode lock, and the bias must not already be in
 * place; both conditions are enforced with BUG_ON().  The function sleeps on
 * fi->page_waitq until the counter is back to the bias value.
 */
void fuse_set_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(!inode_is_locked(inode));

	spin_lock(&fi->lock);
	/* A negative counter would mean the bias has already been added */
	BUG_ON(fi->writectr < 0);
	fi->writectr += FUSE_NOWRITE;
	spin_unlock(&fi->lock);
	/* The counter equals the bare bias once no writes are outstanding */
	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}
1561 
/*
 * Allow writepages on inode
 *
 * Remove the bias from the writecounter and send any queued
 * writepages.
 *
 * The callers in this file invoke this with fi->lock held.  The counter must
 * be exactly FUSE_NOWRITE on entry (enforced with BUG_ON()), i.e. the bias
 * is in place and nothing else is counted.
 */
static void __fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(fi->writectr != FUSE_NOWRITE);
	fi->writectr = 0;
	fuse_flush_writepages(inode);
}
1576 
/*
 * Locked wrapper around __fuse_release_nowrite(): takes fi->lock, removes
 * the write bias from the inode and drops the lock again.
 */
void fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	spin_lock(&fi->lock);
	__fuse_release_nowrite(inode);
	spin_unlock(&fi->lock);
}
1585 
fuse_setattr_fill(struct fuse_conn * fc,struct fuse_args * args,struct inode * inode,struct fuse_setattr_in * inarg_p,struct fuse_attr_out * outarg_p)1586 static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
1587 			      struct inode *inode,
1588 			      struct fuse_setattr_in *inarg_p,
1589 			      struct fuse_attr_out *outarg_p)
1590 {
1591 	args->opcode = FUSE_SETATTR;
1592 	args->nodeid = get_node_id(inode);
1593 	args->in_numargs = 1;
1594 	args->in_args[0].size = sizeof(*inarg_p);
1595 	args->in_args[0].value = inarg_p;
1596 	args->out_numargs = 1;
1597 	args->out_args[0].size = sizeof(*outarg_p);
1598 	args->out_args[0].value = outarg_p;
1599 }
1600 
1601 /*
1602  * Flush inode->i_mtime to the server
1603  */
fuse_flush_times(struct inode * inode,struct fuse_file * ff)1604 int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
1605 {
1606 	struct fuse_mount *fm = get_fuse_mount(inode);
1607 	FUSE_ARGS(args);
1608 	struct fuse_setattr_in inarg;
1609 	struct fuse_attr_out outarg;
1610 
1611 	memset(&inarg, 0, sizeof(inarg));
1612 	memset(&outarg, 0, sizeof(outarg));
1613 
1614 	inarg.valid = FATTR_MTIME;
1615 	inarg.mtime = inode->i_mtime.tv_sec;
1616 	inarg.mtimensec = inode->i_mtime.tv_nsec;
1617 	if (fm->fc->minor >= 23) {
1618 		inarg.valid |= FATTR_CTIME;
1619 		inarg.ctime = inode->i_ctime.tv_sec;
1620 		inarg.ctimensec = inode->i_ctime.tv_nsec;
1621 	}
1622 	if (ff) {
1623 		inarg.valid |= FATTR_FH;
1624 		inarg.fh = ff->fh;
1625 	}
1626 	fuse_setattr_fill(fm->fc, &args, inode, &inarg, &outarg);
1627 
1628 	return fuse_simple_request(fm, &args);
1629 }
1630 
1631 /*
1632  * Set attributes, and at the same time refresh them.
1633  *
1634  * Truncation is slightly complicated, because the 'truncate' request
1635  * may fail, in which case we don't want to touch the mapping.
1636  * vmtruncate() doesn't allow for this case, so do the rlimit checking
1637  * and the actual truncation by hand.
1638  */
fuse_do_setattr(struct dentry * dentry,struct iattr * attr,struct file * file)1639 int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1640 		    struct file *file)
1641 {
1642 	struct inode *inode = d_inode(dentry);
1643 	struct fuse_mount *fm = get_fuse_mount(inode);
1644 	struct fuse_conn *fc = fm->fc;
1645 	struct fuse_inode *fi = get_fuse_inode(inode);
1646 	struct address_space *mapping = inode->i_mapping;
1647 	FUSE_ARGS(args);
1648 	struct fuse_setattr_in inarg;
1649 	struct fuse_attr_out outarg;
1650 	bool is_truncate = false;
1651 	bool is_wb = fc->writeback_cache && S_ISREG(inode->i_mode);
1652 	loff_t oldsize;
1653 	int err;
1654 	bool trust_local_cmtime = is_wb;
1655 	bool fault_blocked = false;
1656 
1657 	if (!fc->default_permissions)
1658 		attr->ia_valid |= ATTR_FORCE;
1659 
1660 	err = setattr_prepare(&init_user_ns, dentry, attr);
1661 	if (err)
1662 		return err;
1663 
1664 	if (attr->ia_valid & ATTR_SIZE) {
1665 		if (WARN_ON(!S_ISREG(inode->i_mode)))
1666 			return -EIO;
1667 		is_truncate = true;
1668 	}
1669 
1670 	if (FUSE_IS_DAX(inode) && is_truncate) {
1671 		filemap_invalidate_lock(mapping);
1672 		fault_blocked = true;
1673 		err = fuse_dax_break_layouts(inode, 0, 0);
1674 		if (err) {
1675 			filemap_invalidate_unlock(mapping);
1676 			return err;
1677 		}
1678 	}
1679 
1680 	if (attr->ia_valid & ATTR_OPEN) {
1681 		/* This is coming from open(..., ... | O_TRUNC); */
1682 		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
1683 		WARN_ON(attr->ia_size != 0);
1684 		if (fc->atomic_o_trunc) {
1685 			/*
1686 			 * No need to send request to userspace, since actual
1687 			 * truncation has already been done by OPEN.  But still
1688 			 * need to truncate page cache.
1689 			 */
1690 			i_size_write(inode, 0);
1691 			truncate_pagecache(inode, 0);
1692 			goto out;
1693 		}
1694 		file = NULL;
1695 	}
1696 
1697 	/* Flush dirty data/metadata before non-truncate SETATTR */
1698 	if (is_wb &&
1699 	    attr->ia_valid &
1700 			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
1701 			 ATTR_TIMES_SET)) {
1702 		err = write_inode_now(inode, true);
1703 		if (err)
1704 			return err;
1705 
1706 		fuse_set_nowrite(inode);
1707 		fuse_release_nowrite(inode);
1708 	}
1709 
1710 	if (is_truncate) {
1711 		fuse_set_nowrite(inode);
1712 		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1713 		if (trust_local_cmtime && attr->ia_size != inode->i_size)
1714 			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1715 	}
1716 
1717 	memset(&inarg, 0, sizeof(inarg));
1718 	memset(&outarg, 0, sizeof(outarg));
1719 	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1720 	if (file) {
1721 		struct fuse_file *ff = file->private_data;
1722 		inarg.valid |= FATTR_FH;
1723 		inarg.fh = ff->fh;
1724 	}
1725 
1726 	/* Kill suid/sgid for non-directory chown unconditionally */
1727 	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
1728 	    attr->ia_valid & (ATTR_UID | ATTR_GID))
1729 		inarg.valid |= FATTR_KILL_SUIDGID;
1730 
1731 	if (attr->ia_valid & ATTR_SIZE) {
1732 		/* For mandatory locking in truncate */
1733 		inarg.valid |= FATTR_LOCKOWNER;
1734 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
1735 
1736 		/* Kill suid/sgid for truncate only if no CAP_FSETID */
1737 		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
1738 			inarg.valid |= FATTR_KILL_SUIDGID;
1739 	}
1740 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
1741 	err = fuse_simple_request(fm, &args);
1742 	if (err) {
1743 		if (err == -EINTR)
1744 			fuse_invalidate_attr(inode);
1745 		goto error;
1746 	}
1747 
1748 	if (fuse_invalid_attr(&outarg.attr) ||
1749 	    inode_wrong_type(inode, outarg.attr.mode)) {
1750 		fuse_make_bad(inode);
1751 		err = -EIO;
1752 		goto error;
1753 	}
1754 
1755 	spin_lock(&fi->lock);
1756 	/* the kernel maintains i_mtime locally */
1757 	if (trust_local_cmtime) {
1758 		if (attr->ia_valid & ATTR_MTIME)
1759 			inode->i_mtime = attr->ia_mtime;
1760 		if (attr->ia_valid & ATTR_CTIME)
1761 			inode->i_ctime = attr->ia_ctime;
1762 		/* FIXME: clear I_DIRTY_SYNC? */
1763 	}
1764 
1765 	fuse_change_attributes_common(inode, &outarg.attr,
1766 				      attr_timeout(&outarg),
1767 				      fuse_get_cache_mask(inode));
1768 	oldsize = inode->i_size;
1769 	/* see the comment in fuse_change_attributes() */
1770 	if (!is_wb || is_truncate)
1771 		i_size_write(inode, outarg.attr.size);
1772 
1773 	if (is_truncate) {
1774 		/* NOTE: this may release/reacquire fi->lock */
1775 		__fuse_release_nowrite(inode);
1776 	}
1777 	spin_unlock(&fi->lock);
1778 
1779 	/*
1780 	 * Only call invalidate_inode_pages2() after removing
1781 	 * FUSE_NOWRITE, otherwise fuse_launder_folio() would deadlock.
1782 	 */
1783 	if ((is_truncate || !is_wb) &&
1784 	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1785 		truncate_pagecache(inode, outarg.attr.size);
1786 		invalidate_inode_pages2(mapping);
1787 	}
1788 
1789 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1790 out:
1791 	if (fault_blocked)
1792 		filemap_invalidate_unlock(mapping);
1793 
1794 	return 0;
1795 
1796 error:
1797 	if (is_truncate)
1798 		fuse_release_nowrite(inode);
1799 
1800 	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1801 
1802 	if (fault_blocked)
1803 		filemap_invalidate_unlock(mapping);
1804 	return err;
1805 }
1806 
fuse_setattr(struct user_namespace * mnt_userns,struct dentry * entry,struct iattr * attr)1807 static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry,
1808 			struct iattr *attr)
1809 {
1810 	struct inode *inode = d_inode(entry);
1811 	struct fuse_conn *fc = get_fuse_conn(inode);
1812 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1813 	int ret;
1814 
1815 	if (fuse_is_bad(inode))
1816 		return -EIO;
1817 
1818 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
1819 		return -EACCES;
1820 
1821 	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
1822 		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
1823 				    ATTR_MODE);
1824 
1825 		/*
1826 		 * The only sane way to reliably kill suid/sgid is to do it in
1827 		 * the userspace filesystem
1828 		 *
1829 		 * This should be done on write(), truncate() and chown().
1830 		 */
1831 		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
1832 			/*
1833 			 * ia_mode calculation may have used stale i_mode.
1834 			 * Refresh and recalculate.
1835 			 */
1836 			ret = fuse_do_getattr(inode, NULL, file);
1837 			if (ret)
1838 				return ret;
1839 
1840 			attr->ia_mode = inode->i_mode;
1841 			if (inode->i_mode & S_ISUID) {
1842 				attr->ia_valid |= ATTR_MODE;
1843 				attr->ia_mode &= ~S_ISUID;
1844 			}
1845 			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1846 				attr->ia_valid |= ATTR_MODE;
1847 				attr->ia_mode &= ~S_ISGID;
1848 			}
1849 		}
1850 	}
1851 	if (!attr->ia_valid)
1852 		return 0;
1853 
1854 	ret = fuse_do_setattr(entry, attr, file);
1855 	if (!ret) {
1856 		/*
1857 		 * If filesystem supports acls it may have updated acl xattrs in
1858 		 * the filesystem, so forget cached acls for the inode.
1859 		 */
1860 		if (fc->posix_acl)
1861 			forget_all_cached_acls(inode);
1862 
1863 		/* Directory mode changed, may need to revalidate access */
1864 		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
1865 			fuse_invalidate_entry_cache(entry);
1866 	}
1867 	return ret;
1868 }
1869 
/*
 * ->getattr() handler.
 *
 * Returns -EIO for a bad inode.  A process that is not allowed to call into
 * this filesystem gets -EACCES, unless it requested nothing at all
 * (@request_mask == 0), in which case only st_dev is filled in and 0 is
 * returned.  Everyone else gets the result of fuse_update_get_attr().
 */
static int fuse_getattr(struct user_namespace *mnt_userns,
			const struct path *path, struct kstat *stat,
			u32 request_mask, unsigned int flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (fuse_is_bad(inode))
		return -EIO;

	if (fuse_allow_current_process(fc))
		return fuse_update_get_attr(inode, NULL, stat, request_mask,
					    flags);

	if (request_mask)
		return -EACCES;

	/*
	 * If user explicitly requested *nothing* then don't
	 * error out, but return st_dev only.
	 */
	stat->result_mask = 0;
	stat->dev = inode->i_sb->s_dev;

	return 0;
}
1895 
/* Inode operations for directories; installed as i_op by fuse_init_dir() */
static const struct inode_operations fuse_dir_inode_operations = {
	.lookup		= fuse_lookup,
	.mkdir		= fuse_mkdir,
	.symlink	= fuse_symlink,
	.unlink		= fuse_unlink,
	.rmdir		= fuse_rmdir,
	.rename		= fuse_rename2,
	.link		= fuse_link,
	.setattr	= fuse_setattr,
	.create		= fuse_create,
	.atomic_open	= fuse_atomic_open,
	.mknod		= fuse_mknod,
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
	.listxattr	= fuse_listxattr,
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
	.fileattr_get	= fuse_fileattr_get,
	.fileattr_set	= fuse_fileattr_set,
};
1916 
/* File operations for directories; installed as i_fop by fuse_init_dir() */
static const struct file_operations fuse_dir_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= fuse_readdir,
	.open		= fuse_dir_open,
	.release	= fuse_dir_release,
	.fsync		= fuse_dir_fsync,
	.unlocked_ioctl	= fuse_dir_ioctl,
	.compat_ioctl	= fuse_dir_compat_ioctl,
};
1927 
/* Inode operations installed as i_op by fuse_init_common() */
static const struct inode_operations fuse_common_inode_operations = {
	.setattr	= fuse_setattr,
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
	.listxattr	= fuse_listxattr,
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
	.fileattr_get	= fuse_fileattr_get,
	.fileattr_set	= fuse_fileattr_set,
};
1938 
/* Inode operations for symlinks; installed as i_op by fuse_init_symlink() */
static const struct inode_operations fuse_symlink_inode_operations = {
	.setattr	= fuse_setattr,
	.get_link	= fuse_get_link,
	.getattr	= fuse_getattr,
	.listxattr	= fuse_listxattr,
};
1945 
/* Install fuse_common_inode_operations as the inode operations of @inode */
void fuse_init_common(struct inode *inode)
{
	inode->i_op = &fuse_common_inode_operations;
}
1950 
fuse_init_dir(struct inode * inode)1951 void fuse_init_dir(struct inode *inode)
1952 {
1953 	struct fuse_inode *fi = get_fuse_inode(inode);
1954 
1955 	inode->i_op = &fuse_dir_inode_operations;
1956 	inode->i_fop = &fuse_dir_operations;
1957 
1958 	spin_lock_init(&fi->rdc.lock);
1959 	fi->rdc.cached = false;
1960 	fi->rdc.size = 0;
1961 	fi->rdc.pos = 0;
1962 	fi->rdc.version = 0;
1963 }
1964 
fuse_symlink_read_folio(struct file * null,struct folio * folio)1965 static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
1966 {
1967 	int err = fuse_readlink_page(folio->mapping->host, &folio->page);
1968 
1969 	if (!err)
1970 		folio_mark_uptodate(folio);
1971 
1972 	folio_unlock(folio);
1973 
1974 	return err;
1975 }
1976 
/* Address space operations for symlinks; installed by fuse_init_symlink() */
static const struct address_space_operations fuse_symlink_aops = {
	.read_folio	= fuse_symlink_read_folio,
};
1980 
/*
 * Set up @inode as a symlink: install the symlink inode operations and the
 * symlink address space operations, then call inode_nohighmem() on it.
 */
void fuse_init_symlink(struct inode *inode)
{
	inode->i_op = &fuse_symlink_inode_operations;
	inode->i_data.a_ops = &fuse_symlink_aops;
	/*
	 * NOTE(review): presumably needed because fuse_readlink_page() and
	 * fuse_get_link() access the page through page_address() - confirm.
	 */
	inode_nohighmem(inode);
}
1987