1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * (C) 2001 Clemson University and The University of Chicago
4 * Copyright 2018 Omnibond Systems, L.L.C.
5 *
6 * See COPYING in top-level directory.
7 */
8 #include <linux/kernel.h>
9 #include "protocol.h"
10 #include "orangefs-kernel.h"
11 #include "orangefs-dev-proto.h"
12 #include "orangefs-bufmap.h"
13
fsid_of_op(struct orangefs_kernel_op_s * op)14 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
15 {
16 __s32 fsid = ORANGEFS_FS_ID_NULL;
17
18 if (op) {
19 switch (op->upcall.type) {
20 case ORANGEFS_VFS_OP_FILE_IO:
21 fsid = op->upcall.req.io.refn.fs_id;
22 break;
23 case ORANGEFS_VFS_OP_LOOKUP:
24 fsid = op->upcall.req.lookup.parent_refn.fs_id;
25 break;
26 case ORANGEFS_VFS_OP_CREATE:
27 fsid = op->upcall.req.create.parent_refn.fs_id;
28 break;
29 case ORANGEFS_VFS_OP_GETATTR:
30 fsid = op->upcall.req.getattr.refn.fs_id;
31 break;
32 case ORANGEFS_VFS_OP_REMOVE:
33 fsid = op->upcall.req.remove.parent_refn.fs_id;
34 break;
35 case ORANGEFS_VFS_OP_MKDIR:
36 fsid = op->upcall.req.mkdir.parent_refn.fs_id;
37 break;
38 case ORANGEFS_VFS_OP_READDIR:
39 fsid = op->upcall.req.readdir.refn.fs_id;
40 break;
41 case ORANGEFS_VFS_OP_SETATTR:
42 fsid = op->upcall.req.setattr.refn.fs_id;
43 break;
44 case ORANGEFS_VFS_OP_SYMLINK:
45 fsid = op->upcall.req.sym.parent_refn.fs_id;
46 break;
47 case ORANGEFS_VFS_OP_RENAME:
48 fsid = op->upcall.req.rename.old_parent_refn.fs_id;
49 break;
50 case ORANGEFS_VFS_OP_STATFS:
51 fsid = op->upcall.req.statfs.fs_id;
52 break;
53 case ORANGEFS_VFS_OP_TRUNCATE:
54 fsid = op->upcall.req.truncate.refn.fs_id;
55 break;
56 case ORANGEFS_VFS_OP_RA_FLUSH:
57 fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
58 break;
59 case ORANGEFS_VFS_OP_FS_UMOUNT:
60 fsid = op->upcall.req.fs_umount.fs_id;
61 break;
62 case ORANGEFS_VFS_OP_GETXATTR:
63 fsid = op->upcall.req.getxattr.refn.fs_id;
64 break;
65 case ORANGEFS_VFS_OP_SETXATTR:
66 fsid = op->upcall.req.setxattr.refn.fs_id;
67 break;
68 case ORANGEFS_VFS_OP_LISTXATTR:
69 fsid = op->upcall.req.listxattr.refn.fs_id;
70 break;
71 case ORANGEFS_VFS_OP_REMOVEXATTR:
72 fsid = op->upcall.req.removexattr.refn.fs_id;
73 break;
74 case ORANGEFS_VFS_OP_FSYNC:
75 fsid = op->upcall.req.fsync.refn.fs_id;
76 break;
77 default:
78 break;
79 }
80 }
81 return fsid;
82 }
83
orangefs_inode_flags(struct ORANGEFS_sys_attr_s * attrs)84 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
85 {
86 int flags = 0;
87 if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
88 flags |= S_IMMUTABLE;
89 else
90 flags &= ~S_IMMUTABLE;
91 if (attrs->flags & ORANGEFS_APPEND_FL)
92 flags |= S_APPEND;
93 else
94 flags &= ~S_APPEND;
95 if (attrs->flags & ORANGEFS_NOATIME_FL)
96 flags |= S_NOATIME;
97 else
98 flags &= ~S_NOATIME;
99 return flags;
100 }
101
orangefs_inode_perms(struct ORANGEFS_sys_attr_s * attrs)102 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
103 {
104 int perm_mode = 0;
105
106 if (attrs->perms & ORANGEFS_O_EXECUTE)
107 perm_mode |= S_IXOTH;
108 if (attrs->perms & ORANGEFS_O_WRITE)
109 perm_mode |= S_IWOTH;
110 if (attrs->perms & ORANGEFS_O_READ)
111 perm_mode |= S_IROTH;
112
113 if (attrs->perms & ORANGEFS_G_EXECUTE)
114 perm_mode |= S_IXGRP;
115 if (attrs->perms & ORANGEFS_G_WRITE)
116 perm_mode |= S_IWGRP;
117 if (attrs->perms & ORANGEFS_G_READ)
118 perm_mode |= S_IRGRP;
119
120 if (attrs->perms & ORANGEFS_U_EXECUTE)
121 perm_mode |= S_IXUSR;
122 if (attrs->perms & ORANGEFS_U_WRITE)
123 perm_mode |= S_IWUSR;
124 if (attrs->perms & ORANGEFS_U_READ)
125 perm_mode |= S_IRUSR;
126
127 if (attrs->perms & ORANGEFS_G_SGID)
128 perm_mode |= S_ISGID;
129 if (attrs->perms & ORANGEFS_U_SUID)
130 perm_mode |= S_ISUID;
131
132 return perm_mode;
133 }
134
135 /*
136 * NOTE: in kernel land, we never use the sys_attr->link_target for
137 * anything, so don't bother copying it into the sys_attr object here.
138 */
copy_attributes_from_inode(struct inode * inode,struct ORANGEFS_sys_attr_s * attrs)139 static inline void copy_attributes_from_inode(struct inode *inode,
140 struct ORANGEFS_sys_attr_s *attrs)
141 {
142 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
143 attrs->mask = 0;
144 if (orangefs_inode->attr_valid & ATTR_UID) {
145 attrs->owner = from_kuid(&init_user_ns, inode->i_uid);
146 attrs->mask |= ORANGEFS_ATTR_SYS_UID;
147 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
148 }
149 if (orangefs_inode->attr_valid & ATTR_GID) {
150 attrs->group = from_kgid(&init_user_ns, inode->i_gid);
151 attrs->mask |= ORANGEFS_ATTR_SYS_GID;
152 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
153 }
154
155 if (orangefs_inode->attr_valid & ATTR_ATIME) {
156 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
157 if (orangefs_inode->attr_valid & ATTR_ATIME_SET) {
158 attrs->atime = (time64_t)inode->i_atime.tv_sec;
159 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
160 }
161 }
162 if (orangefs_inode->attr_valid & ATTR_MTIME) {
163 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
164 if (orangefs_inode->attr_valid & ATTR_MTIME_SET) {
165 attrs->mtime = (time64_t)inode->i_mtime.tv_sec;
166 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
167 }
168 }
169 if (orangefs_inode->attr_valid & ATTR_CTIME)
170 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
171
172 /*
173 * ORANGEFS cannot set size with a setattr operation. Probably not
174 * likely to be requested through the VFS, but just in case, don't
175 * worry about ATTR_SIZE
176 */
177
178 if (orangefs_inode->attr_valid & ATTR_MODE) {
179 attrs->perms = ORANGEFS_util_translate_mode(inode->i_mode);
180 attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
181 }
182 }
183
orangefs_inode_type(enum orangefs_ds_type objtype)184 static int orangefs_inode_type(enum orangefs_ds_type objtype)
185 {
186 if (objtype == ORANGEFS_TYPE_METAFILE)
187 return S_IFREG;
188 else if (objtype == ORANGEFS_TYPE_DIRECTORY)
189 return S_IFDIR;
190 else if (objtype == ORANGEFS_TYPE_SYMLINK)
191 return S_IFLNK;
192 else
193 return -1;
194 }
195
orangefs_make_bad_inode(struct inode * inode)196 static void orangefs_make_bad_inode(struct inode *inode)
197 {
198 if (is_root_handle(inode)) {
199 /*
200 * if this occurs, the pvfs2-client-core was killed but we
201 * can't afford to lose the inode operations and such
202 * associated with the root handle in any case.
203 */
204 gossip_debug(GOSSIP_UTILS_DEBUG,
205 "*** NOT making bad root inode %pU\n",
206 get_khandle_from_ino(inode));
207 } else {
208 gossip_debug(GOSSIP_UTILS_DEBUG,
209 "*** making bad inode %pU\n",
210 get_khandle_from_ino(inode));
211 make_bad_inode(inode);
212 }
213 }
214
orangefs_inode_is_stale(struct inode * inode,struct ORANGEFS_sys_attr_s * attrs,char * link_target)215 static int orangefs_inode_is_stale(struct inode *inode,
216 struct ORANGEFS_sys_attr_s *attrs, char *link_target)
217 {
218 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
219 int type = orangefs_inode_type(attrs->objtype);
220 /*
221 * If the inode type or symlink target have changed then this
222 * inode is stale.
223 */
224 if (type == -1 || inode_wrong_type(inode, type)) {
225 orangefs_make_bad_inode(inode);
226 return 1;
227 }
228 if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
229 link_target, ORANGEFS_NAME_MAX)) {
230 orangefs_make_bad_inode(inode);
231 return 1;
232 }
233 return 0;
234 }
235
orangefs_inode_getattr(struct inode * inode,int flags)236 int orangefs_inode_getattr(struct inode *inode, int flags)
237 {
238 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
239 struct orangefs_kernel_op_s *new_op;
240 loff_t inode_size;
241 int ret, type;
242
243 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU flags %d\n",
244 __func__, get_khandle_from_ino(inode), flags);
245
246 again:
247 spin_lock(&inode->i_lock);
248 /* Must have all the attributes in the mask and be within cache time. */
249 if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
250 orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
251 if (orangefs_inode->attr_valid) {
252 spin_unlock(&inode->i_lock);
253 write_inode_now(inode, 1);
254 goto again;
255 }
256 spin_unlock(&inode->i_lock);
257 return 0;
258 }
259 spin_unlock(&inode->i_lock);
260
261 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
262 if (!new_op)
263 return -ENOMEM;
264 new_op->upcall.req.getattr.refn = orangefs_inode->refn;
265 /*
266 * Size is the hardest attribute to get. The incremental cost of any
267 * other attribute is essentially zero.
268 */
269 if (flags)
270 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
271 else
272 new_op->upcall.req.getattr.mask =
273 ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
274
275 ret = service_operation(new_op, __func__,
276 get_interruptible_flag(inode));
277 if (ret != 0)
278 goto out;
279
280 again2:
281 spin_lock(&inode->i_lock);
282 /* Must have all the attributes in the mask and be within cache time. */
283 if ((!flags && time_before(jiffies, orangefs_inode->getattr_time)) ||
284 orangefs_inode->attr_valid || inode->i_state & I_DIRTY_PAGES) {
285 if (orangefs_inode->attr_valid) {
286 spin_unlock(&inode->i_lock);
287 write_inode_now(inode, 1);
288 goto again2;
289 }
290 if (inode->i_state & I_DIRTY_PAGES) {
291 ret = 0;
292 goto out_unlock;
293 }
294 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: in cache or dirty\n",
295 __func__);
296 ret = 0;
297 goto out_unlock;
298 }
299
300 if (!(flags & ORANGEFS_GETATTR_NEW)) {
301 ret = orangefs_inode_is_stale(inode,
302 &new_op->downcall.resp.getattr.attributes,
303 new_op->downcall.resp.getattr.link_target);
304 if (ret) {
305 ret = -ESTALE;
306 goto out_unlock;
307 }
308 }
309
310 type = orangefs_inode_type(new_op->
311 downcall.resp.getattr.attributes.objtype);
312 switch (type) {
313 case S_IFREG:
314 inode->i_flags = orangefs_inode_flags(&new_op->
315 downcall.resp.getattr.attributes);
316 if (flags) {
317 inode_size = (loff_t)new_op->
318 downcall.resp.getattr.attributes.size;
319 inode->i_size = inode_size;
320 inode->i_blkbits = ffs(new_op->downcall.resp.getattr.
321 attributes.blksize);
322 inode->i_bytes = inode_size;
323 inode->i_blocks =
324 (inode_size + 512 - inode_size % 512)/512;
325 }
326 break;
327 case S_IFDIR:
328 if (flags) {
329 inode->i_size = PAGE_SIZE;
330 inode_set_bytes(inode, inode->i_size);
331 }
332 set_nlink(inode, 1);
333 break;
334 case S_IFLNK:
335 if (flags & ORANGEFS_GETATTR_NEW) {
336 inode->i_size = (loff_t)strlen(new_op->
337 downcall.resp.getattr.link_target);
338 ret = strscpy(orangefs_inode->link_target,
339 new_op->downcall.resp.getattr.link_target,
340 ORANGEFS_NAME_MAX);
341 if (ret == -E2BIG) {
342 ret = -EIO;
343 goto out_unlock;
344 }
345 inode->i_link = orangefs_inode->link_target;
346 }
347 break;
348 /* i.e. -1 */
349 default:
350 /* XXX: ESTALE? This is what is done if it is not new. */
351 orangefs_make_bad_inode(inode);
352 ret = -ESTALE;
353 goto out_unlock;
354 }
355
356 inode->i_uid = make_kuid(&init_user_ns, new_op->
357 downcall.resp.getattr.attributes.owner);
358 inode->i_gid = make_kgid(&init_user_ns, new_op->
359 downcall.resp.getattr.attributes.group);
360 inode->i_atime.tv_sec = (time64_t)new_op->
361 downcall.resp.getattr.attributes.atime;
362 inode->i_mtime.tv_sec = (time64_t)new_op->
363 downcall.resp.getattr.attributes.mtime;
364 inode->i_ctime.tv_sec = (time64_t)new_op->
365 downcall.resp.getattr.attributes.ctime;
366 inode->i_atime.tv_nsec = 0;
367 inode->i_mtime.tv_nsec = 0;
368 inode->i_ctime.tv_nsec = 0;
369
370 /* special case: mark the root inode as sticky */
371 inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
372 orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
373
374 orangefs_inode->getattr_time = jiffies +
375 orangefs_getattr_timeout_msecs*HZ/1000;
376 ret = 0;
377 out_unlock:
378 spin_unlock(&inode->i_lock);
379 out:
380 op_release(new_op);
381 return ret;
382 }
383
orangefs_inode_check_changed(struct inode * inode)384 int orangefs_inode_check_changed(struct inode *inode)
385 {
386 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
387 struct orangefs_kernel_op_s *new_op;
388 int ret;
389
390 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
391 get_khandle_from_ino(inode));
392
393 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
394 if (!new_op)
395 return -ENOMEM;
396 new_op->upcall.req.getattr.refn = orangefs_inode->refn;
397 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
398 ORANGEFS_ATTR_SYS_LNK_TARGET;
399
400 ret = service_operation(new_op, __func__,
401 get_interruptible_flag(inode));
402 if (ret != 0)
403 goto out;
404
405 ret = orangefs_inode_is_stale(inode,
406 &new_op->downcall.resp.getattr.attributes,
407 new_op->downcall.resp.getattr.link_target);
408 out:
409 op_release(new_op);
410 return ret;
411 }
412
413 /*
414 * issues a orangefs setattr request to make sure the new attribute values
415 * take effect if successful. returns 0 on success; -errno otherwise
416 */
orangefs_inode_setattr(struct inode * inode)417 int orangefs_inode_setattr(struct inode *inode)
418 {
419 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
420 struct orangefs_kernel_op_s *new_op;
421 int ret;
422
423 new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
424 if (!new_op)
425 return -ENOMEM;
426
427 spin_lock(&inode->i_lock);
428 new_op->upcall.uid = from_kuid(&init_user_ns, orangefs_inode->attr_uid);
429 new_op->upcall.gid = from_kgid(&init_user_ns, orangefs_inode->attr_gid);
430 new_op->upcall.req.setattr.refn = orangefs_inode->refn;
431 copy_attributes_from_inode(inode,
432 &new_op->upcall.req.setattr.attributes);
433 orangefs_inode->attr_valid = 0;
434 if (!new_op->upcall.req.setattr.attributes.mask) {
435 spin_unlock(&inode->i_lock);
436 op_release(new_op);
437 return 0;
438 }
439 spin_unlock(&inode->i_lock);
440
441 ret = service_operation(new_op, __func__,
442 get_interruptible_flag(inode) | ORANGEFS_OP_WRITEBACK);
443 gossip_debug(GOSSIP_UTILS_DEBUG,
444 "orangefs_inode_setattr: returning %d\n", ret);
445 if (ret)
446 orangefs_make_bad_inode(inode);
447
448 op_release(new_op);
449
450 if (ret == 0)
451 orangefs_inode->getattr_time = jiffies - 1;
452 return ret;
453 }
454
455 /*
456 * The following is a very dirty hack that is now a permanent part of the
457 * ORANGEFS protocol. See protocol.h for more error definitions.
458 */
459
/*
 * Errno values indexed by ORANGEFS error number; entry 0 means success.
 * The order matches include/orangefs-types.h in the OrangeFS source, so
 * entries must never be reordered or inserted in the middle.
 *
 * The table is only ever read, hence const.
 */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
473
/*
 * Convert an ORANGEFS error value into a negative errno value suitable
 * for return from the kernel.
 *
 * - 0 is success and is returned unchanged.
 * - A positive value is logged and treated as if it had been negated.
 * - A code with ORANGEFS_NON_ERRNO_ERROR_BIT set becomes -ETIMEDOUT when
 *   it is ORANGEFS_ECANCEL and -EINVAL (logged) otherwise.
 * - A code with ORANGEFS_ERROR_BIT set is looked up in PINT_errno_mapping;
 *   an index past the end of the table yields -EINVAL.
 * - Anything else is logged and yields -EINVAL.
 *
 * XXX: This is very bad since error codes from ORANGEFS may not be
 * suitable for return into userspace.
 */
int orangefs_normalize_to_errno(__s32 error_code)
{
	__s32 code;
	__u32 idx;

	/* Success */
	if (error_code == 0)
		return 0;

	/*
	 * This shouldn't ever happen. If it does it should be fixed on the
	 * server.
	 */
	if (error_code > 0) {
		gossip_err("orangefs: error status received.\n");
		gossip_err("orangefs: assuming error code is inverted.\n");
		error_code = -error_code;
	}

	/* All bit tests below are made on the negated error value. */
	code = -error_code;

	if (code & ORANGEFS_NON_ERRNO_ERROR_BIT) {
		/*
		 * cancellation error codes generally correspond to
		 * a timeout from the client's perspective
		 */
		if ((code & (ORANGEFS_ERROR_NUMBER_BITS |
			     ORANGEFS_NON_ERRNO_ERROR_BIT |
			     ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL)
			return -ETIMEDOUT;

		/* assume a default error code */
		gossip_err("%s: bad error code :%d:.\n",
			   __func__,
			   error_code);
		return -EINVAL;
	}

	/* Convert ORANGEFS encoded errno values into regular errno values. */
	if (code & ORANGEFS_ERROR_BIT) {
		idx = code & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
		if (idx < ARRAY_SIZE(PINT_errno_mapping))
			return -PINT_errno_mapping[idx];
		return -EINVAL;
	}

	/*
	 * Only ORANGEFS protocol error codes should ever come here. Otherwise
	 * there is a bug somewhere.
	 */
	gossip_err("%s: unknown error code.\n", __func__);
	return -EINVAL;
}
535
536 #define NUM_MODES 11
ORANGEFS_util_translate_mode(int mode)537 __s32 ORANGEFS_util_translate_mode(int mode)
538 {
539 int ret = 0;
540 int i = 0;
541 static int modes[NUM_MODES] = {
542 S_IXOTH, S_IWOTH, S_IROTH,
543 S_IXGRP, S_IWGRP, S_IRGRP,
544 S_IXUSR, S_IWUSR, S_IRUSR,
545 S_ISGID, S_ISUID
546 };
547 static int orangefs_modes[NUM_MODES] = {
548 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
549 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
550 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
551 ORANGEFS_G_SGID, ORANGEFS_U_SUID
552 };
553
554 for (i = 0; i < NUM_MODES; i++)
555 if (mode & modes[i])
556 ret |= orangefs_modes[i];
557
558 return ret;
559 }
560 #undef NUM_MODES
561