1 /*
2  * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32 
33 #include "xfs.h"
34 #include "xfs_macros.h"
35 #include "xfs_types.h"
36 #include "xfs_inum.h"
37 #include "xfs_log.h"
38 #include "xfs_trans.h"
39 #include "xfs_sb.h"
40 #include "xfs_ag.h"
41 #include "xfs_dir.h"
42 #include "xfs_dir2.h"
43 #include "xfs_dmapi.h"
44 #include "xfs_mount.h"
45 #include "xfs_alloc_btree.h"
46 #include "xfs_bmap_btree.h"
47 #include "xfs_ialloc_btree.h"
48 #include "xfs_itable.h"
49 #include "xfs_btree.h"
50 #include "xfs_ialloc.h"
51 #include "xfs_alloc.h"
52 #include "xfs_attr_sf.h"
53 #include "xfs_dir_sf.h"
54 #include "xfs_dir2_sf.h"
55 #include "xfs_dinode.h"
56 #include "xfs_inode_item.h"
57 #include "xfs_inode.h"
58 #include "xfs_bmap.h"
59 #include "xfs_da_btree.h"
60 #include "xfs_attr.h"
61 #include "xfs_rw.h"
62 #include "xfs_refcache.h"
63 #include "xfs_error.h"
64 #include "xfs_bit.h"
65 #include "xfs_rtalloc.h"
66 #include "xfs_quota.h"
67 #include "xfs_utils.h"
68 #include "xfs_trans_space.h"
69 #include "xfs_dir_leaf.h"
70 #include "xfs_mac.h"
71 #include "xfs_log_priv.h"
72 
73 
74 /*
75  * The maximum symlink pathlen is 1024 bytes. Since the minimum file
76  * system blocksize is 512 bytes, a symlink spans at most 2 blocks, so
77  * we can get a max of 2 extents back from xfs_bmapi().
78  */
79 #define SYMLINK_MAPS 2
80 
81 /*
82  * For xfs, we check that the file isn't too big to be opened by this kernel.
83  * No other open action is required for regular files.  Devices are handled
84  * through the specfs file system, pipes through fifofs.  Device and
85  * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
86  * when a new vnode is first looked up or created.
87  */
88 STATIC int
xfs_open(bhv_desc_t * bdp,cred_t * credp)89 xfs_open(
90 	bhv_desc_t	*bdp,
91 	cred_t		*credp)
92 {
93 	int		mode;
94 	vnode_t		*vp;
95 	xfs_inode_t	*ip;
96 
97 	vp = BHV_TO_VNODE(bdp);
98 	ip = XFS_BHVTOI(bdp);
99 
100 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
101 		return XFS_ERROR(EIO);
102 
103 	/*
104 	 * If it's a directory with any blocks, read-ahead block 0
105 	 * as we're almost certain to have the next operation be a read there.
106 	 */
107 	if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) {
108 		mode = xfs_ilock_map_shared(ip);
109 		if (ip->i_d.di_nextents > 0)
110 			(void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
111 		xfs_iunlock(ip, mode);
112 	}
113 	return 0;
114 }
115 
116 
117 /*
118  * xfs_getattr
119  */
120 STATIC int
xfs_getattr(bhv_desc_t * bdp,vattr_t * vap,int flags,cred_t * credp)121 xfs_getattr(
122 	bhv_desc_t	*bdp,
123 	vattr_t		*vap,
124 	int		flags,
125 	cred_t		*credp)
126 {
127 	xfs_inode_t	*ip;
128 	xfs_mount_t	*mp;
129 	vnode_t		*vp;
130 
131 	vp  = BHV_TO_VNODE(bdp);
132 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
133 
134 	ip = XFS_BHVTOI(bdp);
135 	mp = ip->i_mount;
136 
137 	if (XFS_FORCED_SHUTDOWN(mp))
138 		return XFS_ERROR(EIO);
139 
140 	if (!(flags & ATTR_LAZY))
141 		xfs_ilock(ip, XFS_ILOCK_SHARED);
142 
143 	vap->va_size = ip->i_d.di_size;
144 	if (vap->va_mask == XFS_AT_SIZE)
145 		goto all_done;
146 
147 	vap->va_nblocks =
148 		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
149 	vap->va_nodeid = ip->i_ino;
150 #if XFS_BIG_INUMS
151 	vap->va_nodeid += mp->m_inoadd;
152 #endif
153 	vap->va_nlink = ip->i_d.di_nlink;
154 
155 	/*
156 	 * Quick exit for non-stat callers
157 	 */
158 	if ((vap->va_mask &
159 	    ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
160 	      XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
161 		goto all_done;
162 
163 	/*
164 	 * Copy from in-core inode.
165 	 */
166 	vap->va_type = vp->v_type;
167 	vap->va_mode = ip->i_d.di_mode & MODEMASK;
168 	vap->va_uid = ip->i_d.di_uid;
169 	vap->va_gid = ip->i_d.di_gid;
170 	vap->va_projid = ip->i_d.di_projid;
171 
172 	/*
173 	 * Check vnode type block/char vs. everything else.
174 	 * Do it with bitmask because that's faster than looking
175 	 * for multiple values individually.
176 	 */
177 	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
178 		vap->va_rdev = 0;
179 
180 		if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
181 
182 #if 0
183 			/* Large block sizes confuse various
184 			 * user space programs, so letting the
185 			 * stripe size through is not a good
186 			 * idea for now.
187 			 */
188 			vap->va_blocksize = mp->m_swidth ?
189 				/*
190 				 * If the underlying volume is a stripe, then
191 				 * return the stripe width in bytes as the
192 				 * recommended I/O size.
193 				 */
194 				(mp->m_swidth << mp->m_sb.sb_blocklog) :
195 				/*
196 				 * Return the largest of the preferred buffer
197 				 * sizes since doing small I/Os into larger
198 				 * buffers causes buffers to be decommissioned.
199 				 * The value returned is in bytes.
200 				 */
201 				(1 << (int)MAX(mp->m_readio_log,
202 					       mp->m_writeio_log));
203 
204 #else
205 			vap->va_blocksize =
206 				/*
207 				 * Return the largest of the preferred buffer
208 				 * sizes since doing small I/Os into larger
209 				 * buffers causes buffers to be decommissioned.
210 				 * The value returned is in bytes.
211 				 */
212 				1 << (int)MAX(mp->m_readio_log,
213 					       mp->m_writeio_log);
214 #endif
215 		} else {
216 
217 			/*
218 			 * If the file blocks are being allocated from a
219 			 * realtime partition, then return the inode's
220 			 * realtime extent size or the realtime volume's
221 			 * extent size.
222 			 */
223 			vap->va_blocksize = ip->i_d.di_extsize ?
224 				(ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
225 				(mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
226 		}
227 	} else {
228 		vap->va_rdev = ip->i_df.if_u2.if_rdev;
229 		vap->va_blocksize = BLKDEV_IOSIZE;
230 	}
231 
232 	vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
233 	vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
234 	vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
235 	vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
236 	vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
237 	vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
238 
239 	/*
240 	 * Exit for stat callers.  See if any of the rest of the fields
241 	 * to be filled in are needed.
242 	 */
243 	if ((vap->va_mask &
244 	     (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
245 	      XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
246 		goto all_done;
247 
248 	/*
249 	 * Convert di_flags to xflags.
250 	 */
251 	vap->va_xflags = xfs_dic2xflags(&ip->i_d, ARCH_NOCONVERT);
252 
253 	/*
254 	 * Exit for inode revalidate.  See if any of the rest of
255 	 * the fields to be filled in are needed.
256 	 */
257 	if ((vap->va_mask &
258 	     (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
259 	      XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
260 		goto all_done;
261 
262 	vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
263 	vap->va_nextents =
264 		(ip->i_df.if_flags & XFS_IFEXTENTS) ?
265 			ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
266 			ip->i_d.di_nextents;
267 	if (ip->i_afp)
268 		vap->va_anextents =
269 			(ip->i_afp->if_flags & XFS_IFEXTENTS) ?
270 				ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
271 				 ip->i_d.di_anextents;
272 	else
273 		vap->va_anextents = 0;
274 	vap->va_gen = ip->i_d.di_gen;
275 
276  all_done:
277 	if (!(flags & ATTR_LAZY))
278 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
279 	return 0;
280 }
281 
282 
283 /*
284  * xfs_setattr
285  */
286 int
xfs_setattr(bhv_desc_t * bdp,vattr_t * vap,int flags,cred_t * credp)287 xfs_setattr(
288 	bhv_desc_t		*bdp,
289 	vattr_t			*vap,
290 	int			flags,
291 	cred_t			*credp)
292 {
293 	xfs_inode_t		*ip;
294 	xfs_trans_t		*tp;
295 	xfs_mount_t		*mp;
296 	int			mask;
297 	int			code;
298 	uint			lock_flags;
299 	uint			commit_flags=0;
300 	uid_t			uid=0, iuid=0;
301 	gid_t			gid=0, igid=0;
302 	int			timeflags = 0;
303 	vnode_t			*vp;
304 	xfs_prid_t		projid=0, iprojid=0;
305 	int			mandlock_before, mandlock_after;
306 	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
307 	int			file_owner;
308 	int			need_iolock = (flags & ATTR_DMI) == 0;
309 
310 	vp = BHV_TO_VNODE(bdp);
311 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
312 
313 	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
314 		return XFS_ERROR(EROFS);
315 
316 	/*
317 	 * Cannot set certain attributes.
318 	 */
319 	mask = vap->va_mask;
320 	if (mask & XFS_AT_NOSET) {
321 		return XFS_ERROR(EINVAL);
322 	}
323 
324 	ip = XFS_BHVTOI(bdp);
325 	mp = ip->i_mount;
326 
327 	if (XFS_FORCED_SHUTDOWN(mp))
328 		return XFS_ERROR(EIO);
329 
330 	/*
331 	 * Timestamps do not need to be logged and hence do not
332 	 * need to be done within a transaction.
333 	 */
334 	if (mask & XFS_AT_UPDTIMES) {
335 		ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
336 		timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
337 			    ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
338 			    ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
339 		xfs_ichgtime(ip, timeflags);
340 		return 0;
341 	}
342 
343 	olddquot1 = olddquot2 = NULL;
344 	udqp = gdqp = NULL;
345 
346 	/*
347 	 * If disk quotas is on, we make sure that the dquots do exist on disk,
348 	 * before we start any other transactions. Trying to do this later
349 	 * is messy. We don't care to take a readlock to look at the ids
350 	 * in inode here, because we can't hold it across the trans_reserve.
351 	 * If the IDs do change before we take the ilock, we're covered
352 	 * because the i_*dquot fields will get updated anyway.
353 	 */
354 	if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
355 		uint	qflags = 0;
356 
357 		if (mask & XFS_AT_UID) {
358 			uid = vap->va_uid;
359 			qflags |= XFS_QMOPT_UQUOTA;
360 		} else {
361 			uid = ip->i_d.di_uid;
362 		}
363 		if (mask & XFS_AT_GID) {
364 			gid = vap->va_gid;
365 			qflags |= XFS_QMOPT_GQUOTA;
366 		}  else {
367 			gid = ip->i_d.di_gid;
368 		}
369 		/*
370 		 * We take a reference when we initialize udqp and gdqp,
371 		 * so it is important that we never blindly double trip on
372 		 * the same variable. See xfs_create() for an example.
373 		 */
374 		ASSERT(udqp == NULL);
375 		ASSERT(gdqp == NULL);
376 		code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp);
377 		if (code)
378 			return (code);
379 	}
380 
381 	/*
382 	 * For the other attributes, we acquire the inode lock and
383 	 * first do an error checking pass.
384 	 */
385 	tp = NULL;
386 	lock_flags = XFS_ILOCK_EXCL;
387 	if (!(mask & XFS_AT_SIZE)) {
388 		if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
389 		    (mp->m_flags & XFS_MOUNT_WSYNC)) {
390 			tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
391 			commit_flags = 0;
392 			if ((code = xfs_trans_reserve(tp, 0,
393 						     XFS_ICHANGE_LOG_RES(mp), 0,
394 						     0, 0))) {
395 				lock_flags = 0;
396 				goto error_return;
397 			}
398 		}
399 	} else {
400 		if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) &&
401 		    !(flags & ATTR_DMI)) {
402 			int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
403 			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
404 				vap->va_size, 0, dmflags, NULL);
405 			if (code) {
406 				lock_flags = 0;
407 				goto error_return;
408 			}
409 		}
410 		if (need_iolock)
411 			lock_flags |= XFS_IOLOCK_EXCL;
412 	}
413 
414 	xfs_ilock(ip, lock_flags);
415 
416 	/* boolean: are we the file owner? */
417 	file_owner = (current_fsuid(credp) == ip->i_d.di_uid);
418 
419 	/*
420 	 * Change various properties of a file.
421 	 * Only the owner or users with CAP_FOWNER
422 	 * capability may do these things.
423 	 */
424 	if (mask &
425 	    (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
426 	     XFS_AT_GID|XFS_AT_PROJID)) {
427 		/*
428 		 * CAP_FOWNER overrides the following restrictions:
429 		 *
430 		 * The user ID of the calling process must be equal
431 		 * to the file owner ID, except in cases where the
432 		 * CAP_FSETID capability is applicable.
433 		 */
434 		if (!file_owner && !capable(CAP_FOWNER)) {
435 			code = XFS_ERROR(EPERM);
436 			goto error_return;
437 		}
438 
439 		/*
440 		 * CAP_FSETID overrides the following restrictions:
441 		 *
442 		 * The effective user ID of the calling process shall match
443 		 * the file owner when setting the set-user-ID and
444 		 * set-group-ID bits on that file.
445 		 *
446 		 * The effective group ID or one of the supplementary group
447 		 * IDs of the calling process shall match the group owner of
448 		 * the file when setting the set-group-ID bit on that file
449 		 */
450 		if (mask & XFS_AT_MODE) {
451 			mode_t m = 0;
452 
453 			if ((vap->va_mode & S_ISUID) && !file_owner)
454 				m |= S_ISUID;
455 			if ((vap->va_mode & S_ISGID) &&
456 			    !in_group_p((gid_t)ip->i_d.di_gid))
457 				m |= S_ISGID;
458 #if 0
459 			/* Linux allows this, Irix doesn't. */
460 			if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR)
461 				m |= S_ISVTX;
462 #endif
463 			if (m && !capable(CAP_FSETID))
464 				vap->va_mode &= ~m;
465 		}
466 	}
467 
468 	/*
469 	 * Change file ownership.  Must be the owner or privileged.
470 	 * If the system was configured with the "restricted_chown"
471 	 * option, the owner is not permitted to give away the file,
472 	 * and can change the group id only to a group of which he
473 	 * or she is a member.
474 	 */
475 	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
476 		/*
477 		 * These IDs could have changed since we last looked at them.
478 		 * But, we're assured that if the ownership did change
479 		 * while we didn't have the inode locked, inode's dquot(s)
480 		 * would have changed also.
481 		 */
482 		iuid = ip->i_d.di_uid;
483 		iprojid = ip->i_d.di_projid;
484 		igid = ip->i_d.di_gid;
485 		gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
486 		uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
487 		projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
488 			 iprojid;
489 
490 		/*
491 		 * CAP_CHOWN overrides the following restrictions:
492 		 *
493 		 * If _POSIX_CHOWN_RESTRICTED is defined, this capability
494 		 * shall override the restriction that a process cannot
495 		 * change the user ID of a file it owns and the restriction
496 		 * that the group ID supplied to the chown() function
497 		 * shall be equal to either the group ID or one of the
498 		 * supplementary group IDs of the calling process.
499 		 *
500 		 * XXX: How does restricted_chown affect projid?
501 		 */
502 		if (restricted_chown &&
503 		    (iuid != uid || (igid != gid &&
504 				     !in_group_p((gid_t)gid))) &&
505 		    !capable(CAP_CHOWN)) {
506 			code = XFS_ERROR(EPERM);
507 			goto error_return;
508 		}
509 		/*
510 		 * Do a quota reservation only if uid or gid is actually
511 		 * going to change.
512 		 */
513 		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
514 		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
515 			ASSERT(tp);
516 			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
517 						capable(CAP_FOWNER) ?
518 						XFS_QMOPT_FORCE_RES : 0);
519 			if (code)	/* out of quota */
520 				goto error_return;
521 		}
522 	}
523 
524 	/*
525 	 * Truncate file.  Must have write permission and not be a directory.
526 	 */
527 	if (mask & XFS_AT_SIZE) {
528 		/* Short circuit the truncate case for zero length files */
529 		if ((vap->va_size == 0) &&
530 		   (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) {
531 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
532 			lock_flags &= ~XFS_ILOCK_EXCL;
533 			if (mask & XFS_AT_CTIME)
534 				xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
535 			code = 0;
536 			goto error_return;
537 		}
538 
539 		if (vp->v_type == VDIR) {
540 			code = XFS_ERROR(EISDIR);
541 			goto error_return;
542 		} else if (vp->v_type != VREG) {
543 			code = XFS_ERROR(EINVAL);
544 			goto error_return;
545 		}
546 		/*
547 		 * Make sure that the dquots are attached to the inode.
548 		 */
549 		if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
550 			goto error_return;
551 	}
552 
553 	/*
554 	 * Change file access or modified times.
555 	 */
556 	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
557 		if (!file_owner) {
558 			if ((flags & ATTR_UTIME) &&
559 			    !capable(CAP_FOWNER)) {
560 				code = XFS_ERROR(EPERM);
561 				goto error_return;
562 			}
563 		}
564 	}
565 
566 	/*
567 	 * Change extent size or realtime flag.
568 	 */
569 	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
570 		/*
571 		 * Can't change extent size if any extents are allocated.
572 		 */
573 		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
574 		    (mask & XFS_AT_EXTSIZE) &&
575 		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
576 		     vap->va_extsize) ) {
577 			code = XFS_ERROR(EINVAL);	/* EFBIG? */
578 			goto error_return;
579 		}
580 
581 		/*
582 		 * Can't set extent size unless the file is marked, or
583 		 * about to be marked as a realtime file.
584 		 *
585 		 * This check will be removed when fixed size extents
586 		 * with buffered data writes is implemented.
587 		 *
588 		 */
589 		if ((mask & XFS_AT_EXTSIZE)			&&
590 		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
591 		     vap->va_extsize) &&
592 		    (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
593 		       ((mask & XFS_AT_XFLAGS) &&
594 			(vap->va_xflags & XFS_XFLAG_REALTIME))))) {
595 			code = XFS_ERROR(EINVAL);
596 			goto error_return;
597 		}
598 
599 		/*
600 		 * Can't change realtime flag if any extents are allocated.
601 		 */
602 		if (ip->i_d.di_nextents && (mask & XFS_AT_XFLAGS) &&
603 		    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
604 		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
605 			code = XFS_ERROR(EINVAL);	/* EFBIG? */
606 			goto error_return;
607 		}
608 		/*
609 		 * Extent size must be a multiple of the appropriate block
610 		 * size, if set at all.
611 		 */
612 		if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
613 			xfs_extlen_t	size;
614 
615 			if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
616 			    ((mask & XFS_AT_XFLAGS) &&
617 			    (vap->va_xflags & XFS_XFLAG_REALTIME))) {
618 				size = mp->m_sb.sb_rextsize <<
619 				       mp->m_sb.sb_blocklog;
620 			} else {
621 				size = mp->m_sb.sb_blocksize;
622 			}
623 			if (vap->va_extsize % size) {
624 				code = XFS_ERROR(EINVAL);
625 				goto error_return;
626 			}
627 		}
628 		/*
629 		 * If realtime flag is set then must have realtime data.
630 		 */
631 		if ((mask & XFS_AT_XFLAGS) &&
632 		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
633 			if ((mp->m_sb.sb_rblocks == 0) ||
634 			    (mp->m_sb.sb_rextsize == 0) ||
635 			    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
636 				code = XFS_ERROR(EINVAL);
637 				goto error_return;
638 			}
639 		}
640 
641 		/*
642 		 * Can't modify an immutable/append-only file unless
643 		 * we have appropriate permission.
644 		 */
645 		if ((mask & XFS_AT_XFLAGS) &&
646 		    (ip->i_d.di_flags &
647 				(XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
648 		     (vap->va_xflags &
649 				(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
650 		    !capable(CAP_LINUX_IMMUTABLE)) {
651 			code = XFS_ERROR(EPERM);
652 			goto error_return;
653 		}
654 	}
655 
656 	/*
657 	 * Now we can make the changes.  Before we join the inode
658 	 * to the transaction, if XFS_AT_SIZE is set then take care of
659 	 * the part of the truncation that must be done without the
660 	 * inode lock.  This needs to be done before joining the inode
661 	 * to the transaction, because the inode cannot be unlocked
662 	 * once it is a part of the transaction.
663 	 */
664 	if (mask & XFS_AT_SIZE) {
665 		code = 0;
666 		if (vap->va_size > ip->i_d.di_size)
667 			code = xfs_igrow_start(ip, vap->va_size, credp);
668 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
669 		if (!code)
670 			code = xfs_itruncate_data(ip, vap->va_size);
671 		if (code) {
672 			ASSERT(tp == NULL);
673 			lock_flags &= ~XFS_ILOCK_EXCL;
674 			ASSERT(lock_flags == XFS_IOLOCK_EXCL);
675 			goto error_return;
676 		}
677 		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
678 		if ((code = xfs_trans_reserve(tp, 0,
679 					     XFS_ITRUNCATE_LOG_RES(mp), 0,
680 					     XFS_TRANS_PERM_LOG_RES,
681 					     XFS_ITRUNCATE_LOG_COUNT))) {
682 			xfs_trans_cancel(tp, 0);
683 			if (need_iolock)
684 				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
685 			return code;
686 		}
687 		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
688 		xfs_ilock(ip, XFS_ILOCK_EXCL);
689 	}
690 
691 	if (tp) {
692 		xfs_trans_ijoin(tp, ip, lock_flags);
693 		xfs_trans_ihold(tp, ip);
694 	}
695 
696 	/* determine whether mandatory locking mode changes */
697 	mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
698 
699 	/*
700 	 * Truncate file.  Must have write permission and not be a directory.
701 	 */
702 	if (mask & XFS_AT_SIZE) {
703 		if (vap->va_size > ip->i_d.di_size) {
704 			xfs_igrow_finish(tp, ip, vap->va_size,
705 			    !(flags & ATTR_DMI));
706 		} else if ((vap->va_size <= ip->i_d.di_size) ||
707 			   ((vap->va_size == 0) && ip->i_d.di_nextents)) {
708 			/*
709 			 * signal a sync transaction unless
710 			 * we're truncating an already unlinked
711 			 * file on a wsync filesystem
712 			 */
713 			code = xfs_itruncate_finish(&tp, ip,
714 					    (xfs_fsize_t)vap->va_size,
715 					    XFS_DATA_FORK,
716 					    ((ip->i_d.di_nlink != 0 ||
717 					      !(mp->m_flags & XFS_MOUNT_WSYNC))
718 					     ? 1 : 0));
719 			if (code) {
720 				goto abort_return;
721 			}
722 		}
723 		/*
724 		 * Have to do this even if the file's size doesn't change.
725 		 */
726 		timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
727 	}
728 
729 	/*
730 	 * Change file access modes.
731 	 */
732 	if (mask & XFS_AT_MODE) {
733 		ip->i_d.di_mode &= S_IFMT;
734 		ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;
735 
736 		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
737 		timeflags |= XFS_ICHGTIME_CHG;
738 	}
739 
740 	/*
741 	 * Change file ownership.  Must be the owner or privileged.
742 	 * If the system was configured with the "restricted_chown"
743 	 * option, the owner is not permitted to give away the file,
744 	 * and can change the group id only to a group of which he
745 	 * or she is a member.
746 	 */
747 	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
748 		/*
749 		 * CAP_FSETID overrides the following restrictions:
750 		 *
751 		 * The set-user-ID and set-group-ID bits of a file will be
752 		 * cleared upon successful return from chown()
753 		 */
754 		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
755 		    !capable(CAP_FSETID)) {
756 			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
757 		}
758 
759 		/*
760 		 * Change the ownerships and register quota modifications
761 		 * in the transaction.
762 		 */
763 		if (iuid != uid) {
764 			if (XFS_IS_UQUOTA_ON(mp)) {
765 				ASSERT(mask & XFS_AT_UID);
766 				ASSERT(udqp);
767 				olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
768 							&ip->i_udquot, udqp);
769 			}
770 			ip->i_d.di_uid = uid;
771 		}
772 		if (igid != gid) {
773 			if (XFS_IS_GQUOTA_ON(mp)) {
774 				ASSERT(mask & XFS_AT_GID);
775 				ASSERT(gdqp);
776 				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
777 							&ip->i_gdquot, gdqp);
778 			}
779 			ip->i_d.di_gid = gid;
780 		}
781 		if (iprojid != projid) {
782 			ip->i_d.di_projid = projid;
783 			/*
784 			 * We may have to rev the inode as well as
785 			 * the superblock version number since projids didn't
786 			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
787 			 */
788 			if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
789 				xfs_bump_ino_vers2(tp, ip);
790 		}
791 
792 		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
793 		timeflags |= XFS_ICHGTIME_CHG;
794 	}
795 
796 
797 	/*
798 	 * Change file access or modified times.
799 	 */
800 	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
801 		if (mask & XFS_AT_ATIME) {
802 			ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
803 			ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
804 			ip->i_update_core = 1;
805 			timeflags &= ~XFS_ICHGTIME_ACC;
806 		}
807 		if (mask & XFS_AT_MTIME) {
808 			ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
809 			ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
810 			timeflags &= ~XFS_ICHGTIME_MOD;
811 			timeflags |= XFS_ICHGTIME_CHG;
812 		}
813 		if (tp && (flags & ATTR_UTIME))
814 			xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
815 	}
816 
817 	/*
818 	 * Change XFS-added attributes.
819 	 */
820 	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
821 		if (mask & XFS_AT_EXTSIZE) {
822 			/*
823 			 * Converting bytes to fs blocks.
824 			 */
825 			ip->i_d.di_extsize = vap->va_extsize >>
826 				mp->m_sb.sb_blocklog;
827 		}
828 		if (mask & XFS_AT_XFLAGS) {
829 			uint	di_flags;
830 
831 			/* can't set PREALLOC this way, just preserve it */
832 			di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
833 			if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
834 				di_flags |= XFS_DIFLAG_IMMUTABLE;
835 			if (vap->va_xflags & XFS_XFLAG_APPEND)
836 				di_flags |= XFS_DIFLAG_APPEND;
837 			if (vap->va_xflags & XFS_XFLAG_SYNC)
838 				di_flags |= XFS_DIFLAG_SYNC;
839 			if (vap->va_xflags & XFS_XFLAG_NOATIME)
840 				di_flags |= XFS_DIFLAG_NOATIME;
841 			if (vap->va_xflags & XFS_XFLAG_NODUMP)
842 				di_flags |= XFS_DIFLAG_NODUMP;
843 			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
844 				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
845 					di_flags |= XFS_DIFLAG_RTINHERIT;
846 				if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
847 					di_flags |= XFS_DIFLAG_NOSYMLINKS;
848 			} else {
849 				if (vap->va_xflags & XFS_XFLAG_REALTIME) {
850 					di_flags |= XFS_DIFLAG_REALTIME;
851 					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
852 				} else {
853 					ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
854 				}
855 			}
856 			ip->i_d.di_flags = di_flags;
857 		}
858 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
859 		timeflags |= XFS_ICHGTIME_CHG;
860 	}
861 
862 	/*
863 	 * Change file inode change time only if XFS_AT_CTIME set
864 	 * AND we have been called by a DMI function.
865 	 */
866 
867 	if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
868 		ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
869 		ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
870 		ip->i_update_core = 1;
871 		timeflags &= ~XFS_ICHGTIME_CHG;
872 	}
873 
874 	/*
875 	 * Send out timestamp changes that need to be set to the
876 	 * current time.  Not done when called by a DMI function.
877 	 */
878 	if (timeflags && !(flags & ATTR_DMI))
879 		xfs_ichgtime(ip, timeflags);
880 
881 	XFS_STATS_INC(xs_ig_attrchg);
882 
883 	/*
884 	 * If this is a synchronous mount, make sure that the
885 	 * transaction goes to disk before returning to the user.
886 	 * This is slightly sub-optimal in that truncates require
887 	 * two sync transactions instead of one for wsync filesytems.
888 	 * One for the truncate and one for the timestamps since we
889 	 * don't want to change the timestamps unless we're sure the
890 	 * truncate worked.  Truncates are less than 1% of the laddis
891 	 * mix so this probably isn't worth the trouble to optimize.
892 	 */
893 	code = 0;
894 	if (tp) {
895 		if (mp->m_flags & XFS_MOUNT_WSYNC)
896 			xfs_trans_set_sync(tp);
897 
898 		code = xfs_trans_commit(tp, commit_flags, NULL);
899 	}
900 
901 	/*
902 	 * If the (regular) file's mandatory locking mode changed, then
903 	 * notify the vnode.  We do this under the inode lock to prevent
904 	 * racing calls to vop_vnode_change.
905 	 */
906 	mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
907 	if (mandlock_before != mandlock_after) {
908 		VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING,
909 				 mandlock_after);
910 	}
911 
912 	xfs_iunlock(ip, lock_flags);
913 
914 	/*
915 	 * Release any dquot(s) the inode had kept before chown.
916 	 */
917 	XFS_QM_DQRELE(mp, olddquot1);
918 	XFS_QM_DQRELE(mp, olddquot2);
919 	XFS_QM_DQRELE(mp, udqp);
920 	XFS_QM_DQRELE(mp, gdqp);
921 
922 	if (code) {
923 		return code;
924 	}
925 
926 	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) &&
927 	    !(flags & ATTR_DMI)) {
928 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
929 					NULL, DM_RIGHT_NULL, NULL, NULL,
930 					0, 0, AT_DELAY_FLAG(flags));
931 	}
932 	return 0;
933 
934  abort_return:
935 	commit_flags |= XFS_TRANS_ABORT;
936 	/* FALLTHROUGH */
937  error_return:
938 	XFS_QM_DQRELE(mp, udqp);
939 	XFS_QM_DQRELE(mp, gdqp);
940 	if (tp) {
941 		xfs_trans_cancel(tp, commit_flags);
942 	}
943 	if (lock_flags != 0) {
944 		xfs_iunlock(ip, lock_flags);
945 	}
946 	return code;
947 }
948 
949 
950 /*
951  * xfs_access
952  * Null conversion from vnode mode bits to inode mode bits, as in efs.
953  */
954 STATIC int
xfs_access(bhv_desc_t * bdp,int mode,cred_t * credp)955 xfs_access(
956 	bhv_desc_t	*bdp,
957 	int		mode,
958 	cred_t		*credp)
959 {
960 	xfs_inode_t	*ip;
961 	int		error;
962 
963 	vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__,
964 					       (inst_t *)__return_address);
965 
966 	ip = XFS_BHVTOI(bdp);
967 	xfs_ilock(ip, XFS_ILOCK_SHARED);
968 	error = xfs_iaccess(ip, mode, credp);
969 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
970 	return error;
971 }
972 
973 
974 /*
975  * xfs_readlink
976  *
977  */
978 STATIC int
xfs_readlink(bhv_desc_t * bdp,uio_t * uiop,int ioflags,cred_t * credp)979 xfs_readlink(
980 	bhv_desc_t	*bdp,
981 	uio_t		*uiop,
982 	int		ioflags,
983 	cred_t		*credp)
984 {
985 	xfs_inode_t     *ip;
986 	int		count;
987 	xfs_off_t	offset;
988 	int		pathlen;
989 	vnode_t		*vp;
990 	int		error = 0;
991 	xfs_mount_t	*mp;
992 	int             nmaps;
993 	xfs_bmbt_irec_t mval[SYMLINK_MAPS];
994 	xfs_daddr_t	d;
995 	int		byte_cnt;
996 	int		n;
997 	xfs_buf_t	*bp;
998 
999 	vp = BHV_TO_VNODE(bdp);
1000 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
1001 
1002 	ip = XFS_BHVTOI(bdp);
1003 	mp = ip->i_mount;
1004 
1005 	if (XFS_FORCED_SHUTDOWN(mp))
1006 		return XFS_ERROR(EIO);
1007 
1008 	xfs_ilock(ip, XFS_ILOCK_SHARED);
1009 
1010 	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
1011 
1012 	offset = uiop->uio_offset;
1013 	count = uiop->uio_resid;
1014 
1015 	if (offset < 0) {
1016 		error = XFS_ERROR(EINVAL);
1017 		goto error_return;
1018 	}
1019 	if (count <= 0) {
1020 		error = 0;
1021 		goto error_return;
1022 	}
1023 
1024 	if (!(ioflags & IO_INVIS)) {
1025 		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
1026 	}
1027 
1028 	/*
1029 	 * See if the symlink is stored inline.
1030 	 */
1031 	pathlen = (int)ip->i_d.di_size;
1032 
1033 	if (ip->i_df.if_flags & XFS_IFINLINE) {
1034 		error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
1035 	}
1036 	else {
1037 		/*
1038 		 * Symlink not inline.  Call bmap to get it in.
1039 		 */
1040 		nmaps = SYMLINK_MAPS;
1041 
1042 		error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen),
1043 				  0, NULL, 0, mval, &nmaps, NULL);
1044 
1045 		if (error) {
1046 			goto error_return;
1047 		}
1048 
1049 		for (n = 0; n < nmaps; n++) {
1050 			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
1051 			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
1052 			bp = xfs_buf_read(mp->m_ddev_targp, d,
1053 				      BTOBB(byte_cnt), 0);
1054 			error = XFS_BUF_GETERROR(bp);
1055 			if (error) {
1056 				xfs_ioerror_alert("xfs_readlink",
1057 					  ip->i_mount, bp, XFS_BUF_ADDR(bp));
1058 				xfs_buf_relse(bp);
1059 				goto error_return;
1060 			}
1061 			if (pathlen < byte_cnt)
1062 				byte_cnt = pathlen;
1063 			pathlen -= byte_cnt;
1064 
1065 			error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
1066 			xfs_buf_relse (bp);
1067 		}
1068 
1069 	}
1070 
1071 
1072 error_return:
1073 
1074 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
1075 
1076 	return error;
1077 }
1078 
1079 
1080 /*
1081  * xfs_fsync
1082  *
1083  * This is called to sync the inode and its data out to disk.
1084  * We need to hold the I/O lock while flushing the data, and
1085  * the inode lock while flushing the inode.  The inode lock CANNOT
1086  * be held while flushing the data, so acquire after we're done
1087  * with that.
1088  */
1089 STATIC int
xfs_fsync(bhv_desc_t * bdp,int flag,cred_t * credp,xfs_off_t start,xfs_off_t stop)1090 xfs_fsync(
1091 	bhv_desc_t	*bdp,
1092 	int		flag,
1093 	cred_t		*credp,
1094 	xfs_off_t	start,
1095 	xfs_off_t	stop)
1096 {
1097 	xfs_inode_t	*ip;
1098 	xfs_trans_t	*tp;
1099 	int		error;
1100 
1101 	vn_trace_entry(BHV_TO_VNODE(bdp),
1102 			__FUNCTION__, (inst_t *)__return_address);
1103 
1104 	ip = XFS_BHVTOI(bdp);
1105 
1106 	ASSERT(start >= 0 && stop >= -1);
1107 
1108 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
1109 		return XFS_ERROR(EIO);
1110 
1111 	/*
1112 	 * We always need to make sure that the required inode state
1113 	 * is safe on disk.  The vnode might be clean but because
1114 	 * of committed transactions that haven't hit the disk yet.
1115 	 * Likewise, there could be unflushed non-transactional
1116 	 * changes to the inode core that have to go to disk.
1117 	 *
1118 	 * The following code depends on one assumption:  that
1119 	 * any transaction that changes an inode logs the core
1120 	 * because it has to change some field in the inode core
1121 	 * (typically nextents or nblocks).  That assumption
1122 	 * implies that any transactions against an inode will
1123 	 * catch any non-transactional updates.  If inode-altering
1124 	 * transactions exist that violate this assumption, the
1125 	 * code breaks.  Right now, it figures that if the involved
1126 	 * update_* field is clear and the inode is unpinned, the
1127 	 * inode is clean.  Either it's been flushed or it's been
1128 	 * committed and the commit has hit the disk unpinning the inode.
1129 	 * (Note that xfs_inode_item_format() called at commit clears
1130 	 * the update_* fields.)
1131 	 */
1132 	xfs_ilock(ip, XFS_ILOCK_SHARED);
1133 
1134 	/* If we are flushing data then we care about update_size
1135 	 * being set, otherwise we care about update_core
1136 	 */
1137 	if ((flag & FSYNC_DATA) ?
1138 			(ip->i_update_size == 0) :
1139 			(ip->i_update_core == 0)) {
1140 		/*
1141 		 * Timestamps/size haven't changed since last inode
1142 		 * flush or inode transaction commit.  That means
1143 		 * either nothing got written or a transaction
1144 		 * committed which caught the updates.	If the
1145 		 * latter happened and the transaction hasn't
1146 		 * hit the disk yet, the inode will be still
1147 		 * be pinned.  If it is, force the log.
1148 		 */
1149 
1150 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
1151 
1152 		if (xfs_ipincount(ip)) {
1153 			xfs_log_force(ip->i_mount, (xfs_lsn_t)0,
1154 				      XFS_LOG_FORCE |
1155 				      ((flag & FSYNC_WAIT)
1156 				       ? XFS_LOG_SYNC : 0));
1157 		}
1158 		error = 0;
1159 	} else	{
1160 		/*
1161 		 * Kick off a transaction to log the inode
1162 		 * core to get the updates.  Make it
1163 		 * sync if FSYNC_WAIT is passed in (which
1164 		 * is done by everybody but specfs).  The
1165 		 * sync transaction will also force the log.
1166 		 */
1167 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
1168 		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
1169 		if ((error = xfs_trans_reserve(tp, 0,
1170 				XFS_FSYNC_TS_LOG_RES(ip->i_mount),
1171 				0, 0, 0)))  {
1172 			xfs_trans_cancel(tp, 0);
1173 			return error;
1174 		}
1175 		xfs_ilock(ip, XFS_ILOCK_EXCL);
1176 
1177 		/*
1178 		 * Note - it's possible that we might have pushed
1179 		 * ourselves out of the way during trans_reserve
1180 		 * which would flush the inode.	 But there's no
1181 		 * guarantee that the inode buffer has actually
1182 		 * gone out yet (it's delwri).	Plus the buffer
1183 		 * could be pinned anyway if it's part of an
1184 		 * inode in another recent transaction.	 So we
1185 		 * play it safe and fire off the transaction anyway.
1186 		 */
1187 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1188 		xfs_trans_ihold(tp, ip);
1189 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1190 		if (flag & FSYNC_WAIT)
1191 			xfs_trans_set_sync(tp);
1192 		error = xfs_trans_commit(tp, 0, NULL);
1193 
1194 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
1195 	}
1196 	return error;
1197 }
1198 
1199 /*
1200  * This is called by xfs_inactive to free any blocks beyond eof,
1201  * when the link count isn't zero.
1202  */
1203 STATIC int
xfs_inactive_free_eofblocks(xfs_mount_t * mp,xfs_inode_t * ip)1204 xfs_inactive_free_eofblocks(
1205 	xfs_mount_t	*mp,
1206 	xfs_inode_t	*ip)
1207 {
1208 	xfs_trans_t	*tp;
1209 	int		error;
1210 	xfs_fileoff_t	end_fsb;
1211 	xfs_fileoff_t	last_fsb;
1212 	xfs_filblks_t	map_len;
1213 	int		nimaps;
1214 	xfs_bmbt_irec_t	imap;
1215 
1216 	/*
1217 	 * Figure out if there are any blocks beyond the end
1218 	 * of the file.  If not, then there is nothing to do.
1219 	 */
1220 	end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_d.di_size));
1221 	last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1222 	map_len = last_fsb - end_fsb;
1223 	if (map_len <= 0)
1224 		return (0);
1225 
1226 	nimaps = 1;
1227 	xfs_ilock(ip, XFS_ILOCK_SHARED);
1228 	error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
1229 			  NULL, 0, &imap, &nimaps, NULL);
1230 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
1231 
1232 	if (!error && (nimaps != 0) &&
1233 	    (imap.br_startblock != HOLESTARTBLOCK)) {
1234 		/*
1235 		 * Attach the dquots to the inode up front.
1236 		 */
1237 		if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
1238 			return (error);
1239 
1240 		/*
1241 		 * There are blocks after the end of file.
1242 		 * Free them up now by truncating the file to
1243 		 * its current size.
1244 		 */
1245 		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1246 
1247 		/*
1248 		 * Do the xfs_itruncate_start() call before
1249 		 * reserving any log space because
1250 		 * itruncate_start will call into the buffer
1251 		 * cache and we can't
1252 		 * do that within a transaction.
1253 		 */
1254 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
1255 		xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
1256 				    ip->i_d.di_size);
1257 
1258 		error = xfs_trans_reserve(tp, 0,
1259 					  XFS_ITRUNCATE_LOG_RES(mp),
1260 					  0, XFS_TRANS_PERM_LOG_RES,
1261 					  XFS_ITRUNCATE_LOG_COUNT);
1262 		if (error) {
1263 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
1264 			xfs_trans_cancel(tp, 0);
1265 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1266 			return (error);
1267 		}
1268 
1269 		xfs_ilock(ip, XFS_ILOCK_EXCL);
1270 		xfs_trans_ijoin(tp, ip,
1271 				XFS_IOLOCK_EXCL |
1272 				XFS_ILOCK_EXCL);
1273 		xfs_trans_ihold(tp, ip);
1274 
1275 		error = xfs_itruncate_finish(&tp, ip,
1276 					     ip->i_d.di_size,
1277 					     XFS_DATA_FORK,
1278 					     0);
1279 		/*
1280 		 * If we get an error at this point we
1281 		 * simply don't bother truncating the file.
1282 		 */
1283 		if (error) {
1284 			xfs_trans_cancel(tp,
1285 					 (XFS_TRANS_RELEASE_LOG_RES |
1286 					  XFS_TRANS_ABORT));
1287 		} else {
1288 			error = xfs_trans_commit(tp,
1289 						XFS_TRANS_RELEASE_LOG_RES,
1290 						NULL);
1291 		}
1292 		xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1293 	}
1294 	return (error);
1295 }
1296 
1297 /*
1298  * Free a symlink that has blocks associated with it.
1299  */
1300 STATIC int
xfs_inactive_symlink_rmt(xfs_inode_t * ip,xfs_trans_t ** tpp)1301 xfs_inactive_symlink_rmt(
1302 	xfs_inode_t	*ip,
1303 	xfs_trans_t	**tpp)
1304 {
1305 	xfs_buf_t	*bp;
1306 	int		committed;
1307 	int		done;
1308 	int		error;
1309 	xfs_fsblock_t	first_block;
1310 	xfs_bmap_free_t	free_list;
1311 	int		i;
1312 	xfs_mount_t	*mp;
1313 	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
1314 	int		nmaps;
1315 	xfs_trans_t	*ntp;
1316 	int		size;
1317 	xfs_trans_t	*tp;
1318 
1319 	tp = *tpp;
1320 	mp = ip->i_mount;
1321 	ASSERT(ip->i_d.di_size > XFS_IFORK_DSIZE(ip));
1322 	/*
1323 	 * We're freeing a symlink that has some
1324 	 * blocks allocated to it.  Free the
1325 	 * blocks here.  We know that we've got
1326 	 * either 1 or 2 extents and that we can
1327 	 * free them all in one bunmapi call.
1328 	 */
1329 	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
1330 	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
1331 			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
1332 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
1333 		xfs_trans_cancel(tp, 0);
1334 		*tpp = NULL;
1335 		return error;
1336 	}
1337 	/*
1338 	 * Lock the inode, fix the size, and join it to the transaction.
1339 	 * Hold it so in the normal path, we still have it locked for
1340 	 * the second transaction.  In the error paths we need it
1341 	 * held so the cancel won't rele it, see below.
1342 	 */
1343 	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1344 	size = (int)ip->i_d.di_size;
1345 	ip->i_d.di_size = 0;
1346 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1347 	xfs_trans_ihold(tp, ip);
1348 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1349 	/*
1350 	 * Find the block(s) so we can inval and unmap them.
1351 	 */
1352 	done = 0;
1353 	XFS_BMAP_INIT(&free_list, &first_block);
1354 	nmaps = sizeof(mval) / sizeof(mval[0]);
1355 	if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
1356 			XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
1357 			&free_list)))
1358 		goto error0;
1359 	/*
1360 	 * Invalidate the block(s).
1361 	 */
1362 	for (i = 0; i < nmaps; i++) {
1363 		bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
1364 			XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
1365 			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
1366 		xfs_trans_binval(tp, bp);
1367 	}
1368 	/*
1369 	 * Unmap the dead block(s) to the free_list.
1370 	 */
1371 	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
1372 			&first_block, &free_list, &done)))
1373 		goto error1;
1374 	ASSERT(done);
1375 	/*
1376 	 * Commit the first transaction.  This logs the EFI and the inode.
1377 	 */
1378 	if ((error = xfs_bmap_finish(&tp, &free_list, first_block, &committed)))
1379 		goto error1;
1380 	/*
1381 	 * The transaction must have been committed, since there were
1382 	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish.
1383 	 * The new tp has the extent freeing and EFDs.
1384 	 */
1385 	ASSERT(committed);
1386 	/*
1387 	 * The first xact was committed, so add the inode to the new one.
1388 	 * Mark it dirty so it will be logged and moved forward in the log as
1389 	 * part of every commit.
1390 	 */
1391 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1392 	xfs_trans_ihold(tp, ip);
1393 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1394 	/*
1395 	 * Get a new, empty transaction to return to our caller.
1396 	 */
1397 	ntp = xfs_trans_dup(tp);
1398 	/*
1399 	 * Commit the transaction containing extent freeing and EFD's.
1400 	 * If we get an error on the commit here or on the reserve below,
1401 	 * we need to unlock the inode since the new transaction doesn't
1402 	 * have the inode attached.
1403 	 */
1404 	error = xfs_trans_commit(tp, 0, NULL);
1405 	tp = ntp;
1406 	if (error) {
1407 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
1408 		goto error0;
1409 	}
1410 	/*
1411 	 * Remove the memory for extent descriptions (just bookkeeping).
1412 	 */
1413 	if (ip->i_df.if_bytes)
1414 		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);
1415 	ASSERT(ip->i_df.if_bytes == 0);
1416 	/*
1417 	 * Put an itruncate log reservation in the new transaction
1418 	 * for our caller.
1419 	 */
1420 	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
1421 			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
1422 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
1423 		goto error0;
1424 	}
1425 	/*
1426 	 * Return with the inode locked but not joined to the transaction.
1427 	 */
1428 	*tpp = tp;
1429 	return 0;
1430 
1431  error1:
1432 	xfs_bmap_cancel(&free_list);
1433  error0:
1434 	/*
1435 	 * Have to come here with the inode locked and either
1436 	 * (held and in the transaction) or (not in the transaction).
1437 	 * If the inode isn't held then cancel would iput it, but
1438 	 * that's wrong since this is inactive and the vnode ref
1439 	 * count is 0 already.
1440 	 * Cancel won't do anything to the inode if held, but it still
1441 	 * needs to be locked until the cancel is done, if it was
1442 	 * joined to the transaction.
1443 	 */
1444 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1445 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1446 	*tpp = NULL;
1447 	return error;
1448 
1449 }
1450 
1451 STATIC int
xfs_inactive_symlink_local(xfs_inode_t * ip,xfs_trans_t ** tpp)1452 xfs_inactive_symlink_local(
1453 	xfs_inode_t	*ip,
1454 	xfs_trans_t	**tpp)
1455 {
1456 	int		error;
1457 
1458 	ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
1459 	/*
1460 	 * We're freeing a symlink which fit into
1461 	 * the inode.  Just free the memory used
1462 	 * to hold the old symlink.
1463 	 */
1464 	error = xfs_trans_reserve(*tpp, 0,
1465 				  XFS_ITRUNCATE_LOG_RES(ip->i_mount),
1466 				  0, XFS_TRANS_PERM_LOG_RES,
1467 				  XFS_ITRUNCATE_LOG_COUNT);
1468 
1469 	if (error) {
1470 		xfs_trans_cancel(*tpp, 0);
1471 		*tpp = NULL;
1472 		return (error);
1473 	}
1474 	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1475 
1476 	/*
1477 	 * Zero length symlinks _can_ exist.
1478 	 */
1479 	if (ip->i_df.if_bytes > 0) {
1480 		xfs_idata_realloc(ip,
1481 				  -(ip->i_df.if_bytes),
1482 				  XFS_DATA_FORK);
1483 		ASSERT(ip->i_df.if_bytes == 0);
1484 	}
1485 	return (0);
1486 }
1487 
1488 /*
1489  *
1490  */
1491 STATIC int
xfs_inactive_attrs(xfs_inode_t * ip,xfs_trans_t ** tpp)1492 xfs_inactive_attrs(
1493 	xfs_inode_t	*ip,
1494 	xfs_trans_t	**tpp)
1495 {
1496 	xfs_trans_t	*tp;
1497 	int		error;
1498 	xfs_mount_t	*mp;
1499 
1500 	ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
1501 	tp = *tpp;
1502 	mp = ip->i_mount;
1503 	ASSERT(ip->i_d.di_forkoff != 0);
1504 	xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
1505 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1506 
1507 	error = xfs_attr_inactive(ip);
1508 	if (error) {
1509 		*tpp = NULL;
1510 		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1511 		return (error); /* goto out*/
1512 	}
1513 
1514 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1515 	error = xfs_trans_reserve(tp, 0,
1516 				  XFS_IFREE_LOG_RES(mp),
1517 				  0, XFS_TRANS_PERM_LOG_RES,
1518 				  XFS_INACTIVE_LOG_COUNT);
1519 	if (error) {
1520 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
1521 		xfs_trans_cancel(tp, 0);
1522 		*tpp = NULL;
1523 		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1524 		return (error);
1525 	}
1526 
1527 	xfs_ilock(ip, XFS_ILOCK_EXCL);
1528 	xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1529 	xfs_trans_ihold(tp, ip);
1530 	xfs_idestroy_fork(ip, XFS_ATTR_FORK);
1531 
1532 	ASSERT(ip->i_d.di_anextents == 0);
1533 
1534 	*tpp = tp;
1535 	return (0);
1536 }
1537 
1538 STATIC int
xfs_release(bhv_desc_t * bdp)1539 xfs_release(
1540 	bhv_desc_t	*bdp)
1541 {
1542 	xfs_inode_t	*ip;
1543 	vnode_t		*vp;
1544 	xfs_mount_t	*mp;
1545 	int		error;
1546 
1547 	vp = BHV_TO_VNODE(bdp);
1548 	ip = XFS_BHVTOI(bdp);
1549 
1550 	if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) {
1551 		return 0;
1552 	}
1553 
1554 	/* If this is a read-only mount, don't do this (would generate I/O) */
1555 	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
1556 		return 0;
1557 
1558 #ifdef HAVE_REFCACHE
1559 	/* If we are in the NFS reference cache then don't do this now */
1560 	if (ip->i_refcache)
1561 		return 0;
1562 #endif
1563 
1564 	mp = ip->i_mount;
1565 
1566 	if (ip->i_d.di_nlink != 0) {
1567 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1568 		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
1569 		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
1570 		    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)))) {
1571 			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
1572 				return (error);
1573 			/* Update linux inode block count after free above */
1574 			LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
1575 				ip->i_d.di_nblocks + ip->i_delayed_blks);
1576 		}
1577 	}
1578 
1579 	return 0;
1580 }
1581 
1582 /*
1583  * xfs_inactive
1584  *
1585  * This is called when the vnode reference count for the vnode
1586  * goes to zero.  If the file has been unlinked, then it must
1587  * now be truncated.  Also, we clear all of the read-ahead state
1588  * kept for the inode here since the file is now closed.
1589  */
1590 STATIC int
xfs_inactive(bhv_desc_t * bdp,cred_t * credp)1591 xfs_inactive(
1592 	bhv_desc_t	*bdp,
1593 	cred_t		*credp)
1594 {
1595 	xfs_inode_t	*ip;
1596 	vnode_t		*vp;
1597 	xfs_bmap_free_t	free_list;
1598 	xfs_fsblock_t	first_block;
1599 	int		committed;
1600 	xfs_trans_t	*tp;
1601 	xfs_mount_t	*mp;
1602 	int		error;
1603 	int		truncate;
1604 
1605 	vp = BHV_TO_VNODE(bdp);
1606 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
1607 
1608 	ip = XFS_BHVTOI(bdp);
1609 
1610 	/*
1611 	 * If the inode is already free, then there can be nothing
1612 	 * to clean up here.
1613 	 */
1614 	if (ip->i_d.di_mode == 0 || VN_BAD(vp)) {
1615 		ASSERT(ip->i_df.if_real_bytes == 0);
1616 		ASSERT(ip->i_df.if_broot_bytes == 0);
1617 		return VN_INACTIVE_CACHE;
1618 	}
1619 
1620 	/*
1621 	 * Only do a truncate if it's a regular file with
1622 	 * some actual space in it.  It's OK to look at the
1623 	 * inode's fields without the lock because we're the
1624 	 * only one with a reference to the inode.
1625 	 */
1626 	truncate = ((ip->i_d.di_nlink == 0) &&
1627 	    ((ip->i_d.di_size != 0) || (ip->i_d.di_nextents > 0)) &&
1628 	    ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
1629 
1630 	mp = ip->i_mount;
1631 
1632 	if (ip->i_d.di_nlink == 0 &&
1633 	    DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_DESTROY)) {
1634 		(void) XFS_SEND_DESTROY(mp, vp, DM_RIGHT_NULL);
1635 	}
1636 
1637 	error = 0;
1638 
1639 	/* If this is a read-only mount, don't do this (would generate I/O) */
1640 	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
1641 		goto out;
1642 
1643 	if (ip->i_d.di_nlink != 0) {
1644 		if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
1645 		     ((ip->i_d.di_size > 0) || (VN_CACHED(vp) > 0)) &&
1646 		     (ip->i_df.if_flags & XFS_IFEXTENTS))  &&
1647 		    (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) ||
1648 		     (ip->i_delayed_blks != 0))) {
1649 			if ((error = xfs_inactive_free_eofblocks(mp, ip)))
1650 				return (VN_INACTIVE_CACHE);
1651 			/* Update linux inode block count after free above */
1652 			LINVFS_GET_IP(vp)->i_blocks = XFS_FSB_TO_BB(mp,
1653 				ip->i_d.di_nblocks + ip->i_delayed_blks);
1654 		}
1655 		goto out;
1656 	}
1657 
1658 	ASSERT(ip->i_d.di_nlink == 0);
1659 
1660 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
1661 		return (VN_INACTIVE_CACHE);
1662 
1663 	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1664 	if (truncate) {
1665 		/*
1666 		 * Do the xfs_itruncate_start() call before
1667 		 * reserving any log space because itruncate_start
1668 		 * will call into the buffer cache and we can't
1669 		 * do that within a transaction.
1670 		 */
1671 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
1672 
1673 		xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
1674 
1675 		error = xfs_trans_reserve(tp, 0,
1676 					  XFS_ITRUNCATE_LOG_RES(mp),
1677 					  0, XFS_TRANS_PERM_LOG_RES,
1678 					  XFS_ITRUNCATE_LOG_COUNT);
1679 		if (error) {
1680 			/* Don't call itruncate_cleanup */
1681 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
1682 			xfs_trans_cancel(tp, 0);
1683 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1684 			return (VN_INACTIVE_CACHE);
1685 		}
1686 
1687 		xfs_ilock(ip, XFS_ILOCK_EXCL);
1688 		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1689 		xfs_trans_ihold(tp, ip);
1690 
1691 		/*
1692 		 * normally, we have to run xfs_itruncate_finish sync.
1693 		 * But if filesystem is wsync and we're in the inactive
1694 		 * path, then we know that nlink == 0, and that the
1695 		 * xaction that made nlink == 0 is permanently committed
1696 		 * since xfs_remove runs as a synchronous transaction.
1697 		 */
1698 		error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
1699 				(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
1700 
1701 		if (error) {
1702 			xfs_trans_cancel(tp,
1703 				XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1704 			xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1705 			return (VN_INACTIVE_CACHE);
1706 		}
1707 	} else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
1708 
1709 		/*
1710 		 * If we get an error while cleaning up a
1711 		 * symlink we bail out.
1712 		 */
1713 		error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
1714 			xfs_inactive_symlink_rmt(ip, &tp) :
1715 			xfs_inactive_symlink_local(ip, &tp);
1716 
1717 		if (error) {
1718 			ASSERT(tp == NULL);
1719 			return (VN_INACTIVE_CACHE);
1720 		}
1721 
1722 		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1723 		xfs_trans_ihold(tp, ip);
1724 	} else {
1725 		error = xfs_trans_reserve(tp, 0,
1726 					  XFS_IFREE_LOG_RES(mp),
1727 					  0, XFS_TRANS_PERM_LOG_RES,
1728 					  XFS_INACTIVE_LOG_COUNT);
1729 		if (error) {
1730 			ASSERT(XFS_FORCED_SHUTDOWN(mp));
1731 			xfs_trans_cancel(tp, 0);
1732 			return (VN_INACTIVE_CACHE);
1733 		}
1734 
1735 		xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
1736 		xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1737 		xfs_trans_ihold(tp, ip);
1738 	}
1739 
1740 	/*
1741 	 * If there are attributes associated with the file
1742 	 * then blow them away now.  The code calls a routine
1743 	 * that recursively deconstructs the attribute fork.
1744 	 * We need to just commit the current transaction
1745 	 * because we can't use it for xfs_attr_inactive().
1746 	 */
1747 	if (ip->i_d.di_anextents > 0) {
1748 		error = xfs_inactive_attrs(ip, &tp);
1749 		/*
1750 		 * If we got an error, the transaction is already
1751 		 * cancelled, and the inode is unlocked. Just get out.
1752 		 */
1753 		 if (error)
1754 			 return (VN_INACTIVE_CACHE);
1755 	} else if (ip->i_afp) {
1756 		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
1757 	}
1758 
1759 	/*
1760 	 * Free the inode.
1761 	 */
1762 	XFS_BMAP_INIT(&free_list, &first_block);
1763 	error = xfs_ifree(tp, ip, &free_list);
1764 	if (error) {
1765 		/*
1766 		 * If we fail to free the inode, shut down.  The cancel
1767 		 * might do that, we need to make sure.  Otherwise the
1768 		 * inode might be lost for a long time or forever.
1769 		 */
1770 		if (!XFS_FORCED_SHUTDOWN(mp)) {
1771 			cmn_err(CE_NOTE,
1772 		"xfs_inactive:	xfs_ifree() returned an error = %d on %s",
1773 				error, mp->m_fsname);
1774 			xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
1775 		}
1776 		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
1777 	} else {
1778 		/*
1779 		 * Credit the quota account(s). The inode is gone.
1780 		 */
1781 		XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
1782 
1783 		/*
1784 		 * Just ignore errors at this point.  There is
1785 		 * nothing we can do except to try to keep going.
1786 		 */
1787 		(void) xfs_bmap_finish(&tp,  &free_list, first_block,
1788 				       &committed);
1789 		(void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
1790 	}
1791 	/*
1792 	 * Release the dquots held by inode, if any.
1793 	 */
1794 	XFS_QM_DQDETACH(mp, ip);
1795 
1796 	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
1797 
1798  out:
1799 	return VN_INACTIVE_CACHE;
1800 }
1801 
1802 
1803 /*
1804  * xfs_lookup
1805  */
1806 STATIC int
xfs_lookup(bhv_desc_t * dir_bdp,vname_t * dentry,vnode_t ** vpp,int flags,vnode_t * rdir,cred_t * credp)1807 xfs_lookup(
1808 	bhv_desc_t		*dir_bdp,
1809 	vname_t			*dentry,
1810 	vnode_t			**vpp,
1811 	int			flags,
1812 	vnode_t			*rdir,
1813 	cred_t			*credp)
1814 {
1815 	xfs_inode_t		*dp, *ip;
1816 	xfs_ino_t		e_inum;
1817 	int			error;
1818 	uint			lock_mode;
1819 	vnode_t			*dir_vp;
1820 
1821 	dir_vp = BHV_TO_VNODE(dir_bdp);
1822 	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
1823 
1824 	dp = XFS_BHVTOI(dir_bdp);
1825 
1826 	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
1827 		return XFS_ERROR(EIO);
1828 
1829 	lock_mode = xfs_ilock_map_shared(dp);
1830 	error = xfs_dir_lookup_int(dir_bdp, lock_mode, dentry, &e_inum, &ip);
1831 	if (!error) {
1832 		*vpp = XFS_ITOV(ip);
1833 		ITRACE(ip);
1834 	}
1835 	xfs_iunlock_map_shared(dp, lock_mode);
1836 	return error;
1837 }
1838 
1839 
1840 /*
1841  * xfs_create (create a new file).
1842  */
1843 STATIC int
xfs_create(bhv_desc_t * dir_bdp,vname_t * dentry,vattr_t * vap,vnode_t ** vpp,cred_t * credp)1844 xfs_create(
1845 	bhv_desc_t		*dir_bdp,
1846 	vname_t			*dentry,
1847 	vattr_t			*vap,
1848 	vnode_t			**vpp,
1849 	cred_t			*credp)
1850 {
1851 	char			*name = VNAME(dentry);
1852 	vnode_t			*dir_vp;
1853 	xfs_inode_t		*dp, *ip;
1854 	vnode_t		        *vp=NULL;
1855 	xfs_trans_t		*tp;
1856 	xfs_mount_t	        *mp;
1857 	xfs_dev_t		rdev;
1858 	int                     error;
1859 	xfs_bmap_free_t		free_list;
1860 	xfs_fsblock_t		first_block;
1861 	boolean_t		dp_joined_to_trans;
1862 	int			dm_event_sent = 0;
1863 	uint			cancel_flags;
1864 	int			committed;
1865 	xfs_prid_t		prid;
1866 	struct xfs_dquot	*udqp, *gdqp;
1867 	uint			resblks;
1868 	int			dm_di_mode;
1869 	int			namelen;
1870 
1871 	ASSERT(!*vpp);
1872 	dir_vp = BHV_TO_VNODE(dir_bdp);
1873 	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
1874 
1875 	dp = XFS_BHVTOI(dir_bdp);
1876 	mp = dp->i_mount;
1877 
1878 	dm_di_mode = vap->va_mode|VTTOIF(vap->va_type);
1879 	namelen = VNAMELEN(dentry);
1880 
1881 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
1882 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
1883 				dir_vp, DM_RIGHT_NULL, NULL,
1884 				DM_RIGHT_NULL, name, NULL,
1885 				dm_di_mode, 0, 0);
1886 
1887 		if (error)
1888 			return error;
1889 		dm_event_sent = 1;
1890 	}
1891 
1892 	if (XFS_FORCED_SHUTDOWN(mp))
1893 		return XFS_ERROR(EIO);
1894 
1895 	/* Return through std_return after this point. */
1896 
1897 	udqp = gdqp = NULL;
1898 	if (vap->va_mask & XFS_AT_PROJID)
1899 		prid = (xfs_prid_t)vap->va_projid;
1900 	else
1901 		prid = (xfs_prid_t)dfltprid;
1902 
1903 	/*
1904 	 * Make sure that we have allocated dquot(s) on disk.
1905 	 */
1906 	error = XFS_QM_DQVOPALLOC(mp, dp,
1907 			current_fsuid(credp), current_fsgid(credp),
1908 			XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp);
1909 	if (error)
1910 		goto std_return;
1911 
1912 	ip = NULL;
1913 	dp_joined_to_trans = B_FALSE;
1914 
1915 	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
1916 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1917 	resblks = XFS_CREATE_SPACE_RES(mp, namelen);
1918 	/*
1919 	 * Initially assume that the file does not exist and
1920 	 * reserve the resources for that case.  If that is not
1921 	 * the case we'll drop the one we have and get a more
1922 	 * appropriate transaction later.
1923 	 */
1924 	error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
1925 			XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
1926 	if (error == ENOSPC) {
1927 		resblks = 0;
1928 		error = xfs_trans_reserve(tp, 0, XFS_CREATE_LOG_RES(mp), 0,
1929 				XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
1930 	}
1931 	if (error) {
1932 		cancel_flags = 0;
1933 		dp = NULL;
1934 		goto error_return;
1935 	}
1936 
1937 	xfs_ilock(dp, XFS_ILOCK_EXCL);
1938 
1939 	XFS_BMAP_INIT(&free_list, &first_block);
1940 
1941 	ASSERT(ip == NULL);
1942 
1943 	/*
1944 	 * Reserve disk quota and the inode.
1945 	 */
1946 	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
1947 	if (error)
1948 		goto error_return;
1949 
1950 	if (resblks == 0 &&
1951 	    (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
1952 		goto error_return;
1953 	rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
1954 	error = xfs_dir_ialloc(&tp, dp,
1955 			MAKEIMODE(vap->va_type,vap->va_mode), 1,
1956 			rdev, credp, prid, resblks > 0,
1957 			&ip, &committed);
1958 	if (error) {
1959 		if (error == ENOSPC)
1960 			goto error_return;
1961 		goto abort_return;
1962 	}
1963 	ITRACE(ip);
1964 
1965 	/*
1966 	 * At this point, we've gotten a newly allocated inode.
1967 	 * It is locked (and joined to the transaction).
1968 	 */
1969 
1970 	ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));
1971 
1972 	/*
1973 	 * Now we join the directory inode to the transaction.
1974 	 * We do not do it earlier because xfs_dir_ialloc
1975 	 * might commit the previous transaction (and release
1976 	 * all the locks).
1977 	 */
1978 
1979 	VN_HOLD(dir_vp);
1980 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
1981 	dp_joined_to_trans = B_TRUE;
1982 
1983 	error = XFS_DIR_CREATENAME(mp, tp, dp, name, namelen, ip->i_ino,
1984 		&first_block, &free_list,
1985 		resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
1986 	if (error) {
1987 		ASSERT(error != ENOSPC);
1988 		goto abort_return;
1989 	}
1990 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
1991 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1992 
1993 	/*
1994 	 * If this is a synchronous mount, make sure that the
1995 	 * create transaction goes to disk before returning to
1996 	 * the user.
1997 	 */
1998 	if (mp->m_flags & XFS_MOUNT_WSYNC) {
1999 		xfs_trans_set_sync(tp);
2000 	}
2001 
2002 	dp->i_gen++;
2003 
2004 	/*
2005 	 * Attach the dquot(s) to the inodes and modify them incore.
2006 	 * These ids of the inode couldn't have changed since the new
2007 	 * inode has been locked ever since it was created.
2008 	 */
2009 	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
2010 
2011 	/*
2012 	 * xfs_trans_commit normally decrements the vnode ref count
2013 	 * when it unlocks the inode. Since we want to return the
2014 	 * vnode to the caller, we bump the vnode ref count now.
2015 	 */
2016 	IHOLD(ip);
2017 	vp = XFS_ITOV(ip);
2018 
2019 	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
2020 	if (error) {
2021 		xfs_bmap_cancel(&free_list);
2022 		goto abort_rele;
2023 	}
2024 
2025 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
2026 	if (error) {
2027 		IRELE(ip);
2028 		tp = NULL;
2029 		goto error_return;
2030 	}
2031 
2032 	XFS_QM_DQRELE(mp, udqp);
2033 	XFS_QM_DQRELE(mp, gdqp);
2034 
2035 	/*
2036 	 * Propogate the fact that the vnode changed after the
2037 	 * xfs_inode locks have been released.
2038 	 */
2039 	VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3);
2040 
2041 	*vpp = vp;
2042 
2043 	/* Fallthrough to std_return with error = 0  */
2044 
2045 std_return:
2046 	if ( (*vpp || (error != 0 && dm_event_sent != 0)) &&
2047 			DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
2048 							DM_EVENT_POSTCREATE)) {
2049 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
2050 			dir_vp, DM_RIGHT_NULL,
2051 			*vpp ? vp:NULL,
2052 			DM_RIGHT_NULL, name, NULL,
2053 			dm_di_mode, error, 0);
2054 	}
2055 	return error;
2056 
2057  abort_return:
2058 	cancel_flags |= XFS_TRANS_ABORT;
2059 	/* FALLTHROUGH */
2060  error_return:
2061 
2062 	if (tp != NULL)
2063 		xfs_trans_cancel(tp, cancel_flags);
2064 
2065 	if (!dp_joined_to_trans && (dp != NULL))
2066 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
2067 	XFS_QM_DQRELE(mp, udqp);
2068 	XFS_QM_DQRELE(mp, gdqp);
2069 
2070 	goto std_return;
2071 
2072  abort_rele:
2073 	/*
2074 	 * Wait until after the current transaction is aborted to
2075 	 * release the inode.  This prevents recursive transactions
2076 	 * and deadlocks from xfs_inactive.
2077 	 */
2078 	cancel_flags |= XFS_TRANS_ABORT;
2079 	xfs_trans_cancel(tp, cancel_flags);
2080 	IRELE(ip);
2081 
2082 	XFS_QM_DQRELE(mp, udqp);
2083 	XFS_QM_DQRELE(mp, gdqp);
2084 
2085 	goto std_return;
2086 }
2087 
#ifdef DEBUG
/*
 * Debug-only statistics that show whether, and how often, the deadlock
 * avoidance retry path in xfs_lock_dir_and_entry() is being exercised.
 */
int	xfs_rm_locks;		/* calls to xfs_lock_dir_and_entry() */
int	xfs_rm_attempts;	/* failed no-wait attempts on the entry inode */
int	xfs_rm_lock_delays;	/* times we slept before retrying */
#endif
2098 
2099 /*
2100  * The following routine will lock the inodes associated with the
2101  * directory and the named entry in the directory. The locks are
2102  * acquired in increasing inode number.
2103  *
2104  * If the entry is "..", then only the directory is locked. The
2105  * vnode ref count will still include that from the .. entry in
2106  * this case.
2107  *
2108  * There is a deadlock we need to worry about. If the locked directory is
2109  * in the AIL, it might be blocking up the log. The next inode we lock
2110  * could be already locked by another thread waiting for log space (e.g
2111  * a permanent log reservation with a long running transaction (see
2112  * xfs_itruncate_finish)). To solve this, we must check if the directory
2113  * is in the ail and use lock_nowait. If we can't lock, we need to
2114  * drop the inode lock on the directory and try again. xfs_iunlock will
2115  * potentially push the tail if we were holding up the log.
2116  */
2117 STATIC int
xfs_lock_dir_and_entry(xfs_inode_t * dp,vname_t * dentry,xfs_inode_t * ip)2118 xfs_lock_dir_and_entry(
2119 	xfs_inode_t	*dp,
2120 	vname_t		*dentry,
2121 	xfs_inode_t	*ip)	/* inode of entry 'name' */
2122 {
2123 	int		attempts;
2124 	xfs_ino_t	e_inum;
2125 	xfs_inode_t	*ips[2];
2126 	xfs_log_item_t	*lp;
2127 
2128 #ifdef DEBUG
2129 	xfs_rm_locks++;
2130 #endif
2131 	attempts = 0;
2132 
2133 again:
2134 	xfs_ilock(dp, XFS_ILOCK_EXCL);
2135 
2136 	e_inum = ip->i_ino;
2137 
2138 	ITRACE(ip);
2139 
2140 	/*
2141 	 * We want to lock in increasing inum. Since we've already
2142 	 * acquired the lock on the directory, we may need to release
2143 	 * if if the inum of the entry turns out to be less.
2144 	 */
2145 	if (e_inum > dp->i_ino) {
2146 		/*
2147 		 * We are already in the right order, so just
2148 		 * lock on the inode of the entry.
2149 		 * We need to use nowait if dp is in the AIL.
2150 		 */
2151 
2152 		lp = (xfs_log_item_t *)dp->i_itemp;
2153 		if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
2154 			if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2155 				attempts++;
2156 #ifdef DEBUG
2157 				xfs_rm_attempts++;
2158 #endif
2159 
2160 				/*
2161 				 * Unlock dp and try again.
2162 				 * xfs_iunlock will try to push the tail
2163 				 * if the inode is in the AIL.
2164 				 */
2165 
2166 				xfs_iunlock(dp, XFS_ILOCK_EXCL);
2167 
2168 				if ((attempts % 5) == 0) {
2169 					delay(1); /* Don't just spin the CPU */
2170 #ifdef DEBUG
2171 					xfs_rm_lock_delays++;
2172 #endif
2173 				}
2174 				goto again;
2175 			}
2176 		} else {
2177 			xfs_ilock(ip, XFS_ILOCK_EXCL);
2178 		}
2179 	} else if (e_inum < dp->i_ino) {
2180 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
2181 
2182 		ips[0] = ip;
2183 		ips[1] = dp;
2184 		xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
2185 	}
2186 	/* else	 e_inum == dp->i_ino */
2187 	/*     This can happen if we're asked to lock /x/..
2188 	 *     the entry is "..", which is also the parent directory.
2189 	 */
2190 
2191 	return 0;
2192 }
2193 
#ifdef DEBUG
/*
 * Debug-only statistics maintained by xfs_lock_inodes().
 */
int	xfs_locked_n;		/* calls that finished without any retry */
int	xfs_small_retries;	/* calls that needed fewer than 5 retries */
int	xfs_middle_retries;	/* calls that needed 5 to 99 retries */
int	xfs_lots_retries;	/* calls that needed 100 or more retries */
int	xfs_lock_delays;	/* times we slept before retrying */
#endif
2201 
2202 /*
2203  * The following routine will lock n inodes in exclusive mode.
2204  * We assume the caller calls us with the inodes in i_ino order.
2205  *
2206  * We need to detect deadlock where an inode that we lock
2207  * is in the AIL and we start waiting for another inode that is locked
2208  * by a thread in a long running transaction (such as truncate). This can
2209  * result in deadlock since the long running trans might need to wait
2210  * for the inode we just locked in order to push the tail and free space
2211  * in the log.
2212  */
2213 void
xfs_lock_inodes(xfs_inode_t ** ips,int inodes,int first_locked,uint lock_mode)2214 xfs_lock_inodes(
2215 	xfs_inode_t	**ips,
2216 	int		inodes,
2217 	int		first_locked,
2218 	uint		lock_mode)
2219 {
2220 	int		attempts = 0, i, j, try_lock;
2221 	xfs_log_item_t	*lp;
2222 
2223 	ASSERT(ips && (inodes >= 2)); /* we need at least two */
2224 
2225 	if (first_locked) {
2226 		try_lock = 1;
2227 		i = 1;
2228 	} else {
2229 		try_lock = 0;
2230 		i = 0;
2231 	}
2232 
2233 again:
2234 	for (; i < inodes; i++) {
2235 		ASSERT(ips[i]);
2236 
2237 		if (i && (ips[i] == ips[i-1]))	/* Already locked */
2238 			continue;
2239 
2240 		/*
2241 		 * If try_lock is not set yet, make sure all locked inodes
2242 		 * are not in the AIL.
2243 		 * If any are, set try_lock to be used later.
2244 		 */
2245 
2246 		if (!try_lock) {
2247 			for (j = (i - 1); j >= 0 && !try_lock; j--) {
2248 				lp = (xfs_log_item_t *)ips[j]->i_itemp;
2249 				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
2250 					try_lock++;
2251 				}
2252 			}
2253 		}
2254 
2255 		/*
2256 		 * If any of the previous locks we have locked is in the AIL,
2257 		 * we must TRY to get the second and subsequent locks. If
2258 		 * we can't get any, we must release all we have
2259 		 * and try again.
2260 		 */
2261 
2262 		if (try_lock) {
2263 			/* try_lock must be 0 if i is 0. */
2264 			/*
2265 			 * try_lock means we have an inode locked
2266 			 * that is in the AIL.
2267 			 */
2268 			ASSERT(i != 0);
2269 			if (!xfs_ilock_nowait(ips[i], lock_mode)) {
2270 				attempts++;
2271 
2272 				/*
2273 				 * Unlock all previous guys and try again.
2274 				 * xfs_iunlock will try to push the tail
2275 				 * if the inode is in the AIL.
2276 				 */
2277 
2278 				for(j = i - 1; j >= 0; j--) {
2279 
2280 					/*
2281 					 * Check to see if we've already
2282 					 * unlocked this one.
2283 					 * Not the first one going back,
2284 					 * and the inode ptr is the same.
2285 					 */
2286 					if ((j != (i - 1)) && ips[j] ==
2287 								ips[j+1])
2288 						continue;
2289 
2290 					xfs_iunlock(ips[j], lock_mode);
2291 				}
2292 
2293 				if ((attempts % 5) == 0) {
2294 					delay(1); /* Don't just spin the CPU */
2295 #ifdef DEBUG
2296 					xfs_lock_delays++;
2297 #endif
2298 				}
2299 				i = 0;
2300 				try_lock = 0;
2301 				goto again;
2302 			}
2303 		} else {
2304 			xfs_ilock(ips[i], lock_mode);
2305 		}
2306 	}
2307 
2308 #ifdef DEBUG
2309 	if (attempts) {
2310 		if (attempts < 5) xfs_small_retries++;
2311 		else if (attempts < 100) xfs_middle_retries++;
2312 		else xfs_lots_retries++;
2313 	} else {
2314 		xfs_locked_n++;
2315 	}
2316 #endif
2317 }
2318 
/*
 * REMOVE_DEBUG_TRACE(x): on DEBUG builds, store x (callers pass __LINE__)
 * in remove_which_error_return so the most recently taken error path can
 * be identified afterwards.  On non-DEBUG builds it does nothing and x is
 * not evaluated.
 *
 * Both variants are wrapped in do { } while (0) so that the macro followed
 * by a semicolon is exactly one statement in either configuration and can
 * be used safely as the unbraced body of an if/else.
 */
#ifdef	DEBUG
int remove_which_error_return = 0;
#define	REMOVE_DEBUG_TRACE(x)	\
	do { remove_which_error_return = (x); } while (0)
#else /* ! DEBUG */
#define	REMOVE_DEBUG_TRACE(x)	do { } while (0)
#endif	/* ! DEBUG */
2325 
2326 
2327 /*
2328  * xfs_remove
2329  *
2330  */
2331 STATIC int
xfs_remove(bhv_desc_t * dir_bdp,vname_t * dentry,cred_t * credp)2332 xfs_remove(
2333 	bhv_desc_t		*dir_bdp,
2334 	vname_t			*dentry,
2335 	cred_t			*credp)
2336 {
2337 	vnode_t			*dir_vp;
2338 	char			*name = VNAME(dentry);
2339 	xfs_inode_t             *dp, *ip;
2340 	xfs_trans_t             *tp = NULL;
2341 	xfs_mount_t		*mp;
2342 	int                     error = 0;
2343 	xfs_bmap_free_t         free_list;
2344 	xfs_fsblock_t           first_block;
2345 	int			cancel_flags;
2346 	int			committed;
2347 	int			dm_di_mode = 0;
2348 	int			link_zero;
2349 	uint			resblks;
2350 	int			namelen;
2351 
2352 	dir_vp = BHV_TO_VNODE(dir_bdp);
2353 	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
2354 
2355 	dp = XFS_BHVTOI(dir_bdp);
2356 	mp = dp->i_mount;
2357 
2358 	if (XFS_FORCED_SHUTDOWN(mp))
2359 		return XFS_ERROR(EIO);
2360 
2361 	namelen = VNAMELEN(dentry);
2362 
2363 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
2364 		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, dir_vp,
2365 					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
2366 					name, NULL, 0, 0, 0);
2367 		if (error)
2368 			return error;
2369 	}
2370 
2371 	/* From this point on, return through std_return */
2372 	ip = NULL;
2373 
2374 	/*
2375 	 * We need to get a reference to ip before we get our log
2376 	 * reservation. The reason for this is that we cannot call
2377 	 * xfs_iget for an inode for which we do not have a reference
2378 	 * once we've acquired a log reservation. This is because the
2379 	 * inode we are trying to get might be in xfs_inactive going
2380 	 * for a log reservation. Since we'll have to wait for the
2381 	 * inactive code to complete before returning from xfs_iget,
2382 	 * we need to make sure that we don't have log space reserved
2383 	 * when we call xfs_iget.  Instead we get an unlocked referece
2384 	 * to the inode before getting our log reservation.
2385 	 */
2386 	error = xfs_get_dir_entry(dentry, &ip);
2387 	if (error) {
2388 		REMOVE_DEBUG_TRACE(__LINE__);
2389 		goto std_return;
2390 	}
2391 
2392 	dm_di_mode = ip->i_d.di_mode;
2393 
2394 	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
2395 
2396 	ITRACE(ip);
2397 
2398 	error = XFS_QM_DQATTACH(mp, dp, 0);
2399 	if (!error && dp != ip)
2400 		error = XFS_QM_DQATTACH(mp, ip, 0);
2401 	if (error) {
2402 		REMOVE_DEBUG_TRACE(__LINE__);
2403 		IRELE(ip);
2404 		goto std_return;
2405 	}
2406 
2407 	tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
2408 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2409 	/*
2410 	 * We try to get the real space reservation first,
2411 	 * allowing for directory btree deletion(s) implying
2412 	 * possible bmap insert(s).  If we can't get the space
2413 	 * reservation then we use 0 instead, and avoid the bmap
2414 	 * btree insert(s) in the directory code by, if the bmap
2415 	 * insert tries to happen, instead trimming the LAST
2416 	 * block from the directory.
2417 	 */
2418 	resblks = XFS_REMOVE_SPACE_RES(mp);
2419 	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
2420 			XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
2421 	if (error == ENOSPC) {
2422 		resblks = 0;
2423 		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
2424 				XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT);
2425 	}
2426 	if (error) {
2427 		ASSERT(error != ENOSPC);
2428 		REMOVE_DEBUG_TRACE(__LINE__);
2429 		xfs_trans_cancel(tp, 0);
2430 		IRELE(ip);
2431 		return error;
2432 	}
2433 
2434 	error = xfs_lock_dir_and_entry(dp, dentry, ip);
2435 	if (error) {
2436 		REMOVE_DEBUG_TRACE(__LINE__);
2437 		xfs_trans_cancel(tp, cancel_flags);
2438 		IRELE(ip);
2439 		goto std_return;
2440 	}
2441 
2442 	/*
2443 	 * At this point, we've gotten both the directory and the entry
2444 	 * inodes locked.
2445 	 */
2446 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2447 	if (dp != ip) {
2448 		/*
2449 		 * Increment vnode ref count only in this case since
2450 		 * there's an extra vnode reference in the case where
2451 		 * dp == ip.
2452 		 */
2453 		IHOLD(dp);
2454 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2455 	}
2456 
2457 	/*
2458 	 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2459 	 */
2460 	XFS_BMAP_INIT(&free_list, &first_block);
2461 	error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, ip->i_ino,
2462 		&first_block, &free_list, 0);
2463 	if (error) {
2464 		ASSERT(error != ENOENT);
2465 		REMOVE_DEBUG_TRACE(__LINE__);
2466 		goto error1;
2467 	}
2468 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2469 
2470 	dp->i_gen++;
2471 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2472 
2473 	error = xfs_droplink(tp, ip);
2474 	if (error) {
2475 		REMOVE_DEBUG_TRACE(__LINE__);
2476 		goto error1;
2477 	}
2478 
2479 	/* Determine if this is the last link while
2480 	 * we are in the transaction.
2481 	 */
2482 	link_zero = (ip)->i_d.di_nlink==0;
2483 
2484 	/*
2485 	 * Take an extra ref on the inode so that it doesn't
2486 	 * go to xfs_inactive() from within the commit.
2487 	 */
2488 	IHOLD(ip);
2489 
2490 	/*
2491 	 * If this is a synchronous mount, make sure that the
2492 	 * remove transaction goes to disk before returning to
2493 	 * the user.
2494 	 */
2495 	if (mp->m_flags & XFS_MOUNT_WSYNC) {
2496 		xfs_trans_set_sync(tp);
2497 	}
2498 
2499 	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
2500 	if (error) {
2501 		REMOVE_DEBUG_TRACE(__LINE__);
2502 		goto error_rele;
2503 	}
2504 
2505 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
2506 	if (error) {
2507 		IRELE(ip);
2508 		goto std_return;
2509 	}
2510 
2511 	/*
2512 	 * Before we drop our extra reference to the inode, purge it
2513 	 * from the refcache if it is there.  By waiting until afterwards
2514 	 * to do the IRELE, we ensure that we won't go inactive in the
2515 	 * xfs_refcache_purge_ip routine (although that would be OK).
2516 	 */
2517 	xfs_refcache_purge_ip(ip);
2518 
2519 	vn_trace_exit(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
2520 
2521 	/*
2522 	 * Let interposed file systems know about removed links.
2523 	 */
2524 	VOP_LINK_REMOVED(XFS_ITOV(ip), dir_vp, link_zero);
2525 
2526 	IRELE(ip);
2527 
2528 /*	Fall through to std_return with error = 0 */
2529  std_return:
2530 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp,
2531 						DM_EVENT_POSTREMOVE)) {
2532 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
2533 				dir_vp, DM_RIGHT_NULL,
2534 				NULL, DM_RIGHT_NULL,
2535 				name, NULL, dm_di_mode, error, 0);
2536 	}
2537 	return error;
2538 
2539  error1:
2540 	xfs_bmap_cancel(&free_list);
2541 	cancel_flags |= XFS_TRANS_ABORT;
2542 	xfs_trans_cancel(tp, cancel_flags);
2543 	goto std_return;
2544 
2545  error_rele:
2546 	/*
2547 	 * In this case make sure to not release the inode until after
2548 	 * the current transaction is aborted.  Releasing it beforehand
2549 	 * can cause us to go to xfs_inactive and start a recursive
2550 	 * transaction which can easily deadlock with the current one.
2551 	 */
2552 	xfs_bmap_cancel(&free_list);
2553 	cancel_flags |= XFS_TRANS_ABORT;
2554 	xfs_trans_cancel(tp, cancel_flags);
2555 
2556 	/*
2557 	 * Before we drop our extra reference to the inode, purge it
2558 	 * from the refcache if it is there.  By waiting until afterwards
2559 	 * to do the IRELE, we ensure that we won't go inactive in the
2560 	 * xfs_refcache_purge_ip routine (although that would be OK).
2561 	 */
2562 	xfs_refcache_purge_ip(ip);
2563 
2564 	IRELE(ip);
2565 
2566 	goto std_return;
2567 }
2568 
2569 
2570 /*
2571  * xfs_link
2572  *
2573  */
2574 STATIC int
xfs_link(bhv_desc_t * target_dir_bdp,vnode_t * src_vp,vname_t * dentry,cred_t * credp)2575 xfs_link(
2576 	bhv_desc_t		*target_dir_bdp,
2577 	vnode_t			*src_vp,
2578 	vname_t			*dentry,
2579 	cred_t			*credp)
2580 {
2581 	xfs_inode_t		*tdp, *sip;
2582 	xfs_trans_t		*tp;
2583 	xfs_mount_t		*mp;
2584 	xfs_inode_t		*ips[2];
2585 	int			error;
2586 	xfs_bmap_free_t         free_list;
2587 	xfs_fsblock_t           first_block;
2588 	int			cancel_flags;
2589 	int			committed;
2590 	vnode_t			*target_dir_vp;
2591 	bhv_desc_t		*src_bdp;
2592 	int			resblks;
2593 	char			*target_name = VNAME(dentry);
2594 	int			target_namelen;
2595 
2596 	target_dir_vp = BHV_TO_VNODE(target_dir_bdp);
2597 	vn_trace_entry(target_dir_vp, __FUNCTION__, (inst_t *)__return_address);
2598 	vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);
2599 
2600 	target_namelen = VNAMELEN(dentry);
2601 	if (src_vp->v_type == VDIR)
2602 		return XFS_ERROR(EPERM);
2603 
2604 	/*
2605 	 * For now, manually find the XFS behavior descriptor for
2606 	 * the source vnode.  If it doesn't exist then something
2607 	 * is wrong and we should just return an error.
2608 	 * Eventually we need to figure out how link is going to
2609 	 * work in the face of stacked vnodes.
2610 	 */
2611 	src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
2612 	if (src_bdp == NULL) {
2613 		return XFS_ERROR(EXDEV);
2614 	}
2615 	sip = XFS_BHVTOI(src_bdp);
2616 	tdp = XFS_BHVTOI(target_dir_bdp);
2617 	mp = tdp->i_mount;
2618 	if (XFS_FORCED_SHUTDOWN(mp))
2619 		return XFS_ERROR(EIO);
2620 
2621 	if (DM_EVENT_ENABLED(src_vp->v_vfsp, tdp, DM_EVENT_LINK)) {
2622 		error = XFS_SEND_NAMESP(mp, DM_EVENT_LINK,
2623 					target_dir_vp, DM_RIGHT_NULL,
2624 					src_vp, DM_RIGHT_NULL,
2625 					target_name, NULL, 0, 0, 0);
2626 		if (error)
2627 			return error;
2628 	}
2629 
2630 	/* Return through std_return after this point. */
2631 
2632 	error = XFS_QM_DQATTACH(mp, sip, 0);
2633 	if (!error && sip != tdp)
2634 		error = XFS_QM_DQATTACH(mp, tdp, 0);
2635 	if (error)
2636 		goto std_return;
2637 
2638 	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
2639 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2640 	resblks = XFS_LINK_SPACE_RES(mp, target_namelen);
2641 	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
2642 			XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
2643 	if (error == ENOSPC) {
2644 		resblks = 0;
2645 		error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
2646 				XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
2647 	}
2648 	if (error) {
2649 		cancel_flags = 0;
2650 		goto error_return;
2651 	}
2652 
2653 	if (sip->i_ino < tdp->i_ino) {
2654 		ips[0] = sip;
2655 		ips[1] = tdp;
2656 	} else {
2657 		ips[0] = tdp;
2658 		ips[1] = sip;
2659 	}
2660 
2661 	xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
2662 
2663 	/*
2664 	 * Increment vnode ref counts since xfs_trans_commit &
2665 	 * xfs_trans_cancel will both unlock the inodes and
2666 	 * decrement the associated ref counts.
2667 	 */
2668 	VN_HOLD(src_vp);
2669 	VN_HOLD(target_dir_vp);
2670 	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
2671 	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
2672 
2673 	/*
2674 	 * If the source has too many links, we can't make any more to it.
2675 	 */
2676 	if (sip->i_d.di_nlink >= XFS_MAXLINK) {
2677 		error = XFS_ERROR(EMLINK);
2678 		goto error_return;
2679 	}
2680 
2681 	if (resblks == 0 &&
2682 	    (error = XFS_DIR_CANENTER(mp, tp, tdp, target_name,
2683 			target_namelen)))
2684 		goto error_return;
2685 
2686 	XFS_BMAP_INIT(&free_list, &first_block);
2687 
2688 	error = XFS_DIR_CREATENAME(mp, tp, tdp, target_name, target_namelen,
2689 				   sip->i_ino, &first_block, &free_list,
2690 				   resblks);
2691 	if (error)
2692 		goto abort_return;
2693 	xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2694 	tdp->i_gen++;
2695 	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
2696 
2697 	error = xfs_bumplink(tp, sip);
2698 	if (error) {
2699 		goto abort_return;
2700 	}
2701 
2702 	/*
2703 	 * If this is a synchronous mount, make sure that the
2704 	 * link transaction goes to disk before returning to
2705 	 * the user.
2706 	 */
2707 	if (mp->m_flags & XFS_MOUNT_WSYNC) {
2708 		xfs_trans_set_sync(tp);
2709 	}
2710 
2711 	error = xfs_bmap_finish (&tp, &free_list, first_block, &committed);
2712 	if (error) {
2713 		xfs_bmap_cancel(&free_list);
2714 		goto abort_return;
2715 	}
2716 
2717 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
2718 	if (error) {
2719 		goto std_return;
2720 	}
2721 
2722 	/* Fall through to std_return with error = 0. */
2723 std_return:
2724 	if (DM_EVENT_ENABLED(src_vp->v_vfsp, sip,
2725 						DM_EVENT_POSTLINK)) {
2726 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTLINK,
2727 				target_dir_vp, DM_RIGHT_NULL,
2728 				src_vp, DM_RIGHT_NULL,
2729 				target_name, NULL, 0, error, 0);
2730 	}
2731 	return error;
2732 
2733  abort_return:
2734 	cancel_flags |= XFS_TRANS_ABORT;
2735 	/* FALLTHROUGH */
2736  error_return:
2737 	xfs_trans_cancel(tp, cancel_flags);
2738 
2739 	goto std_return;
2740 }
2741 /*
2742  * xfs_mkdir
2743  *
2744  */
2745 STATIC int
xfs_mkdir(bhv_desc_t * dir_bdp,vname_t * dentry,vattr_t * vap,vnode_t ** vpp,cred_t * credp)2746 xfs_mkdir(
2747 	bhv_desc_t		*dir_bdp,
2748 	vname_t			*dentry,
2749 	vattr_t			*vap,
2750 	vnode_t			**vpp,
2751 	cred_t			*credp)
2752 {
2753 	char			*dir_name = VNAME(dentry);
2754 	xfs_inode_t             *dp;
2755 	xfs_inode_t		*cdp;	/* inode of created dir */
2756 	vnode_t			*cvp;	/* vnode of created dir */
2757 	xfs_trans_t		*tp;
2758 	xfs_mount_t		*mp;
2759 	int			cancel_flags;
2760 	int			error;
2761 	int			committed;
2762 	xfs_bmap_free_t         free_list;
2763 	xfs_fsblock_t           first_block;
2764 	vnode_t			*dir_vp;
2765 	boolean_t		dp_joined_to_trans;
2766 	boolean_t		created = B_FALSE;
2767 	int			dm_event_sent = 0;
2768 	xfs_prid_t		prid;
2769 	struct xfs_dquot	*udqp, *gdqp;
2770 	uint			resblks;
2771 	int			dm_di_mode;
2772 	int			dir_namelen;
2773 
2774 	dir_vp = BHV_TO_VNODE(dir_bdp);
2775 	dp = XFS_BHVTOI(dir_bdp);
2776 	mp = dp->i_mount;
2777 
2778 	if (XFS_FORCED_SHUTDOWN(mp))
2779 		return XFS_ERROR(EIO);
2780 
2781 	dir_namelen = VNAMELEN(dentry);
2782 
2783 	tp = NULL;
2784 	dp_joined_to_trans = B_FALSE;
2785 	dm_di_mode = vap->va_mode|VTTOIF(vap->va_type);
2786 
2787 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
2788 		error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
2789 					dir_vp, DM_RIGHT_NULL, NULL,
2790 					DM_RIGHT_NULL, dir_name, NULL,
2791 					dm_di_mode, 0, 0);
2792 		if (error)
2793 			return error;
2794 		dm_event_sent = 1;
2795 	}
2796 
2797 	/* Return through std_return after this point. */
2798 
2799 	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
2800 
2801 	mp = dp->i_mount;
2802 	udqp = gdqp = NULL;
2803 	if (vap->va_mask & XFS_AT_PROJID)
2804 		prid = (xfs_prid_t)vap->va_projid;
2805 	else
2806 		prid = (xfs_prid_t)dfltprid;
2807 
2808 	/*
2809 	 * Make sure that we have allocated dquot(s) on disk.
2810 	 */
2811 	error = XFS_QM_DQVOPALLOC(mp, dp,
2812 			current_fsuid(credp), current_fsgid(credp),
2813 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
2814 	if (error)
2815 		goto std_return;
2816 
2817 	tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
2818 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2819 	resblks = XFS_MKDIR_SPACE_RES(mp, dir_namelen);
2820 	error = xfs_trans_reserve(tp, resblks, XFS_MKDIR_LOG_RES(mp), 0,
2821 				  XFS_TRANS_PERM_LOG_RES, XFS_MKDIR_LOG_COUNT);
2822 	if (error == ENOSPC) {
2823 		resblks = 0;
2824 		error = xfs_trans_reserve(tp, 0, XFS_MKDIR_LOG_RES(mp), 0,
2825 					  XFS_TRANS_PERM_LOG_RES,
2826 					  XFS_MKDIR_LOG_COUNT);
2827 	}
2828 	if (error) {
2829 		cancel_flags = 0;
2830 		dp = NULL;
2831 		goto error_return;
2832 	}
2833 
2834 	xfs_ilock(dp, XFS_ILOCK_EXCL);
2835 
2836 	/*
2837 	 * Check for directory link count overflow.
2838 	 */
2839 	if (dp->i_d.di_nlink >= XFS_MAXLINK) {
2840 		error = XFS_ERROR(EMLINK);
2841 		goto error_return;
2842 	}
2843 
2844 	/*
2845 	 * Reserve disk quota and the inode.
2846 	 */
2847 	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
2848 	if (error)
2849 		goto error_return;
2850 
2851 	if (resblks == 0 &&
2852 	    (error = XFS_DIR_CANENTER(mp, tp, dp, dir_name, dir_namelen)))
2853 		goto error_return;
2854 	/*
2855 	 * create the directory inode.
2856 	 */
2857 	error = xfs_dir_ialloc(&tp, dp,
2858 			MAKEIMODE(vap->va_type,vap->va_mode), 2,
2859 			0, credp, prid, resblks > 0,
2860 		&cdp, NULL);
2861 	if (error) {
2862 		if (error == ENOSPC)
2863 			goto error_return;
2864 		goto abort_return;
2865 	}
2866 	ITRACE(cdp);
2867 
2868 	/*
2869 	 * Now we add the directory inode to the transaction.
2870 	 * We waited until now since xfs_dir_ialloc might start
2871 	 * a new transaction.  Had we joined the transaction
2872 	 * earlier, the locks might have gotten released.
2873 	 */
2874 	VN_HOLD(dir_vp);
2875 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2876 	dp_joined_to_trans = B_TRUE;
2877 
2878 	XFS_BMAP_INIT(&free_list, &first_block);
2879 
2880 	error = XFS_DIR_CREATENAME(mp, tp, dp, dir_name, dir_namelen,
2881 			cdp->i_ino, &first_block, &free_list,
2882 			resblks ? resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
2883 	if (error) {
2884 		ASSERT(error != ENOSPC);
2885 		goto error1;
2886 	}
2887 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2888 
2889 	/*
2890 	 * Bump the in memory version number of the parent directory
2891 	 * so that other processes accessing it will recognize that
2892 	 * the directory has changed.
2893 	 */
2894 	dp->i_gen++;
2895 
2896 	error = XFS_DIR_INIT(mp, tp, cdp, dp);
2897 	if (error) {
2898 		goto error2;
2899 	}
2900 
2901 	cdp->i_gen = 1;
2902 	error = xfs_bumplink(tp, dp);
2903 	if (error) {
2904 		goto error2;
2905 	}
2906 
2907 	cvp = XFS_ITOV(cdp);
2908 
2909 	created = B_TRUE;
2910 
2911 	*vpp = cvp;
2912 	IHOLD(cdp);
2913 
2914 	/*
2915 	 * Attach the dquots to the new inode and modify the icount incore.
2916 	 */
2917 	XFS_QM_DQVOPCREATE(mp, tp, cdp, udqp, gdqp);
2918 
2919 	/*
2920 	 * If this is a synchronous mount, make sure that the
2921 	 * mkdir transaction goes to disk before returning to
2922 	 * the user.
2923 	 */
2924 	if (mp->m_flags & XFS_MOUNT_WSYNC) {
2925 		xfs_trans_set_sync(tp);
2926 	}
2927 
2928 	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
2929 	if (error) {
2930 		IRELE(cdp);
2931 		goto error2;
2932 	}
2933 
2934 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
2935 	XFS_QM_DQRELE(mp, udqp);
2936 	XFS_QM_DQRELE(mp, gdqp);
2937 	if (error) {
2938 		IRELE(cdp);
2939 	}
2940 
2941 	/* Fall through to std_return with error = 0 or errno from
2942 	 * xfs_trans_commit. */
2943 
2944 std_return:
2945 	if ( (created || (error != 0 && dm_event_sent != 0)) &&
2946 			DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
2947 						DM_EVENT_POSTCREATE)) {
2948 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTCREATE,
2949 					dir_vp, DM_RIGHT_NULL,
2950 					created ? XFS_ITOV(cdp):NULL,
2951 					DM_RIGHT_NULL,
2952 					dir_name, NULL,
2953 					dm_di_mode, error, 0);
2954 	}
2955 	return error;
2956 
2957  error2:
2958  error1:
2959 	xfs_bmap_cancel(&free_list);
2960  abort_return:
2961 	cancel_flags |= XFS_TRANS_ABORT;
2962  error_return:
2963 	xfs_trans_cancel(tp, cancel_flags);
2964 	XFS_QM_DQRELE(mp, udqp);
2965 	XFS_QM_DQRELE(mp, gdqp);
2966 
2967 	if (!dp_joined_to_trans && (dp != NULL)) {
2968 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
2969 	}
2970 
2971 	goto std_return;
2972 }
2973 
2974 
2975 /*
2976  * xfs_rmdir
2977  *
2978  */
2979 STATIC int
xfs_rmdir(bhv_desc_t * dir_bdp,vname_t * dentry,cred_t * credp)2980 xfs_rmdir(
2981 	bhv_desc_t		*dir_bdp,
2982 	vname_t			*dentry,
2983 	cred_t			*credp)
2984 {
2985 	char			*name = VNAME(dentry);
2986 	xfs_inode_t             *dp;
2987 	xfs_inode_t             *cdp;   /* child directory */
2988 	xfs_trans_t             *tp;
2989 	xfs_mount_t		*mp;
2990 	int                     error;
2991 	xfs_bmap_free_t         free_list;
2992 	xfs_fsblock_t           first_block;
2993 	int			cancel_flags;
2994 	int			committed;
2995 	vnode_t			*dir_vp;
2996 	int			dm_di_mode = 0;
2997 	int			last_cdp_link;
2998 	int			namelen;
2999 	uint			resblks;
3000 
3001 	dir_vp = BHV_TO_VNODE(dir_bdp);
3002 	dp = XFS_BHVTOI(dir_bdp);
3003 	mp = dp->i_mount;
3004 
3005 	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
3006 
3007 	if (XFS_FORCED_SHUTDOWN(XFS_BHVTOI(dir_bdp)->i_mount))
3008 		return XFS_ERROR(EIO);
3009 	namelen = VNAMELEN(dentry);
3010 
3011 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_REMOVE)) {
3012 		error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
3013 					dir_vp, DM_RIGHT_NULL,
3014 					NULL, DM_RIGHT_NULL,
3015 					name, NULL, 0, 0, 0);
3016 		if (error)
3017 			return XFS_ERROR(error);
3018 	}
3019 
3020 	/* Return through std_return after this point. */
3021 
3022 	cdp = NULL;
3023 
3024 	/*
3025 	 * We need to get a reference to cdp before we get our log
3026 	 * reservation.  The reason for this is that we cannot call
3027 	 * xfs_iget for an inode for which we do not have a reference
3028 	 * once we've acquired a log reservation.  This is because the
3029 	 * inode we are trying to get might be in xfs_inactive going
3030 	 * for a log reservation.  Since we'll have to wait for the
3031 	 * inactive code to complete before returning from xfs_iget,
3032 	 * we need to make sure that we don't have log space reserved
3033 	 * when we call xfs_iget.  Instead we get an unlocked referece
3034 	 * to the inode before getting our log reservation.
3035 	 */
3036 	error = xfs_get_dir_entry(dentry, &cdp);
3037 	if (error) {
3038 		REMOVE_DEBUG_TRACE(__LINE__);
3039 		goto std_return;
3040 	}
3041 	mp = dp->i_mount;
3042 	dm_di_mode = cdp->i_d.di_mode;
3043 
3044 	/*
3045 	 * Get the dquots for the inodes.
3046 	 */
3047 	error = XFS_QM_DQATTACH(mp, dp, 0);
3048 	if (!error && dp != cdp)
3049 		error = XFS_QM_DQATTACH(mp, cdp, 0);
3050 	if (error) {
3051 		IRELE(cdp);
3052 		REMOVE_DEBUG_TRACE(__LINE__);
3053 		goto std_return;
3054 	}
3055 
3056 	tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
3057 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
3058 	/*
3059 	 * We try to get the real space reservation first,
3060 	 * allowing for directory btree deletion(s) implying
3061 	 * possible bmap insert(s).  If we can't get the space
3062 	 * reservation then we use 0 instead, and avoid the bmap
3063 	 * btree insert(s) in the directory code by, if the bmap
3064 	 * insert tries to happen, instead trimming the LAST
3065 	 * block from the directory.
3066 	 */
3067 	resblks = XFS_REMOVE_SPACE_RES(mp);
3068 	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
3069 			XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
3070 	if (error == ENOSPC) {
3071 		resblks = 0;
3072 		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
3073 				XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
3074 	}
3075 	if (error) {
3076 		ASSERT(error != ENOSPC);
3077 		cancel_flags = 0;
3078 		IRELE(cdp);
3079 		goto error_return;
3080 	}
3081 	XFS_BMAP_INIT(&free_list, &first_block);
3082 
3083 	/*
3084 	 * Now lock the child directory inode and the parent directory
3085 	 * inode in the proper order.  This will take care of validating
3086 	 * that the directory entry for the child directory inode has
3087 	 * not changed while we were obtaining a log reservation.
3088 	 */
3089 	error = xfs_lock_dir_and_entry(dp, dentry, cdp);
3090 	if (error) {
3091 		xfs_trans_cancel(tp, cancel_flags);
3092 		IRELE(cdp);
3093 		goto std_return;
3094 	}
3095 
3096 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
3097 	if (dp != cdp) {
3098 		/*
3099 		 * Only increment the parent directory vnode count if
3100 		 * we didn't bump it in looking up cdp.  The only time
3101 		 * we don't bump it is when we're looking up ".".
3102 		 */
3103 		VN_HOLD(dir_vp);
3104 	}
3105 
3106 	ITRACE(cdp);
3107 	xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
3108 
3109 	ASSERT(cdp->i_d.di_nlink >= 2);
3110 	if (cdp->i_d.di_nlink != 2) {
3111 		error = XFS_ERROR(ENOTEMPTY);
3112 		goto error_return;
3113 	}
3114 	if (!XFS_DIR_ISEMPTY(mp, cdp)) {
3115 		error = XFS_ERROR(ENOTEMPTY);
3116 		goto error_return;
3117 	}
3118 
3119 	error = XFS_DIR_REMOVENAME(mp, tp, dp, name, namelen, cdp->i_ino,
3120 		&first_block, &free_list, resblks);
3121 	if (error) {
3122 		goto error1;
3123 	}
3124 
3125 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3126 
3127 	/*
3128 	 * Bump the in memory generation count on the parent
3129 	 * directory so that other can know that it has changed.
3130 	 */
3131 	dp->i_gen++;
3132 
3133 	/*
3134 	 * Drop the link from cdp's "..".
3135 	 */
3136 	error = xfs_droplink(tp, dp);
3137 	if (error) {
3138 		goto error1;
3139 	}
3140 
3141 	/*
3142 	 * Drop the link from dp to cdp.
3143 	 */
3144 	error = xfs_droplink(tp, cdp);
3145 	if (error) {
3146 		goto error1;
3147 	}
3148 
3149 	/*
3150 	 * Drop the "." link from cdp to self.
3151 	 */
3152 	error = xfs_droplink(tp, cdp);
3153 	if (error) {
3154 		goto error1;
3155 	}
3156 
3157 	/* Determine these before committing transaction */
3158 	last_cdp_link = (cdp)->i_d.di_nlink==0;
3159 
3160 	/*
3161 	 * Take an extra ref on the child vnode so that it
3162 	 * does not go to xfs_inactive() from within the commit.
3163 	 */
3164 	IHOLD(cdp);
3165 
3166 	/*
3167 	 * If this is a synchronous mount, make sure that the
3168 	 * rmdir transaction goes to disk before returning to
3169 	 * the user.
3170 	 */
3171 	if (mp->m_flags & XFS_MOUNT_WSYNC) {
3172 		xfs_trans_set_sync(tp);
3173 	}
3174 
3175 	error = xfs_bmap_finish (&tp, &free_list, first_block, &committed);
3176 	if (error) {
3177 		xfs_bmap_cancel(&free_list);
3178 		xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
3179 				 XFS_TRANS_ABORT));
3180 		IRELE(cdp);
3181 		goto std_return;
3182 	}
3183 
3184 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
3185 	if (error) {
3186 		IRELE(cdp);
3187 		goto std_return;
3188 	}
3189 
3190 
3191 	/*
3192 	 * Let interposed file systems know about removed links.
3193 	 */
3194 	VOP_LINK_REMOVED(XFS_ITOV(cdp), dir_vp, last_cdp_link);
3195 
3196 	IRELE(cdp);
3197 
3198 	/* Fall through to std_return with error = 0 or the errno
3199 	 * from xfs_trans_commit. */
3200 std_return:
3201 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_POSTREMOVE)) {
3202 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
3203 					dir_vp, DM_RIGHT_NULL,
3204 					NULL, DM_RIGHT_NULL,
3205 					name, NULL, dm_di_mode,
3206 					error, 0);
3207 	}
3208 	return error;
3209 
3210  error1:
3211 	xfs_bmap_cancel(&free_list);
3212 	cancel_flags |= XFS_TRANS_ABORT;
3213  error_return:
3214 	xfs_trans_cancel(tp, cancel_flags);
3215 	goto std_return;
3216 }
3217 
3218 
3219 /*
3220  * xfs_readdir
3221  *
3222  * Read dp's entries starting at uiop->uio_offset and translate them into
3223  * bufsize bytes worth of struct dirents starting at bufbase.
3224  */
3225 STATIC int
xfs_readdir(bhv_desc_t * dir_bdp,uio_t * uiop,cred_t * credp,int * eofp)3226 xfs_readdir(
3227 	bhv_desc_t	*dir_bdp,
3228 	uio_t		*uiop,
3229 	cred_t		*credp,
3230 	int		*eofp)
3231 {
3232 	xfs_inode_t	*dp;
3233 	xfs_trans_t	*tp = NULL;
3234 	int		error = 0;
3235 	uint		lock_mode;
3236 	xfs_off_t	start_offset;
3237 
3238 	vn_trace_entry(BHV_TO_VNODE(dir_bdp), __FUNCTION__,
3239 					       (inst_t *)__return_address);
3240 	dp = XFS_BHVTOI(dir_bdp);
3241 
3242 	if (XFS_FORCED_SHUTDOWN(dp->i_mount)) {
3243 		return XFS_ERROR(EIO);
3244 	}
3245 
3246 	lock_mode = xfs_ilock_map_shared(dp);
3247 	start_offset = uiop->uio_offset;
3248 	error = XFS_DIR_GETDENTS(dp->i_mount, tp, dp, uiop, eofp);
3249 	if (start_offset != uiop->uio_offset) {
3250 		xfs_ichgtime(dp, XFS_ICHGTIME_ACC);
3251 	}
3252 	xfs_iunlock_map_shared(dp, lock_mode);
3253 	return error;
3254 }
3255 
3256 
3257 /*
3258  * xfs_symlink
3259  *
3260  */
3261 STATIC int
xfs_symlink(bhv_desc_t * dir_bdp,vname_t * dentry,vattr_t * vap,char * target_path,vnode_t ** vpp,cred_t * credp)3262 xfs_symlink(
3263 	bhv_desc_t		*dir_bdp,
3264 	vname_t			*dentry,
3265 	vattr_t			*vap,
3266 	char			*target_path,
3267 	vnode_t			**vpp,
3268 	cred_t			*credp)
3269 {
3270 	xfs_trans_t		*tp;
3271 	xfs_mount_t		*mp;
3272 	xfs_inode_t		*dp;
3273 	xfs_inode_t		*ip;
3274 	int			error;
3275 	int			pathlen;
3276 	xfs_bmap_free_t		free_list;
3277 	xfs_fsblock_t		first_block;
3278 	boolean_t		dp_joined_to_trans;
3279 	vnode_t			*dir_vp;
3280 	uint			cancel_flags;
3281 	int			committed;
3282 	xfs_fileoff_t		first_fsb;
3283 	xfs_filblks_t		fs_blocks;
3284 	int			nmaps;
3285 	xfs_bmbt_irec_t		mval[SYMLINK_MAPS];
3286 	xfs_daddr_t		d;
3287 	char			*cur_chunk;
3288 	int			byte_cnt;
3289 	int			n;
3290 	xfs_buf_t		*bp;
3291 	xfs_prid_t		prid;
3292 	struct xfs_dquot	*udqp, *gdqp;
3293 	uint			resblks;
3294 	char			*link_name = VNAME(dentry);
3295 	int			link_namelen;
3296 
3297 	*vpp = NULL;
3298 	dir_vp = BHV_TO_VNODE(dir_bdp);
3299 	dp = XFS_BHVTOI(dir_bdp);
3300 	dp_joined_to_trans = B_FALSE;
3301 	error = 0;
3302 	ip = NULL;
3303 	tp = NULL;
3304 
3305 	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);
3306 
3307 	mp = dp->i_mount;
3308 
3309 	if (XFS_FORCED_SHUTDOWN(mp))
3310 		return XFS_ERROR(EIO);
3311 
3312 	link_namelen = VNAMELEN(dentry);
3313 
3314 	/*
3315 	 * Check component lengths of the target path name.
3316 	 */
3317 	pathlen = strlen(target_path);
3318 	if (pathlen >= MAXPATHLEN)      /* total string too long */
3319 		return XFS_ERROR(ENAMETOOLONG);
3320 	if (pathlen >= MAXNAMELEN) {    /* is any component too long? */
3321 		int len, total;
3322 		char *path;
3323 
3324 		for(total = 0, path = target_path; total < pathlen;) {
3325 			/*
3326 			 * Skip any slashes.
3327 			 */
3328 			while(*path == '/') {
3329 				total++;
3330 				path++;
3331 			}
3332 
3333 			/*
3334 			 * Count up to the next slash or end of path.
3335 			 * Error out if the component is bigger than MAXNAMELEN.
3336 			 */
3337 			for(len = 0; *path != '/' && total < pathlen;total++, path++) {
3338 				if (++len >= MAXNAMELEN) {
3339 					error = ENAMETOOLONG;
3340 					return error;
3341 				}
3342 			}
3343 		}
3344 	}
3345 
3346 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_SYMLINK)) {
3347 		error = XFS_SEND_NAMESP(mp, DM_EVENT_SYMLINK, dir_vp,
3348 					DM_RIGHT_NULL, NULL, DM_RIGHT_NULL,
3349 					link_name, target_path, 0, 0, 0);
3350 		if (error)
3351 			return error;
3352 	}
3353 
3354 	/* Return through std_return after this point. */
3355 
3356 	udqp = gdqp = NULL;
3357 	if (vap->va_mask & XFS_AT_PROJID)
3358 		prid = (xfs_prid_t)vap->va_projid;
3359 	else
3360 		prid = (xfs_prid_t)dfltprid;
3361 
3362 	/*
3363 	 * Make sure that we have allocated dquot(s) on disk.
3364 	 */
3365 	error = XFS_QM_DQVOPALLOC(mp, dp,
3366 			current_fsuid(credp), current_fsgid(credp),
3367 			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
3368 	if (error)
3369 		goto std_return;
3370 
3371 	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
3372 	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
3373 	/*
3374 	 * The symlink will fit into the inode data fork?
3375 	 * There can't be any attributes so we get the whole variable part.
3376 	 */
3377 	if (pathlen <= XFS_LITINO(mp))
3378 		fs_blocks = 0;
3379 	else
3380 		fs_blocks = XFS_B_TO_FSB(mp, pathlen);
3381 	resblks = XFS_SYMLINK_SPACE_RES(mp, link_namelen, fs_blocks);
3382 	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
3383 			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
3384 	if (error == ENOSPC && fs_blocks == 0) {
3385 		resblks = 0;
3386 		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
3387 				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
3388 	}
3389 	if (error) {
3390 		cancel_flags = 0;
3391 		dp = NULL;
3392 		goto error_return;
3393 	}
3394 
3395 	xfs_ilock(dp, XFS_ILOCK_EXCL);
3396 
3397 	/*
3398 	 * Check whether the directory allows new symlinks or not.
3399 	 */
3400 	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
3401 		error = XFS_ERROR(EPERM);
3402 		goto error_return;
3403 	}
3404 
3405 	/*
3406 	 * Reserve disk quota : blocks and inode.
3407 	 */
3408 	error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
3409 	if (error)
3410 		goto error_return;
3411 
3412 	/*
3413 	 * Check for ability to enter directory entry, if no space reserved.
3414 	 */
3415 	if (resblks == 0 &&
3416 	    (error = XFS_DIR_CANENTER(mp, tp, dp, link_name, link_namelen)))
3417 		goto error_return;
3418 	/*
3419 	 * Initialize the bmap freelist prior to calling either
3420 	 * bmapi or the directory create code.
3421 	 */
3422 	XFS_BMAP_INIT(&free_list, &first_block);
3423 
3424 	/*
3425 	 * Allocate an inode for the symlink.
3426 	 */
3427 	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (vap->va_mode&~S_IFMT),
3428 			       1, 0, credp, prid, resblks > 0, &ip, NULL);
3429 	if (error) {
3430 		if (error == ENOSPC)
3431 			goto error_return;
3432 		goto error1;
3433 	}
3434 	ITRACE(ip);
3435 
3436 	VN_HOLD(dir_vp);
3437 	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
3438 	dp_joined_to_trans = B_TRUE;
3439 
3440 	/*
3441 	 * Also attach the dquot(s) to it, if applicable.
3442 	 */
3443 	XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
3444 
3445 	if (resblks)
3446 		resblks -= XFS_IALLOC_SPACE_RES(mp);
3447 	/*
3448 	 * If the symlink will fit into the inode, write it inline.
3449 	 */
3450 	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
3451 		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
3452 		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
3453 		ip->i_d.di_size = pathlen;
3454 
3455 		/*
3456 		 * The inode was initially created in extent format.
3457 		 */
3458 		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
3459 		ip->i_df.if_flags |= XFS_IFINLINE;
3460 
3461 		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
3462 		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
3463 
3464 	} else {
3465 		first_fsb = 0;
3466 		nmaps = SYMLINK_MAPS;
3467 
3468 		error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
3469 				  XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
3470 				  &first_block, resblks, mval, &nmaps,
3471 				  &free_list);
3472 		if (error) {
3473 			goto error1;
3474 		}
3475 
3476 		if (resblks)
3477 			resblks -= fs_blocks;
3478 		ip->i_d.di_size = pathlen;
3479 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3480 
3481 		cur_chunk = target_path;
3482 		for (n = 0; n < nmaps; n++) {
3483 			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
3484 			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
3485 			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
3486 					       BTOBB(byte_cnt), 0);
3487 			ASSERT(bp && !XFS_BUF_GETERROR(bp));
3488 			if (pathlen < byte_cnt) {
3489 				byte_cnt = pathlen;
3490 			}
3491 			pathlen -= byte_cnt;
3492 
3493 			memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
3494 			cur_chunk += byte_cnt;
3495 
3496 			xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
3497 		}
3498 	}
3499 
3500 	/*
3501 	 * Create the directory entry for the symlink.
3502 	 */
3503 	error = XFS_DIR_CREATENAME(mp, tp, dp, link_name, link_namelen,
3504 			ip->i_ino, &first_block, &free_list, resblks);
3505 	if (error) {
3506 		goto error1;
3507 	}
3508 	xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3509 	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
3510 
3511 	/*
3512 	 * Bump the in memory version number of the parent directory
3513 	 * so that other processes accessing it will recognize that
3514 	 * the directory has changed.
3515 	 */
3516 	dp->i_gen++;
3517 
3518 	/*
3519 	 * If this is a synchronous mount, make sure that the
3520 	 * symlink transaction goes to disk before returning to
3521 	 * the user.
3522 	 */
3523 	if (mp->m_flags & XFS_MOUNT_WSYNC) {
3524 		xfs_trans_set_sync(tp);
3525 	}
3526 
3527 	/*
3528 	 * xfs_trans_commit normally decrements the vnode ref count
3529 	 * when it unlocks the inode. Since we want to return the
3530 	 * vnode to the caller, we bump the vnode ref count now.
3531 	 */
3532 	IHOLD(ip);
3533 
3534 	error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
3535 	if (error) {
3536 		goto error2;
3537 	}
3538 	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
3539 	XFS_QM_DQRELE(mp, udqp);
3540 	XFS_QM_DQRELE(mp, gdqp);
3541 
3542 	/* Fall through to std_return with error = 0 or errno from
3543 	 * xfs_trans_commit	*/
3544 std_return:
3545 	if (DM_EVENT_ENABLED(dir_vp->v_vfsp, XFS_BHVTOI(dir_bdp),
3546 			     DM_EVENT_POSTSYMLINK)) {
3547 		(void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTSYMLINK,
3548 					dir_vp, DM_RIGHT_NULL,
3549 					error ? NULL : XFS_ITOV(ip),
3550 					DM_RIGHT_NULL, link_name, target_path,
3551 					0, error, 0);
3552 	}
3553 
3554 	if (!error) {
3555 		vnode_t *vp;
3556 
3557 		ASSERT(ip);
3558 		vp = XFS_ITOV(ip);
3559 		*vpp = vp;
3560 	}
3561 	return error;
3562 
3563  error2:
3564 	IRELE(ip);
3565  error1:
3566 	xfs_bmap_cancel(&free_list);
3567 	cancel_flags |= XFS_TRANS_ABORT;
3568  error_return:
3569 	xfs_trans_cancel(tp, cancel_flags);
3570 	XFS_QM_DQRELE(mp, udqp);
3571 	XFS_QM_DQRELE(mp, gdqp);
3572 
3573 	if (!dp_joined_to_trans && (dp != NULL)) {
3574 		xfs_iunlock(dp, XFS_ILOCK_EXCL);
3575 	}
3576 
3577 	goto std_return;
3578 }
3579 
3580 
3581 /*
3582  * xfs_fid2
3583  *
3584  * A fid routine that takes a pointer to a previously allocated
3585  * fid structure (like xfs_fast_fid) but uses a 64 bit inode number.
3586  */
3587 STATIC int
xfs_fid2(bhv_desc_t * bdp,fid_t * fidp)3588 xfs_fid2(
3589 	bhv_desc_t	*bdp,
3590 	fid_t		*fidp)
3591 {
3592 	xfs_inode_t	*ip;
3593 	xfs_fid2_t	*xfid;
3594 
3595 	vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__,
3596 				       (inst_t *)__return_address);
3597 	ASSERT(sizeof(fid_t) >= sizeof(xfs_fid2_t));
3598 
3599 	xfid = (xfs_fid2_t *)fidp;
3600 	ip = XFS_BHVTOI(bdp);
3601 	xfid->fid_len = sizeof(xfs_fid2_t) - sizeof(xfid->fid_len);
3602 	xfid->fid_pad = 0;
3603 	/*
3604 	 * use memcpy because the inode is a long long and there's no
3605 	 * assurance that xfid->fid_ino is properly aligned.
3606 	 */
3607 	memcpy(&xfid->fid_ino, &ip->i_ino, sizeof(xfid->fid_ino));
3608 	xfid->fid_gen = ip->i_d.di_gen;
3609 
3610 	return 0;
3611 }
3612 
3613 
3614 /*
3615  * xfs_rwlock
3616  */
3617 int
xfs_rwlock(bhv_desc_t * bdp,vrwlock_t locktype)3618 xfs_rwlock(
3619 	bhv_desc_t	*bdp,
3620 	vrwlock_t	locktype)
3621 {
3622 	xfs_inode_t	*ip;
3623 	vnode_t		*vp;
3624 
3625 	vp = BHV_TO_VNODE(bdp);
3626 	if (vp->v_type == VDIR)
3627 		return 1;
3628 	ip = XFS_BHVTOI(bdp);
3629 	if (locktype == VRWLOCK_WRITE) {
3630 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
3631 	} else if (locktype == VRWLOCK_TRY_READ) {
3632 		return (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED));
3633 	} else if (locktype == VRWLOCK_TRY_WRITE) {
3634 		return (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL));
3635 	} else {
3636 		ASSERT((locktype == VRWLOCK_READ) ||
3637 		       (locktype == VRWLOCK_WRITE_DIRECT));
3638 		xfs_ilock(ip, XFS_IOLOCK_SHARED);
3639 	}
3640 
3641 	return 1;
3642 }
3643 
3644 
3645 /*
3646  * xfs_rwunlock
3647  */
3648 void
xfs_rwunlock(bhv_desc_t * bdp,vrwlock_t locktype)3649 xfs_rwunlock(
3650 	bhv_desc_t	*bdp,
3651 	vrwlock_t	locktype)
3652 {
3653 	xfs_inode_t     *ip;
3654 	vnode_t		*vp;
3655 
3656 	vp = BHV_TO_VNODE(bdp);
3657 	if (vp->v_type == VDIR)
3658 		return;
3659 	ip = XFS_BHVTOI(bdp);
3660 	if (locktype == VRWLOCK_WRITE) {
3661 		/*
3662 		 * In the write case, we may have added a new entry to
3663 		 * the reference cache.  This might store a pointer to
3664 		 * an inode to be released in this inode.  If it is there,
3665 		 * clear the pointer and release the inode after unlocking
3666 		 * this one.
3667 		 */
3668 		xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL);
3669 	} else {
3670 		ASSERT((locktype == VRWLOCK_READ) ||
3671 		       (locktype == VRWLOCK_WRITE_DIRECT));
3672 		xfs_iunlock(ip, XFS_IOLOCK_SHARED);
3673 	}
3674 	return;
3675 }
3676 
3677 STATIC int
xfs_inode_flush(bhv_desc_t * bdp,int flags)3678 xfs_inode_flush(
3679 	bhv_desc_t	*bdp,
3680 	int		flags)
3681 {
3682 	xfs_inode_t	*ip;
3683 	xfs_mount_t	*mp;
3684 	int		error = 0;
3685 
3686 	ip = XFS_BHVTOI(bdp);
3687 	mp = ip->i_mount;
3688 
3689 	if (XFS_FORCED_SHUTDOWN(mp))
3690 		return XFS_ERROR(EIO);
3691 
3692 	/* Bypass inodes which have already been cleaned by
3693 	 * the inode flush clustering code inside xfs_iflush
3694 	 */
3695 	if ((ip->i_update_core == 0) &&
3696 	    ((ip->i_itemp == NULL) ||
3697 	     !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)))
3698 		return 0;
3699 
3700 	if (flags & FLUSH_LOG) {
3701 		xfs_inode_log_item_t *iip = ip->i_itemp;
3702 
3703 		if (iip && iip->ili_last_lsn) {
3704 			xlog_t	*log = mp->m_log;
3705 			xfs_lsn_t	sync_lsn;
3706 			int		s, log_flags = XFS_LOG_FORCE;
3707 
3708 			s = GRANT_LOCK(log);
3709 			sync_lsn = log->l_last_sync_lsn;
3710 			GRANT_UNLOCK(log, s);
3711 
3712 			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0))
3713 				return 0;
3714 
3715 			if (flags & FLUSH_SYNC)
3716 				log_flags |= XFS_LOG_SYNC;
3717 			return xfs_log_force(mp, iip->ili_last_lsn,
3718 						log_flags);
3719 		}
3720 	}
3721 
3722 	/* We make this non-blocking if the inode is contended,
3723 	 * return EAGAIN to indicate to the caller that they
3724 	 * did not succeed. This prevents the flush path from
3725 	 * blocking on inodes inside another operation right
3726 	 * now, they get caught later by xfs_sync.
3727 	 */
3728 	if (flags & FLUSH_INODE) {
3729 		int	flush_flags;
3730 
3731 		if (xfs_ipincount(ip))
3732 			return EAGAIN;
3733 
3734 		if (flags & FLUSH_SYNC) {
3735 			xfs_ilock(ip, XFS_ILOCK_SHARED);
3736 			xfs_iflock(ip);
3737 		} else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
3738 			if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
3739 				xfs_iunlock(ip, XFS_ILOCK_SHARED);
3740 				return EAGAIN;
3741 			}
3742 		} else {
3743 			return EAGAIN;
3744 		}
3745 
3746 		if (flags & FLUSH_SYNC)
3747 			flush_flags = XFS_IFLUSH_SYNC;
3748 		else
3749 			flush_flags = XFS_IFLUSH_ASYNC;
3750 
3751 		error = xfs_iflush(ip, flush_flags);
3752 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
3753 	}
3754 
3755 	return error;
3756 }
3757 
3758 
3759 int
xfs_set_dmattrs(bhv_desc_t * bdp,u_int evmask,u_int16_t state,cred_t * credp)3760 xfs_set_dmattrs (
3761 	bhv_desc_t	*bdp,
3762 	u_int		evmask,
3763 	u_int16_t	state,
3764 	cred_t		*credp)
3765 {
3766 	xfs_inode_t     *ip;
3767 	xfs_trans_t	*tp;
3768 	xfs_mount_t	*mp;
3769 	int		error;
3770 
3771 	if (!capable(CAP_SYS_ADMIN))
3772 		return XFS_ERROR(EPERM);
3773 
3774 	ip = XFS_BHVTOI(bdp);
3775 	mp = ip->i_mount;
3776 
3777 	if (XFS_FORCED_SHUTDOWN(mp))
3778 		return XFS_ERROR(EIO);
3779 
3780 	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
3781 	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0);
3782 	if (error) {
3783 		xfs_trans_cancel(tp, 0);
3784 		return error;
3785 	}
3786 	xfs_ilock(ip, XFS_ILOCK_EXCL);
3787 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
3788 
3789 	ip->i_iocore.io_dmevmask = ip->i_d.di_dmevmask = evmask;
3790 	ip->i_iocore.io_dmstate  = ip->i_d.di_dmstate  = state;
3791 
3792 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
3793 	IHOLD(ip);
3794 	error = xfs_trans_commit(tp, 0, NULL);
3795 
3796 	return error;
3797 }
3798 
3799 
3800 /*
3801  * xfs_reclaim
3802  */
3803 STATIC int
xfs_reclaim(bhv_desc_t * bdp)3804 xfs_reclaim(
3805 	bhv_desc_t	*bdp)
3806 {
3807 	xfs_inode_t	*ip;
3808 	vnode_t		*vp;
3809 
3810 	vp = BHV_TO_VNODE(bdp);
3811 	ip = XFS_BHVTOI(bdp);
3812 
3813 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
3814 
3815 	ASSERT(!VN_MAPPED(vp));
3816 
3817 	/* bad inode, get out here ASAP */
3818 	if (VN_BAD(vp)) {
3819 		xfs_ireclaim(ip);
3820 		return 0;
3821 	}
3822 
3823 	if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
3824 		if (ip->i_d.di_size > 0) {
3825 			/*
3826 			 * Flush and invalidate any data left around that is
3827 			 * a part of this file.
3828 			 *
3829 			 * Get the inode's i/o lock so that buffers are pushed
3830 			 * out while holding the proper lock.  We can't hold
3831 			 * the inode lock here since flushing out buffers may
3832 			 * cause us to try to get the lock in xfs_strategy().
3833 			 *
3834 			 * We don't have to call remapf() here, because there
3835 			 * cannot be any mapped file references to this vnode
3836 			 * since it is being reclaimed.
3837 			 */
3838 			xfs_ilock(ip, XFS_IOLOCK_EXCL);
3839 
3840 			/*
3841 			 * If we hit an IO error, we need to make sure that the
3842 			 * buffer and page caches of file data for
3843 			 * the file are tossed away. We don't want to use
3844 			 * VOP_FLUSHINVAL_PAGES here because we don't want dirty
3845 			 * pages to stay attached to the vnode, but be
3846 			 * marked P_BAD. pdflush/vnode_pagebad
3847 			 * hates that.
3848 			 */
3849 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
3850 				VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE);
3851 			} else {
3852 				VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
3853 			}
3854 
3855 			ASSERT(VN_CACHED(vp) == 0);
3856 			ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) ||
3857 			       ip->i_delayed_blks == 0);
3858 			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
3859 		} else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
3860 			/*
3861 			 * di_size field may not be quite accurate if we're
3862 			 * shutting down.
3863 			 */
3864 			VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
3865 			ASSERT(VN_CACHED(vp) == 0);
3866 		}
3867 	}
3868 
3869 	/* If we have nothing to flush with this inode then complete the
3870 	 * teardown now, otherwise break the link between the xfs inode
3871 	 * and the linux inode and clean up the xfs inode later. This
3872 	 * avoids flushing the inode to disk during the delete operation
3873 	 * itself.
3874 	 */
3875 	if (!ip->i_update_core && (ip->i_itemp == NULL)) {
3876 		xfs_ilock(ip, XFS_ILOCK_EXCL);
3877 		xfs_iflock(ip);
3878 		return xfs_finish_reclaim(ip, 1, XFS_IFLUSH_DELWRI_ELSE_SYNC);
3879 	} else {
3880 		xfs_mount_t	*mp = ip->i_mount;
3881 
3882 		/* Protect sync from us */
3883 		XFS_MOUNT_ILOCK(mp);
3884 		vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
3885 		list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
3886 		ip->i_flags |= XFS_IRECLAIMABLE;
3887 		XFS_MOUNT_IUNLOCK(mp);
3888 	}
3889 	return 0;
3890 }
3891 
3892 int
xfs_finish_reclaim(xfs_inode_t * ip,int locked,int sync_mode)3893 xfs_finish_reclaim(
3894 	xfs_inode_t	*ip,
3895 	int		locked,
3896 	int		sync_mode)
3897 {
3898 	xfs_ihash_t	*ih = ip->i_hash;
3899 	vnode_t		*vp = XFS_ITOV_NULL(ip);
3900 	int		error;
3901 
3902 	if (vp && VN_BAD(vp))
3903 		goto reclaim;
3904 
3905 	/* The hash lock here protects a thread in xfs_iget_core from
3906 	 * racing with us on linking the inode back with a vnode.
3907 	 * Once we have the XFS_IRECLAIM flag set it will not touch
3908 	 * us.
3909 	 */
3910 	write_lock(&ih->ih_lock);
3911 	if ((ip->i_flags & XFS_IRECLAIM) ||
3912 	    (!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) {
3913 		write_unlock(&ih->ih_lock);
3914 		if (locked) {
3915 			xfs_ifunlock(ip);
3916 			xfs_iunlock(ip, XFS_ILOCK_EXCL);
3917 		}
3918 		return(1);
3919 	}
3920 	ip->i_flags |= XFS_IRECLAIM;
3921 	write_unlock(&ih->ih_lock);
3922 
3923 	/*
3924 	 * If the inode is still dirty, then flush it out.  If the inode
3925 	 * is not in the AIL, then it will be OK to flush it delwri as
3926 	 * long as xfs_iflush() does not keep any references to the inode.
3927 	 * We leave that decision up to xfs_iflush() since it has the
3928 	 * knowledge of whether it's OK to simply do a delwri flush of
3929 	 * the inode or whether we need to wait until the inode is
3930 	 * pulled from the AIL.
3931 	 * We get the flush lock regardless, though, just to make sure
3932 	 * we don't free it while it is being flushed.
3933 	 */
3934 	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
3935 		if (!locked) {
3936 			xfs_ilock(ip, XFS_ILOCK_EXCL);
3937 			xfs_iflock(ip);
3938 		}
3939 
3940 		if (ip->i_update_core ||
3941 		    ((ip->i_itemp != NULL) &&
3942 		     (ip->i_itemp->ili_format.ilf_fields != 0))) {
3943 			error = xfs_iflush(ip, sync_mode);
3944 			/*
3945 			 * If we hit an error, typically because of filesystem
3946 			 * shutdown, we don't need to let vn_reclaim to know
3947 			 * because we're gonna reclaim the inode anyway.
3948 			 */
3949 			if (error) {
3950 				xfs_iunlock(ip, XFS_ILOCK_EXCL);
3951 				goto reclaim;
3952 			}
3953 			xfs_iflock(ip); /* synchronize with xfs_iflush_done */
3954 		}
3955 
3956 		ASSERT(ip->i_update_core == 0);
3957 		ASSERT(ip->i_itemp == NULL ||
3958 		       ip->i_itemp->ili_format.ilf_fields == 0);
3959 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
3960 	} else if (locked) {
3961 		/*
3962 		 * We are not interested in doing an iflush if we're
3963 		 * in the process of shutting down the filesystem forcibly.
3964 		 * So, just reclaim the inode.
3965 		 */
3966 		xfs_ifunlock(ip);
3967 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
3968 	}
3969 
3970  reclaim:
3971 	xfs_ireclaim(ip);
3972 	return 0;
3973 }
3974 
3975 int
xfs_finish_reclaim_all(xfs_mount_t * mp,int noblock)3976 xfs_finish_reclaim_all(xfs_mount_t *mp, int noblock)
3977 {
3978 	int		purged;
3979 	xfs_inode_t	*ip, *n;
3980 	int		done = 0;
3981 
3982 	while (!done) {
3983 		purged = 0;
3984 		XFS_MOUNT_ILOCK(mp);
3985 		list_for_each_entry_safe(ip, n, &mp->m_del_inodes, i_reclaim) {
3986 			if (noblock) {
3987 				if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0)
3988 					continue;
3989 				if (xfs_ipincount(ip) ||
3990 				    !xfs_iflock_nowait(ip)) {
3991 					xfs_iunlock(ip, XFS_ILOCK_EXCL);
3992 					continue;
3993 				}
3994 			}
3995 			XFS_MOUNT_IUNLOCK(mp);
3996 			xfs_finish_reclaim(ip, noblock,
3997 				XFS_IFLUSH_DELWRI_ELSE_ASYNC);
3998 			purged = 1;
3999 			break;
4000 		}
4001 
4002 		done = !purged;
4003 	}
4004 
4005 	XFS_MOUNT_IUNLOCK(mp);
4006 	return 0;
4007 }
4008 
4009 /*
4010  * xfs_alloc_file_space()
4011  *      This routine allocates disk space for the given file.
4012  *
4013  *	If alloc_type == 0, this request is for an ALLOCSP type
4014  *	request which will change the file size.  In this case, no
4015  *	DMAPI event will be generated by the call.  A TRUNCATE event
4016  *	will be generated later by xfs_setattr.
4017  *
4018  *	If alloc_type != 0, this request is for a RESVSP type
4019  *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
4020  *	lower block boundary byte address is less than the file's
4021  *	length.
4022  *
4023  * RETURNS:
4024  *       0 on success
4025  *      errno on error
4026  *
4027  */
4028 int
xfs_alloc_file_space(xfs_inode_t * ip,xfs_off_t offset,xfs_off_t len,int alloc_type,int attr_flags)4029 xfs_alloc_file_space(
4030 	xfs_inode_t		*ip,
4031 	xfs_off_t		offset,
4032 	xfs_off_t		len,
4033 	int			alloc_type,
4034 	int			attr_flags)
4035 {
4036 	xfs_filblks_t		allocated_fsb;
4037 	xfs_filblks_t		allocatesize_fsb;
4038 	int			committed;
4039 	xfs_off_t		count;
4040 	xfs_filblks_t		datablocks;
4041 	int			error;
4042 	xfs_fsblock_t		firstfsb;
4043 	xfs_bmap_free_t		free_list;
4044 	xfs_bmbt_irec_t		*imapp;
4045 	xfs_bmbt_irec_t		imaps[1];
4046 	xfs_mount_t		*mp;
4047 	int			numrtextents;
4048 	int			reccount;
4049 	uint			resblks;
4050 	int			rt;
4051 	int			rtextsize;
4052 	xfs_fileoff_t		startoffset_fsb;
4053 	xfs_trans_t		*tp;
4054 	int			xfs_bmapi_flags;
4055 
4056 	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
4057 	mp = ip->i_mount;
4058 
4059 	if (XFS_FORCED_SHUTDOWN(mp))
4060 		return XFS_ERROR(EIO);
4061 
4062 	/*
4063 	 * determine if this is a realtime file
4064 	 */
4065 	if ((rt = XFS_IS_REALTIME_INODE(ip)) != 0) {
4066 		if (ip->i_d.di_extsize)
4067 			rtextsize = ip->i_d.di_extsize;
4068 		else
4069 			rtextsize = mp->m_sb.sb_rextsize;
4070 	} else
4071 		rtextsize = 0;
4072 
4073 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
4074 		return error;
4075 
4076 	if (len <= 0)
4077 		return XFS_ERROR(EINVAL);
4078 
4079 	count = len;
4080 	error = 0;
4081 	imapp = &imaps[0];
4082 	reccount = 1;
4083 	xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
4084 	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
4085 	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
4086 
4087 	/*	Generate a DMAPI event if needed.	*/
4088 	if (alloc_type != 0 && offset < ip->i_d.di_size &&
4089 			(attr_flags&ATTR_DMI) == 0  &&
4090 			DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
4091 		xfs_off_t           end_dmi_offset;
4092 
4093 		end_dmi_offset = offset+len;
4094 		if (end_dmi_offset > ip->i_d.di_size)
4095 			end_dmi_offset = ip->i_d.di_size;
4096 		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
4097 			offset, end_dmi_offset - offset,
4098 			0, NULL);
4099 		if (error)
4100 			return(error);
4101 	}
4102 
4103 	/*
4104 	 * allocate file space until done or until there is an error
4105 	 */
4106 retry:
4107 	while (allocatesize_fsb && !error) {
4108 		/*
4109 		 * determine if reserving space on
4110 		 * the data or realtime partition.
4111 		 */
4112 		if (rt) {
4113 			xfs_fileoff_t s, e;
4114 
4115 			s = startoffset_fsb;
4116 			do_div(s, rtextsize);
4117 			s *= rtextsize;
4118 			e = roundup_64(startoffset_fsb + allocatesize_fsb,
4119 				rtextsize);
4120 			numrtextents = (int)(e - s) / mp->m_sb.sb_rextsize;
4121 			datablocks = 0;
4122 		} else {
4123 			datablocks = allocatesize_fsb;
4124 			numrtextents = 0;
4125 		}
4126 
4127 		/*
4128 		 * allocate and setup the transaction
4129 		 */
4130 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
4131 		resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
4132 		error = xfs_trans_reserve(tp,
4133 					  resblks,
4134 					  XFS_WRITE_LOG_RES(mp),
4135 					  numrtextents,
4136 					  XFS_TRANS_PERM_LOG_RES,
4137 					  XFS_WRITE_LOG_COUNT);
4138 
4139 		/*
4140 		 * check for running out of space
4141 		 */
4142 		if (error) {
4143 			/*
4144 			 * Free the transaction structure.
4145 			 */
4146 			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
4147 			xfs_trans_cancel(tp, 0);
4148 			break;
4149 		}
4150 		xfs_ilock(ip, XFS_ILOCK_EXCL);
4151 		error = XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp,
4152 				ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
4153 				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4154 		if (error)
4155 			goto error1;
4156 
4157 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
4158 		xfs_trans_ihold(tp, ip);
4159 
4160 		/*
4161 		 * issue the bmapi() call to allocate the blocks
4162 		 */
4163 		XFS_BMAP_INIT(&free_list, &firstfsb);
4164 		error = xfs_bmapi(tp, ip, startoffset_fsb,
4165 				  allocatesize_fsb, xfs_bmapi_flags,
4166 				  &firstfsb, 0, imapp, &reccount,
4167 				  &free_list);
4168 		if (error) {
4169 			goto error0;
4170 		}
4171 
4172 		/*
4173 		 * complete the transaction
4174 		 */
4175 		error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
4176 		if (error) {
4177 			goto error0;
4178 		}
4179 
4180 		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
4181 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
4182 		if (error) {
4183 			break;
4184 		}
4185 
4186 		allocated_fsb = imapp->br_blockcount;
4187 
4188 		if (reccount == 0) {
4189 			error = XFS_ERROR(ENOSPC);
4190 			break;
4191 		}
4192 
4193 		startoffset_fsb += allocated_fsb;
4194 		allocatesize_fsb -= allocated_fsb;
4195 	}
4196 dmapi_enospc_check:
4197 	if (error == ENOSPC && (attr_flags&ATTR_DMI) == 0 &&
4198 	    DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_NOSPACE)) {
4199 
4200 		error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
4201 				XFS_ITOV(ip), DM_RIGHT_NULL,
4202 				XFS_ITOV(ip), DM_RIGHT_NULL,
4203 				NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */
4204 		if (error == 0)
4205 			goto retry;	/* Maybe DMAPI app. has made space */
4206 		/* else fall through with error from XFS_SEND_DATA */
4207 	}
4208 
4209 	return error;
4210 
4211  error0:
4212 	xfs_bmap_cancel(&free_list);
4213  error1:
4214 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
4215 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4216 	goto dmapi_enospc_check;
4217 }
4218 
4219 /*
4220  * Zero file bytes between startoff and endoff inclusive.
4221  * The iolock is held exclusive and no blocks are buffered.
4222  */
4223 STATIC int
xfs_zero_remaining_bytes(xfs_inode_t * ip,xfs_off_t startoff,xfs_off_t endoff)4224 xfs_zero_remaining_bytes(
4225 	xfs_inode_t		*ip,
4226 	xfs_off_t		startoff,
4227 	xfs_off_t		endoff)
4228 {
4229 	xfs_bmbt_irec_t		imap;
4230 	xfs_fileoff_t		offset_fsb;
4231 	xfs_off_t		lastoffset;
4232 	xfs_off_t		offset;
4233 	xfs_buf_t		*bp;
4234 	xfs_mount_t		*mp = ip->i_mount;
4235 	int			nimap;
4236 	int			error = 0;
4237 
4238 	bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
4239 				ip->i_d.di_flags & XFS_DIFLAG_REALTIME ?
4240 				mp->m_rtdev_targp : mp->m_ddev_targp);
4241 
4242 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
4243 		offset_fsb = XFS_B_TO_FSBT(mp, offset);
4244 		nimap = 1;
4245 		error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0, NULL, 0, &imap,
4246 			&nimap, NULL);
4247 		if (error || nimap < 1)
4248 			break;
4249 		ASSERT(imap.br_blockcount >= 1);
4250 		ASSERT(imap.br_startoff == offset_fsb);
4251 		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
4252 		if (lastoffset > endoff)
4253 			lastoffset = endoff;
4254 		if (imap.br_startblock == HOLESTARTBLOCK)
4255 			continue;
4256 		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
4257 		if (imap.br_state == XFS_EXT_UNWRITTEN)
4258 			continue;
4259 		XFS_BUF_UNDONE(bp);
4260 		XFS_BUF_UNWRITE(bp);
4261 		XFS_BUF_READ(bp);
4262 		XFS_BUF_SET_ADDR(bp, XFS_FSB_TO_DB(ip, imap.br_startblock));
4263 		xfsbdstrat(mp, bp);
4264 		if ((error = xfs_iowait(bp))) {
4265 			xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
4266 					  mp, bp, XFS_BUF_ADDR(bp));
4267 			break;
4268 		}
4269 		memset(XFS_BUF_PTR(bp) +
4270 			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
4271 		      0, lastoffset - offset + 1);
4272 		XFS_BUF_UNDONE(bp);
4273 		XFS_BUF_UNREAD(bp);
4274 		XFS_BUF_WRITE(bp);
4275 		xfsbdstrat(mp, bp);
4276 		if ((error = xfs_iowait(bp))) {
4277 			xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
4278 					  mp, bp, XFS_BUF_ADDR(bp));
4279 			break;
4280 		}
4281 	}
4282 	xfs_buf_free(bp);
4283 	return error;
4284 }
4285 
4286 /*
4287  * xfs_free_file_space()
4288  *      This routine frees disk space for the given file.
4289  *
4290  *	This routine is only called by xfs_change_file_space
4291  *	for an UNRESVSP type call.
4292  *
4293  * RETURNS:
4294  *       0 on success
4295  *      errno on error
4296  *
4297  */
4298 STATIC int
xfs_free_file_space(xfs_inode_t * ip,xfs_off_t offset,xfs_off_t len,int attr_flags)4299 xfs_free_file_space(
4300 	xfs_inode_t		*ip,
4301 	xfs_off_t		offset,
4302 	xfs_off_t		len,
4303 	int			attr_flags)
4304 {
4305 	int			committed;
4306 	int			done;
4307 	xfs_off_t		end_dmi_offset;
4308 	xfs_fileoff_t		endoffset_fsb;
4309 	int			error;
4310 	xfs_fsblock_t		firstfsb;
4311 	xfs_bmap_free_t		free_list;
4312 	xfs_off_t		ilen;
4313 	xfs_bmbt_irec_t		imap;
4314 	xfs_off_t		ioffset;
4315 	xfs_extlen_t		mod=0;
4316 	xfs_mount_t		*mp;
4317 	int			nimap;
4318 	uint			resblks;
4319 	int			rounding;
4320 	int			rt;
4321 	xfs_fileoff_t		startoffset_fsb;
4322 	xfs_trans_t		*tp;
4323 	int			need_iolock = (attr_flags & ATTR_DMI) == 0;
4324 
4325 	vn_trace_entry(XFS_ITOV(ip), __FUNCTION__, (inst_t *)__return_address);
4326 	mp = ip->i_mount;
4327 
4328 	if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
4329 		return error;
4330 
4331 	error = 0;
4332 	if (len <= 0)	/* if nothing being freed */
4333 		return error;
4334 	rt = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME);
4335 	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
4336 	end_dmi_offset = offset + len;
4337 	endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
4338 
4339 	if (offset < ip->i_d.di_size &&
4340 	    (attr_flags & ATTR_DMI) == 0 &&
4341 	    DM_EVENT_ENABLED(XFS_MTOVFS(mp), ip, DM_EVENT_WRITE)) {
4342 		if (end_dmi_offset > ip->i_d.di_size)
4343 			end_dmi_offset = ip->i_d.di_size;
4344 		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, XFS_ITOV(ip),
4345 				offset, end_dmi_offset - offset,
4346 				AT_DELAY_FLAG(attr_flags), NULL);
4347 		if (error)
4348 			return(error);
4349 	}
4350 
4351 	if (need_iolock)
4352 		xfs_ilock(ip, XFS_IOLOCK_EXCL);
4353 	rounding = MAX((__uint8_t)(1 << mp->m_sb.sb_blocklog),
4354 			(__uint8_t)NBPP);
4355 	ilen = len + (offset & (rounding - 1));
4356 	ioffset = offset & ~(rounding - 1);
4357 	if (ilen & (rounding - 1))
4358 		ilen = (ilen + rounding) & ~(rounding - 1);
4359 	xfs_inval_cached_pages(XFS_ITOV(ip), &(ip->i_iocore), ioffset, 0, 0);
4360 	/*
4361 	 * Need to zero the stuff we're not freeing, on disk.
4362 	 * If its a realtime file & can't use unwritten extents then we
4363 	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
4364 	 * will take care of it for us.
4365 	 */
4366 	if (rt && !XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb)) {
4367 		nimap = 1;
4368 		error = xfs_bmapi(NULL, ip, startoffset_fsb, 1, 0, NULL, 0,
4369 			&imap, &nimap, NULL);
4370 		if (error)
4371 			goto out_unlock_iolock;
4372 		ASSERT(nimap == 0 || nimap == 1);
4373 		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
4374 			xfs_daddr_t	block;
4375 
4376 			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
4377 			block = imap.br_startblock;
4378 			mod = do_div(block, mp->m_sb.sb_rextsize);
4379 			if (mod)
4380 				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
4381 		}
4382 		nimap = 1;
4383 		error = xfs_bmapi(NULL, ip, endoffset_fsb - 1, 1, 0, NULL, 0,
4384 			&imap, &nimap, NULL);
4385 		if (error)
4386 			goto out_unlock_iolock;
4387 		ASSERT(nimap == 0 || nimap == 1);
4388 		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
4389 			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
4390 			mod++;
4391 			if (mod && (mod != mp->m_sb.sb_rextsize))
4392 				endoffset_fsb -= mod;
4393 		}
4394 	}
4395 	if ((done = (endoffset_fsb <= startoffset_fsb)))
4396 		/*
4397 		 * One contiguous piece to clear
4398 		 */
4399 		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
4400 	else {
4401 		/*
4402 		 * Some full blocks, possibly two pieces to clear
4403 		 */
4404 		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
4405 			error = xfs_zero_remaining_bytes(ip, offset,
4406 				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
4407 		if (!error &&
4408 		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
4409 			error = xfs_zero_remaining_bytes(ip,
4410 				XFS_FSB_TO_B(mp, endoffset_fsb),
4411 				offset + len - 1);
4412 	}
4413 
4414 	/*
4415 	 * free file space until done or until there is an error
4416 	 */
4417 	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
4418 	while (!error && !done) {
4419 
4420 		/*
4421 		 * allocate and setup the transaction
4422 		 */
4423 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
4424 		error = xfs_trans_reserve(tp,
4425 					  resblks,
4426 					  XFS_WRITE_LOG_RES(mp),
4427 					  0,
4428 					  XFS_TRANS_PERM_LOG_RES,
4429 					  XFS_WRITE_LOG_COUNT);
4430 
4431 		/*
4432 		 * check for running out of space
4433 		 */
4434 		if (error) {
4435 			/*
4436 			 * Free the transaction structure.
4437 			 */
4438 			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
4439 			xfs_trans_cancel(tp, 0);
4440 			break;
4441 		}
4442 		xfs_ilock(ip, XFS_ILOCK_EXCL);
4443 		error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
4444 				ip->i_udquot, ip->i_gdquot, resblks, 0, rt ?
4445 				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
4446 		if (error)
4447 			goto error1;
4448 
4449 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
4450 		xfs_trans_ihold(tp, ip);
4451 
4452 		/*
4453 		 * issue the bunmapi() call to free the blocks
4454 		 */
4455 		XFS_BMAP_INIT(&free_list, &firstfsb);
4456 		error = xfs_bunmapi(tp, ip, startoffset_fsb,
4457 				  endoffset_fsb - startoffset_fsb,
4458 				  0, 2, &firstfsb, &free_list, &done);
4459 		if (error) {
4460 			goto error0;
4461 		}
4462 
4463 		/*
4464 		 * complete the transaction
4465 		 */
4466 		error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
4467 		if (error) {
4468 			goto error0;
4469 		}
4470 
4471 		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
4472 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
4473 	}
4474 
4475  out_unlock_iolock:
4476 	if (need_iolock)
4477 		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
4478 	return error;
4479 
4480  error0:
4481 	xfs_bmap_cancel(&free_list);
4482  error1:
4483 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
4484 	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
4485 		    XFS_ILOCK_EXCL);
4486 	return error;
4487 }
4488 
4489 /*
4490  * xfs_change_file_space()
4491  *      This routine allocates or frees disk space for the given file.
4492  *      The user specified parameters are checked for alignment and size
4493  *      limitations.
4494  *
4495  * RETURNS:
4496  *       0 on success
4497  *      errno on error
4498  *
4499  */
4500 int
xfs_change_file_space(bhv_desc_t * bdp,int cmd,xfs_flock64_t * bf,xfs_off_t offset,cred_t * credp,int attr_flags)4501 xfs_change_file_space(
4502 	bhv_desc_t	*bdp,
4503 	int		cmd,
4504 	xfs_flock64_t	*bf,
4505 	xfs_off_t	offset,
4506 	cred_t		*credp,
4507 	int		attr_flags)
4508 {
4509 	int		clrprealloc;
4510 	int		error;
4511 	xfs_fsize_t	fsize;
4512 	xfs_inode_t	*ip;
4513 	xfs_mount_t	*mp;
4514 	int		setprealloc;
4515 	xfs_off_t	startoffset;
4516 	xfs_off_t	llen;
4517 	xfs_trans_t	*tp;
4518 	vattr_t		va;
4519 	vnode_t		*vp;
4520 
4521 	vp = BHV_TO_VNODE(bdp);
4522 	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
4523 
4524 	ip = XFS_BHVTOI(bdp);
4525 	mp = ip->i_mount;
4526 
4527 	/*
4528 	 * must be a regular file and have write permission
4529 	 */
4530 	if (vp->v_type != VREG)
4531 		return XFS_ERROR(EINVAL);
4532 
4533 	xfs_ilock(ip, XFS_ILOCK_SHARED);
4534 
4535 	if ((error = xfs_iaccess(ip, S_IWUSR, credp))) {
4536 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
4537 		return error;
4538 	}
4539 
4540 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
4541 
4542 	switch (bf->l_whence) {
4543 	case 0: /*SEEK_SET*/
4544 		break;
4545 	case 1: /*SEEK_CUR*/
4546 		bf->l_start += offset;
4547 		break;
4548 	case 2: /*SEEK_END*/
4549 		bf->l_start += ip->i_d.di_size;
4550 		break;
4551 	default:
4552 		return XFS_ERROR(EINVAL);
4553 	}
4554 
4555 	llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;
4556 
4557 	if (   (bf->l_start < 0)
4558 	    || (bf->l_start > XFS_MAXIOFFSET(mp))
4559 	    || (bf->l_start + llen < 0)
4560 	    || (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
4561 		return XFS_ERROR(EINVAL);
4562 
4563 	bf->l_whence = 0;
4564 
4565 	startoffset = bf->l_start;
4566 	fsize = ip->i_d.di_size;
4567 
4568 	/*
4569 	 * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
4570 	 * file space.
4571 	 * These calls do NOT zero the data space allocated to the file,
4572 	 * nor do they change the file size.
4573 	 *
4574 	 * XFS_IOC_ALLOCSP and XFS_IOC_FREESP will allocate and free file
4575 	 * space.
4576 	 * These calls cause the new file data to be zeroed and the file
4577 	 * size to be changed.
4578 	 */
4579 	setprealloc = clrprealloc = 0;
4580 
4581 	switch (cmd) {
4582 	case XFS_IOC_RESVSP:
4583 	case XFS_IOC_RESVSP64:
4584 		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
4585 								1, attr_flags);
4586 		if (error)
4587 			return error;
4588 		setprealloc = 1;
4589 		break;
4590 
4591 	case XFS_IOC_UNRESVSP:
4592 	case XFS_IOC_UNRESVSP64:
4593 		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
4594 								attr_flags)))
4595 			return error;
4596 		break;
4597 
4598 	case XFS_IOC_ALLOCSP:
4599 	case XFS_IOC_ALLOCSP64:
4600 	case XFS_IOC_FREESP:
4601 	case XFS_IOC_FREESP64:
4602 		if (startoffset > fsize) {
4603 			error = xfs_alloc_file_space(ip, fsize,
4604 					startoffset - fsize, 0, attr_flags);
4605 			if (error)
4606 				break;
4607 		}
4608 
4609 		va.va_mask = XFS_AT_SIZE;
4610 		va.va_size = startoffset;
4611 
4612 		error = xfs_setattr(bdp, &va, attr_flags, credp);
4613 
4614 		if (error)
4615 			return error;
4616 
4617 		clrprealloc = 1;
4618 		break;
4619 
4620 	default:
4621 		ASSERT(0);
4622 		return XFS_ERROR(EINVAL);
4623 	}
4624 
4625 	/*
4626 	 * update the inode timestamp, mode, and prealloc flag bits
4627 	 */
4628 	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
4629 
4630 	if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
4631 				      0, 0, 0))) {
4632 		/* ASSERT(0); */
4633 		xfs_trans_cancel(tp, 0);
4634 		return error;
4635 	}
4636 
4637 	xfs_ilock(ip, XFS_ILOCK_EXCL);
4638 
4639 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
4640 	xfs_trans_ihold(tp, ip);
4641 
4642 	if ((attr_flags & ATTR_DMI) == 0) {
4643 		ip->i_d.di_mode &= ~S_ISUID;
4644 
4645 		/*
4646 		 * Note that we don't have to worry about mandatory
4647 		 * file locking being disabled here because we only
4648 		 * clear the S_ISGID bit if the Group execute bit is
4649 		 * on, but if it was on then mandatory locking wouldn't
4650 		 * have been enabled.
4651 		 */
4652 		if (ip->i_d.di_mode & S_IXGRP)
4653 			ip->i_d.di_mode &= ~S_ISGID;
4654 
4655 		xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
4656 	}
4657 	if (setprealloc)
4658 		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
4659 	else if (clrprealloc)
4660 		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
4661 
4662 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
4663 	xfs_trans_set_sync(tp);
4664 
4665 	error = xfs_trans_commit(tp, 0, NULL);
4666 
4667 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
4668 
4669 	return error;
4670 }
4671 
4672 vnodeops_t xfs_vnodeops = {
4673 	BHV_IDENTITY_INIT(VN_BHV_XFS,VNODE_POSITION_XFS),
4674 	.vop_open		= xfs_open,
4675 	.vop_read		= xfs_read,
4676 #ifdef HAVE_SENDFILE
4677 	.vop_sendfile		= xfs_sendfile,
4678 #endif
4679 	.vop_write		= xfs_write,
4680 	.vop_ioctl		= xfs_ioctl,
4681 	.vop_getattr		= xfs_getattr,
4682 	.vop_setattr		= xfs_setattr,
4683 	.vop_access		= xfs_access,
4684 	.vop_lookup		= xfs_lookup,
4685 	.vop_create		= xfs_create,
4686 	.vop_remove		= xfs_remove,
4687 	.vop_link		= xfs_link,
4688 	.vop_rename		= xfs_rename,
4689 	.vop_mkdir		= xfs_mkdir,
4690 	.vop_rmdir		= xfs_rmdir,
4691 	.vop_readdir		= xfs_readdir,
4692 	.vop_symlink		= xfs_symlink,
4693 	.vop_readlink		= xfs_readlink,
4694 	.vop_fsync		= xfs_fsync,
4695 	.vop_inactive		= xfs_inactive,
4696 	.vop_fid2		= xfs_fid2,
4697 	.vop_rwlock		= xfs_rwlock,
4698 	.vop_rwunlock		= xfs_rwunlock,
4699 	.vop_bmap		= xfs_bmap,
4700 	.vop_reclaim		= xfs_reclaim,
4701 	.vop_attr_get		= xfs_attr_get,
4702 	.vop_attr_set		= xfs_attr_set,
4703 	.vop_attr_remove	= xfs_attr_remove,
4704 	.vop_attr_list		= xfs_attr_list,
4705 	.vop_link_removed	= (vop_link_removed_t)fs_noval,
4706 	.vop_vnode_change	= (vop_vnode_change_t)fs_noval,
4707 	.vop_tosspages		= fs_tosspages,
4708 	.vop_flushinval_pages	= fs_flushinval_pages,
4709 	.vop_flush_pages	= fs_flush_pages,
4710 	.vop_release		= xfs_release,
4711 	.vop_iflush		= xfs_inode_flush,
4712 };
4713