| author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
| commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
| tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/xfs_vnodeops.c | |
Linux-2.6.12-rc2 (v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/xfs/xfs_vnodeops.c')
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 4712
1 files changed, 4712 insertions, 0 deletions
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
new file mode 100644
index 00000000000..70092963ca9
--- /dev/null
+++ b/fs/xfs/xfs_vnodeops.c
@@ -0,0 +1,4712 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_macros.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_itable.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_alloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode_item.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_rw.h"
+#include "xfs_refcache.h"
+#include "xfs_error.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_quota.h"
+#include "xfs_utils.h"
+#include "xfs_trans_space.h"
+#include "xfs_dir_leaf.h"
+#include "xfs_mac.h"
+#include "xfs_log_priv.h"
+
+
+/*
+ * The maximum pathlen is 1024 bytes. Since the minimum file system
+ * blocksize is 512 bytes, we can get a max of 2 extents back from
+ * bmapi.
+ */
+#define SYMLINK_MAPS 2
+
+/*
+ * For xfs, we check that the file isn't too big to be opened by this kernel.
+ * No other open action is required for regular files. Devices are handled
+ * through the specfs file system, pipes through fifofs. Device and
+ * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
+ * when a new vnode is first looked up or created.
+ */
+STATIC int
+xfs_open(
+	bhv_desc_t	*bdp,
+	cred_t		*credp)
+{
+	int		mode;
+	vnode_t		*vp;
+	xfs_inode_t	*ip;
+
+	vp = BHV_TO_VNODE(bdp);
+	ip = XFS_BHVTOI(bdp);
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return XFS_ERROR(EIO);
+
+	/*
+	 * If it's a directory with any blocks, read-ahead block 0
+	 * as we're almost certain to have the next operation be a read there.
+	 */
+	if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) {
+		mode = xfs_ilock_map_shared(ip);
+		if (ip->i_d.di_nextents > 0)
+			(void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
+		xfs_iunlock(ip, mode);
+	}
+	return 0;
+}
+
+
+/*
+ * xfs_getattr
+ */
+STATIC int
+xfs_getattr(
+	bhv_desc_t	*bdp,
+	vattr_t		*vap,
+	int		flags,
+	cred_t		*credp)
+{
+	xfs_inode_t	*ip;
+	xfs_mount_t	*mp;
+	vnode_t		*vp;
+
+	vp = BHV_TO_VNODE(bdp);
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
+	ip = XFS_BHVTOI(bdp);
+	mp = ip->i_mount;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	if (!(flags & ATTR_LAZY))
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+	vap->va_size = ip->i_d.di_size;
+	if (vap->va_mask == XFS_AT_SIZE)
+		goto all_done;
+
+	vap->va_nblocks =
+		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+	vap->va_nodeid = ip->i_ino;
+#if XFS_BIG_INUMS
+	vap->va_nodeid += mp->m_inoadd;
+#endif
+	vap->va_nlink = ip->i_d.di_nlink;
+
+	/*
+	 * Quick exit for non-stat callers
+	 */
+	if ((vap->va_mask &
+	    ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
+	      XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
+		goto all_done;
+
+	/*
+	 * Copy from in-core inode.
+	 */
+	vap->va_type = vp->v_type;
+	vap->va_mode = ip->i_d.di_mode & MODEMASK;
+	vap->va_uid = ip->i_d.di_uid;
+	vap->va_gid = ip->i_d.di_gid;
+	vap->va_projid = ip->i_d.di_projid;
+
+	/*
+	 * Check vnode type block/char vs. everything else.
+	 * Do it with bitmask because that's faster than looking
+	 * for multiple values individually.
+	 */
+	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+		vap->va_rdev = 0;
+
+		if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+
+#if 0
+			/* Large block sizes confuse various
+			 * user space programs, so letting the
+			 * stripe size through is not a good
+			 * idea for now.
+			 */
+			vap->va_blocksize = mp->m_swidth ?
+				/*
+				 * If the underlying volume is a stripe, then
+				 * return the stripe width in bytes as the
+				 * recommended I/O size.
+				 */
+				(mp->m_swidth << mp->m_sb.sb_blocklog) :
+				/*
+				 * Return the largest of the preferred buffer
+				 * sizes since doing small I/Os into larger
+				 * buffers causes buffers to be decommissioned.
+				 * The value returned is in bytes.
+				 */
+				(1 << (int)MAX(mp->m_readio_log,
+				mp->m_writeio_log));
+
+#else
+			vap->va_blocksize =
+				/*
+				 * Return the largest of the preferred buffer
+				 * sizes since doing small I/Os into larger
+				 * buffers causes buffers to be decommissioned.
+				 * The value returned is in bytes.
+				 */
+				1 << (int)MAX(mp->m_readio_log,
+				    mp->m_writeio_log);
+#endif
+		} else {
+
+			/*
+			 * If the file blocks are being allocated from a
+			 * realtime partition, then return the inode's
+			 * realtime extent size or the realtime volume's
+			 * extent size.
+			 */
+			vap->va_blocksize = ip->i_d.di_extsize ?
+				(ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
+				(mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
+		}
+	} else {
+		vap->va_rdev = ip->i_df.if_u2.if_rdev;
+		vap->va_blocksize = BLKDEV_IOSIZE;
+	}
+
+	vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
+	vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
+	vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
+	vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
+	vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
+	vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
+
+	/*
+	 * Exit for stat callers. See if any of the rest of the fields
+	 * to be filled in are needed.
+	 */
+	if ((vap->va_mask &
+	    (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
+	     XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
+		goto all_done;
+
+	/*
+	 * Convert di_flags to xflags.
+	 */
+	vap->va_xflags = xfs_ip2xflags(ip);
+
+	/*
+	 * Exit for inode revalidate. See if any of the rest of
+	 * the fields to be filled in are needed.
+	 */
+	if ((vap->va_mask &
+	    (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
+	     XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
+		goto all_done;
+
+	vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
+	vap->va_nextents =
+		(ip->i_df.if_flags & XFS_IFEXTENTS) ?
+			ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
+			ip->i_d.di_nextents;
+	if (ip->i_afp)
+		vap->va_anextents =
+			(ip->i_afp->if_flags & XFS_IFEXTENTS) ?
+				ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
+				ip->i_d.di_anextents;
+	else
+		vap->va_anextents = 0;
+	vap->va_gen = ip->i_d.di_gen;
+
+ all_done:
+	if (!(flags & ATTR_LAZY))
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return 0;
+}
+
+
+/*
+ * xfs_setattr
+ */
+int
+xfs_setattr(
+	bhv_desc_t		*bdp,
+	vattr_t			*vap,
+	int			flags,
+	cred_t			*credp)
+{
+	xfs_inode_t		*ip;
+	xfs_trans_t		*tp;
+	xfs_mount_t		*mp;
+	int			mask;
+	int			code;
+	uint			lock_flags;
+	uint			commit_flags=0;
+	uid_t			uid=0, iuid=0;
+	gid_t			gid=0, igid=0;
+	int			timeflags = 0;
+	vnode_t			*vp;
+	xfs_prid_t		projid=0, iprojid=0;
+	int			mandlock_before, mandlock_after;
+	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
+	int			file_owner;
+	int			need_iolock = (flags & ATTR_DMI) == 0;
+
+	vp = BHV_TO_VNODE(bdp);
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
+	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
+		return XFS_ERROR(EROFS);
+
+	/*
+	 * Cannot set certain attributes.
+	 */
+	mask = vap->va_mask;
+	if (mask & XFS_AT_NOSET) {
+		return XFS_ERROR(EINVAL);
+	}
+
+	ip = XFS_BHVTOI(bdp);
+	mp = ip->i_mount;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	/*
+	 * Timestamps do not need to be logged and hence do not
+	 * need to be done within a transaction.
+	 */
+	if (mask & XFS_AT_UPDTIMES) {
+		ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
+		timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
+			    ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
+			    ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
+		xfs_ichgtime(ip, timeflags);
+		return 0;
+	}
+
+	olddquot1 = olddquot2 = NULL;
+	udqp = gdqp = NULL;
+
+	/*
+	 * If disk quotas is on, we make sure that the dquots do exist on disk,
+	 * before we start any other transactions. Trying to do this later
+	 * is messy. We don't care to take a readlock to look at the ids
+	 * in inode here, because we can't hold it across the trans_reserve.
+	 * If the IDs do change before we take the ilock, we're covered
+	 * because the i_*dquot fields will get updated anyway.
+	 */
+	if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
+		uint	qflags = 0;
+
+		if (mask & XFS_AT_UID) {
+			uid = vap->va_uid;
+			qflags |= XFS_QMOPT_UQUOTA;
+		} else {
+			uid = ip->i_d.di_uid;
+		}
+		if (mask & XFS_AT_GID) {
+			gid = vap->va_gid;
+			qflags |= XFS_QMOPT_GQUOTA;
+		} else {
+			gid = ip->i_d.di_gid;
+		}
+		/*
+		 * We take a reference when we initialize udqp and gdqp,
+		 * so it is important that we never blindly double trip on
+		 * the same variable. See xfs_create() for an example.
+		 */
+		ASSERT(udqp == NULL);
+		ASSERT(gdqp == NULL);
+		code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp);
+		if (code)
+			return (code);
+	}
+
+	/*
+	 * For the other attributes, we acquire the inode lock and
+	 * first do an error checking pass.
+	 */
+	tp = NULL;
+	lock_flags = XFS_ILOCK_EXCL;
+	if (!(mask & XFS_AT_SIZE)) {
+		if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
+		    (mp->m_flags & XFS_MOUNT_WSYNC)) {
+			tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+			commit_flags = 0;
+			if ((code = xfs_trans_reserve(tp, 0,
+						     XFS_ICHANGE_LOG_RES(mp), 0,
+						     0, 0))) {
+				lock_flags = 0;
+				goto error_return;
+			}
+		}
+	} else {
+		if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) &&
+		    !(flags & ATTR_DMI)) {
+			int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
+			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
+				vap->va_size, 0, dmflags, NULL);
+			if (code) {
+				lock_flags = 0;
+				goto error_return;
+			}
+		}
+		if (need_iolock)
+			lock_flags |= XFS_IOLOCK_EXCL;
+	}
+
+	xfs_ilock(ip, lock_flags);
+
+	/* boolean: are we the file owner? */
+	file_owner = (current_fsuid(credp) == ip->i_d.di_uid);
+
+	/*
+	 * Change various properties of a file.
+	 * Only the owner or users with CAP_FOWNER
+	 * capability may do these things.
+	 */
+	if (mask &
+	    (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
+	     XFS_AT_GID|XFS_AT_PROJID)) {
+		/*
+		 * CAP_FOWNER overrides the following restrictions:
+		 *
+		 * The user ID of the calling process must be equal
+		 * to the file owner ID, except in cases where the
+		 * CAP_FSETID capability is applicable.
+		 */
+		if (!file_owner && !capable(CAP_FOWNER)) {
+			code = XFS_ERROR(EPERM);
+			goto error_return;
+		}
+
+		/*
+		 * CAP_FSETID overrides the following restrictions:
+		 *
+		 * The effective user ID of the calling process shall match
+		 * the file owner when setting the set-user-ID and
+		 * set-group-ID bits on that file.
+		 *
+		 * The effective group ID or one of the supplementary group
+		 * IDs of the calling process shall match the group owner of
+		 * the file when setting the set-group-ID bit on that file
+		 */
+		if (mask & XFS_AT_MODE) {
+			mode_t m = 0;
+
+			if ((vap->va_mode & S_ISUID) && !file_owner)
+				m |= S_ISUID;
+			if ((vap->va_mode & S_ISGID) &&
+			    !in_group_p((gid_t)ip->i_d.di_gid))
+				m |= S_ISGID;
+#if 0
+			/* Linux allows this, Irix doesn't. */
+			if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR)
+				m |= S_ISVTX;
+#endif
+			if (m && !capable(CAP_FSETID))
+				vap->va_mode &= ~m;
+		}
+	}
+
+	/*
+	 * Change file ownership. Must be the owner or privileged.
+	 * If the system was configured with the "restricted_chown"
+	 * option, the owner is not permitted to give away the file,
+	 * and can change the group id only to a group of which he
+	 * or she is a member.
+	 */
+	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+		/*
+		 * These IDs could have changed since we last looked at them.
+		 * But, we're assured that if the ownership did change
+		 * while we didn't have the inode locked, inode's dquot(s)
+		 * would have changed also.
+		 */
+		iuid = ip->i_d.di_uid;
+		iprojid = ip->i_d.di_projid;
+		igid = ip->i_d.di_gid;
+		gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
+		uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
+		projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
+			 iprojid;
+
+		/*
+		 * CAP_CHOWN overrides the following restrictions:
+		 *
+		 * If _POSIX_CHOWN_RESTRICTED is defined, this capability
+		 * shall override the restriction that a process cannot
+		 * change the user ID of a file it owns and the restriction
+		 * that the group ID supplied to the chown() function
+		 * shall be equal to either the group ID or one of the
+		 * supplementary group IDs of the calling process.
+		 *
+		 * XXX: How does restricted_chown affect projid?
+		 */
+		if (restricted_chown &&
+		    (iuid != uid || (igid != gid &&
+				     !in_group_p((gid_t)gid))) &&
+		    !capable(CAP_CHOWN)) {
+			code = XFS_ERROR(EPERM);
+			goto error_return;
+		}
+		/*
+		 * Do a quota reservation only if uid or gid is actually
+		 * going to change.
+		 */
+		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
+			ASSERT(tp);
+			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+						capable(CAP_FOWNER) ?
+						XFS_QMOPT_FORCE_RES : 0);
+			if (code)	/* out of quota */
+				goto error_return;
+		}
+	}
+
+	/*
+	 * Truncate file. Must have write permission and not be a directory.
+	 */
+	if (mask & XFS_AT_SIZE) {
+		/* Short circuit the truncate case for zero length files */
+		if ((vap->va_size == 0) &&
+		    (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) {
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			lock_flags &= ~XFS_ILOCK_EXCL;
+			if (mask & XFS_AT_CTIME)
+				xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+			code = 0;
+			goto error_return;
+		}
+
+		if (vp->v_type == VDIR) {
+			code = XFS_ERROR(EISDIR);
+			goto error_return;
+		} else if (vp->v_type != VREG) {
+			code = XFS_ERROR(EINVAL);
+			goto error_return;
+		}
+		/*
+		 * Make sure that the dquots are attached to the inode.
+		 */
+		if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
+			goto error_return;
+	}
+
+	/*
+	 * Change file access or modified times.
+	 */
+	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+		if (!file_owner) {
+			if ((flags & ATTR_UTIME) &&
+			    !capable(CAP_FOWNER)) {
+				code = XFS_ERROR(EPERM);
+				goto error_return;
+			}
+		}
+	}
+
+	/*
+	 * Change extent size or realtime flag.
+	 */
+	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
+		/*
+		 * Can't change extent size if any extents are allocated.
+		 */
+		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+		    (mask & XFS_AT_EXTSIZE) &&
+		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+		     vap->va_extsize) ) {
+			code = XFS_ERROR(EINVAL);	/* EFBIG? */
+			goto error_return;
+		}
+
+		/*
+		 * Can't set extent size unless the file is marked, or
+		 * about to be marked as a realtime file.
+		 *
+		 * This check will be removed when fixed size extents
+		 * with buffered data writes is implemented.
+		 *
+		 */
+		if ((mask & XFS_AT_EXTSIZE) &&
+		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+		     vap->va_extsize) &&
+		    (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
+		       ((mask & XFS_AT_XFLAGS) &&
+			(vap->va_xflags & XFS_XFLAG_REALTIME))))) {
+			code = XFS_ERROR(EINVAL);
+			goto error_return;
+		}
+
+		/*
+		 * Can't change realtime flag if any extents are allocated.
+		 */
+		if (ip->i_d.di_nextents && (mask & XFS_AT_XFLAGS) &&
+		    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
+		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
+			code = XFS_ERROR(EINVAL);	/* EFBIG? */
+			goto error_return;
+		}
+		/*
+		 * Extent size must be a multiple of the appropriate block
+		 * size, if set at all.
+		 */
+		if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
+			xfs_extlen_t	size;
+
+			if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
+			    ((mask & XFS_AT_XFLAGS) &&
+			     (vap->va_xflags & XFS_XFLAG_REALTIME))) {
+				size = mp->m_sb.sb_rextsize <<
+				       mp->m_sb.sb_blocklog;
+			} else {
+				size = mp->m_sb.sb_blocksize;
+			}
+			if (vap->va_extsize % size) {
+				code = XFS_ERROR(EINVAL);
+				goto error_return;
+			}
+		}
+		/*
+		 * If realtime flag is set then must have realtime data.
+		 */
+		if ((mask & XFS_AT_XFLAGS) &&
+		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
+			if ((mp->m_sb.sb_rblocks == 0) ||
+			    (mp->m_sb.sb_rextsize == 0) ||
+			    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+				code = XFS_ERROR(EINVAL);
+				goto error_return;
+			}
+		}
+
+		/*
+		 * Can't modify an immutable/append-only file unless
+		 * we have appropriate permission.
+		 */
+		if ((mask & XFS_AT_XFLAGS) &&
+		    (ip->i_d.di_flags &
+				(XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+		     (vap->va_xflags &
+				(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+		    !capable(CAP_LINUX_IMMUTABLE)) {
+			code = XFS_ERROR(EPERM);
+			goto error_return;
+		}
+	}
+
+	/*
+	 * Now we can make the changes. Before we join the inode
+	 * to the transaction, if XFS_AT_SIZE is set then take care of
+	 * the part of the truncation that must be done without the
+	 * inode lock. This needs to be done before joining the inode
+	 * to the transaction, because the inode cannot be unlocked
+	 * once it is a part of the transaction.
+	 */
+	if (mask & XFS_AT_SIZE) {
+		code = 0;
+		if (vap->va_size > ip->i_d.di_size)
+			code = xfs_igrow_start(ip, vap->va_size, credp);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		if (!code)
+			code = xfs_itruncate_data(ip, vap->va_size);
+		if (code) {
+			ASSERT(tp == NULL);
+			lock_flags &= ~XFS_ILOCK_EXCL;
+			ASSERT(lock_flags == XFS_IOLOCK_EXCL);
+			goto error_return;
+		}
+		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+		if ((code = xfs_trans_reserve(tp, 0,
+					     XFS_ITRUNCATE_LOG_RES(mp), 0,
+					     XFS_TRANS_PERM_LOG_RES,
+					     XFS_ITRUNCATE_LOG_COUNT))) {
+			xfs_trans_cancel(tp, 0);
+			if (need_iolock)
+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			return code;
+		}
+		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+	}
+
+	if (tp) {
+		xfs_trans_ijoin(tp, ip, lock_flags);
+		xfs_trans_ihold(tp, ip);
+	}
+
+	/* determine whether mandatory locking mode changes */
+	mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
+
+	/*
+	 * Truncate file. Must have write permission and not be a directory.
+	 */
+	if (mask & XFS_AT_SIZE) {
+		if (vap->va_size > ip->i_d.di_size) {
+			xfs_igrow_finish(tp, ip, vap->va_size,
+			    !(flags & ATTR_DMI));
+		} else if ((vap->va_size <= ip->i_d.di_size) ||
+			   ((vap->va_size == 0) && ip->i_d.di_nextents)) {
+			/*
+			 * signal a sync transaction unless
+			 * we're truncating an already unlinked
+			 * file on a wsync filesystem
+			 */
+			code = xfs_itruncate_finish(&tp, ip,
+					    (xfs_fsize_t)vap->va_size,
+					    XFS_DATA_FORK,
+					    ((ip->i_d.di_nlink != 0 ||
+					      !(mp->m_flags & XFS_MOUNT_WSYNC))
+					     ? 1 : 0));
+			if (code) {
+				goto abort_return;
+			}
+		}
+		/*
+		 * Have to do this even if the file's size doesn't change.
+		 */
+		timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+	}
+
+	/*
+	 * Change file access modes.
+	 */
+	if (mask & XFS_AT_MODE) {
+		ip->i_d.di_mode &= S_IFMT;
+		ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;
+
+		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+		timeflags |= XFS_ICHGTIME_CHG;
+	}
+
+	/*
+	 * Change file ownership. Must be the owner or privileged.
+	 * If the system was configured with the "restricted_chown"
+	 * option, the owner is not permitted to give away the file,
+	 * and can change the group id only to a group of which he
+	 * or she is a member.
+	 */
+	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+		/*
+		 * CAP_FSETID overrides the following restrictions:
+		 *
+		 * The set-user-ID and set-group-ID bits of a file will be
+		 * cleared upon successful return from chown()
+		 */
+		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+		    !capable(CAP_FSETID)) {
+			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+		}
+
+		/*
+		 * Change the ownerships and register quota modifications
+		 * in the transaction.
+		 */
+		if (iuid != uid) {
+			if (XFS_IS_UQUOTA_ON(mp)) {
+				ASSERT(mask & XFS_AT_UID);
+				ASSERT(udqp);
+				olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_udquot, udqp);
+			}
+			ip->i_d.di_uid = uid;
+		}
+		if (igid != gid) {
+			if (XFS_IS_GQUOTA_ON(mp)) {
+				ASSERT(mask & XFS_AT_GID);
+				ASSERT(gdqp);
+				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_gdquot, gdqp);
+			}
+			ip->i_d.di_gid = gid;
+		}
+		if (iprojid != projid) {
+			ip->i_d.di_projid = projid;
+			/*
+			 * We may have to rev the inode as well as
+			 * the superblock version number since projids didn't
+			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+			 */
+			if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
+				xfs_bump_ino_vers2(tp, ip);
+		}
+
+		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+		timeflags |= XFS_ICHGTIME_CHG;
+	}
+
+
+	/*
+	 * Change file access or modified times.
+	 */
+	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+		if (mask & XFS_AT_ATIME) {
+			ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
+			ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
+			ip->i_update_core = 1;
+			timeflags &= ~XFS_ICHGTIME_ACC;
+		}
+		if (mask & XFS_AT_MTIME) {
+			ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
+			ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
+			timeflags &= ~XFS_ICHGTIME_MOD;
+			timeflags |= XFS_ICHGTIME_CHG;
+		}
+		if (tp && (flags & ATTR_UTIME))
+			xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+	}
+
+	/*
+	 * Change XFS-added attributes.
+	 */
+	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
+		if (mask & XFS_AT_EXTSIZE) {
+			/*
+			 * Converting bytes to fs blocks.
+			 */
+			ip->i_d.di_extsize = vap->va_extsize >>
+				mp->m_sb.sb_blocklog;
+		}
+		if (mask & XFS_AT_XFLAGS) {
+			uint	di_flags;
+
+			/* can't set PREALLOC this way, just preserve it */
+			di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+			if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
+				di_flags |= XFS_DIFLAG_IMMUTABLE;
+			if (vap->va_xflags & XFS_XFLAG_APPEND)
+				di_flags |= XFS_DIFLAG_APPEND;
+			if (vap->va_xflags & XFS_XFLAG_SYNC)
+				di_flags |= XFS_DIFLAG_SYNC;
+			if (vap->va_xflags & XFS_XFLAG_NOATIME)
+				di_flags |= XFS_DIFLAG_NOATIME;
+			if (vap->va_xflags & XFS_XFLAG_NODUMP)
+				di_flags |= XFS_DIFLAG_NODUMP;
+			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
+					di_flags |= XFS_DIFLAG_RTINHERIT;
+				if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
+					di_flags |= XFS_DIFLAG_NOSYMLINKS;
+			} else {
+				if (vap->va_xflags & XFS_XFLAG_REALTIME) {
+					di_flags |= XFS_DIFLAG_REALTIME;
+					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
+				} else {
+					ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
+				}
+			}
+			ip->i_d.di_flags = di_flags;
+		}
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+		timeflags |= XFS_ICHGTIME_CHG;
+	}
+
+	/*
+	 * Change file inode change time only if XFS_AT_CTIME set
+	 * AND we have been called by a DMI function.
+	 */
+
+	if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
+		ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
+		ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
+		ip->i_update_core = 1;
+		timeflags &= ~XFS_ICHGTIME_CHG;
+	}
+
+	/*
+	 * Send out timestamp changes that need to be set to the
+	 * current time.  Not done when called by a DMI function.
+	 */
+	if (timeflags && !(flags & ATTR_DMI))
+		xfs_ichgtime(ip, timeflags);
+
+	XFS_STATS_INC(xs_ig_attrchg);
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * transaction goes to disk before returning to the user.
+	 * This is slightly sub-optimal in that truncates require
+	 * two sync transactions instead of one for wsync filesytems.
+	 * One for the truncate and one for the timestamps since we
+	 * don't want to change the timestamps unless we're sure the
+	 * truncate worked. Truncates are less than 1% of the laddis
+	 * mix so this probably isn't worth the trouble to optimize.
+	 */
+	code = 0;
+	if (tp) {
+		if (mp->m_flags & XFS_MOUNT_WSYNC)
+			xfs_trans_set_sync(tp);
+
+		code = xfs_trans_commit(tp, commit_flags, NULL);
+	}
+
+	/*
+	 * If the (regular) file's mandatory locking mode changed, then
+	 * notify the vnode. We do this under the inode lock to prevent
+	 * racing calls to vop_vnode_change.
+	 */
+	mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
+	if (mandlock_before != mandlock_after) {
+		VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING,
+				 mandlock_after);
+	}
+
+	xfs_iunlock(ip, lock_flags);
+
+	/*
+	 * Release any dquot(s) the inode had kept before chown.
+	 */
+	XFS_QM_DQRELE(mp, olddquot1);
+	XFS_QM_DQRELE(mp, olddquot2);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
+
+	if (code) {
+		return code;
+	}
+
+	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) &&
+	    !(flags & ATTR_DMI)) {
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
+					NULL, DM_RIGHT_NULL, NULL, NULL,
+					0, 0, AT_DELAY_FLAG(flags));
+	}
+	return 0;
+
+ abort_return:
+	commit_flags |= XFS_TRANS_ABORT;
+	/* FALLTHROUGH */
+ error_return:
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
+	if (tp) {
+		xfs_trans_cancel(tp, commit_flags);
+	}
+	if (lock_flags != 0) {
+		xfs_iunlock(ip, lock_flags);
+	}
+	return code;
+}
+
+
+/*
+ * xfs_access
+ * Null conversion from vnode mode bits to inode mode bits, as in efs.
+ */
+STATIC int
+xfs_access(
+	bhv_desc_t	*bdp,
+	int		mode,
+	cred_t		*credp)
+{
+	xfs_inode_t	*ip;
+	int		error;
+
+	vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__,
+					   (inst_t *)__return_address);
+
+	ip = XFS_BHVTOI(bdp);
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	error = xfs_iaccess(ip, mode, credp);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return error;
+}
+
+
+/*
+ * xfs_readlink
+ *
+ */
+STATIC int
+xfs_readlink(
+	bhv_desc_t	*bdp,
+	uio_t		*uiop,
+	int		ioflags,
+	cred_t		*credp)
+{
+	xfs_inode_t	*ip;
+	int		count;
+	xfs_off_t	offset;
+	int		pathlen;
+	vnode_t		*vp;
+	int		error = 0;
+	xfs_mount_t	*mp;
+	int		nmaps;
+	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
+	xfs_daddr_t	d;
+	int		byte_cnt;
+	int		n;
+	xfs_buf_t	*bp;
+
+	vp = BHV_TO_VNODE(bdp);
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
+	ip = XFS_BHVTOI(bdp);
+	mp = ip->i_mount;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
+
+	offset = uiop->uio_offset;
+	count = uiop->uio_resid;
+
+	if (offset < 0) {
+		error = XFS_ERROR(EINVAL);
+		goto error_return;
+	}
+	if (count <= 0) {
+		error = 0;
+		goto error_return;
+	}
+
+	if (!(ioflags & IO_INVIS)) {
+		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+	}
+
+	/*
+	 * See if the symlink is stored inline.
+	 */
+	pathlen = (int)ip->i_d.di_size;
+
+	if (ip->i_df.if_flags & XFS_IFINLINE) {
+		error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
+	}
+	else {
+		/*
+		 * Symlink not inline. Call bmap to get it in.
+		 */
+		nmaps = SYMLINK_MAPS;
+
+		error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen),
+				  0, NULL, 0, mval, &nmaps, NULL);
+
+		if (error) {
+			goto error_return;
+		}
+
+		for (n = 0; n < nmaps; n++) {
+			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
+			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
+			bp = xfs_buf_read(mp->m_ddev_targp, d,
+				      BTOBB(byte_cnt), 0);
+			error = XFS_BUF_GETERROR(bp);
+			if (error) {
+				xfs_ioerror_alert("xfs_readlink",
+					  ip->i_mount, bp, XFS_BUF_ADDR(bp));
+				xfs_buf_relse(bp);
+				goto error_return;
+			}
+			if (pathlen < byte_cnt)
+				byte_cnt = pathlen;
+			pathlen -= byte_cnt;
+
+			error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
+			xfs_buf_relse (bp);
+		}
+
+	}
+
+
+error_return:
+
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	return error;
+}
+
+
+/*
+ * xfs_fsync
+ *
+ * This is called to sync the inode and its data out to disk.
+ * We need to hold the I/O lock while flushing the data, and
+ * the inode lock while flushing the inode. The inode lock CANNOT
+ * be held while flushing the data, so acquire after we're done
+ * with that.
+ */
+STATIC int
+xfs_fsync(
+	bhv_desc_t	*bdp,
+	int		flag,
+	cred_t		*credp,
+	xfs_off_t	start,
+	xfs_off_t	stop)
+{
+	xfs_inode_t	*ip;
+	xfs_trans_t	*tp;
+	int		error;
+
+	vn_trace_entry(BHV_TO_VNODE(bdp),
+			__FUNCTION__, (inst_t *)__return_address);
+
+	ip = XFS_BHVTOI(bdp);
+
+	ASSERT(start >= 0 && stop >= -1);
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return XFS_ERROR(EIO);
+
+	/*
+	 * We always need to make sure that the required inode state
+	 * is safe on disk. The vnode might be clean but because
+	 * of committed transactions that haven't hit the disk yet.
+	 * Likewise, there could be unflushed non-transactional
+	 * changes to the inode core that have to go to disk.
+	 *
+	 * The following code depends on one assumption: that
+	 * any transaction that changes an inode logs the core
+	 * because it has to change some field in the inode core
+	 * (typically nextents or nblocks). That assumption
+	 * implies that any transactions against an inode will
+	 * catch any non-transactional updates. If inode-altering
+	 * transactions exist that violate this assumption, the
+	 * code breaks. Right now, it figures that if the involved
+	 * update_* field is clear and the inode is unpinned, the
+	 * inode is clean. Either it's been flushed or it's been
+	 * committed and the commit has hit the disk unpinning the inode.
+	 * (Note that xfs_inode_item_format() called at commit clears
+	 * the update_* fields.)
+	 */
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+	/* If we are flushing data then we care about update_size
+	 * being set, otherwise we care about update_core
+	 */
+	if ((flag & FSYNC_DATA) ?
+			(ip->i_update_size == 0) :
+			(ip->i_update_core == 0)) {
+		/*
+		 * Timestamps/size haven't changed since last inode
+		 * flush or inode transaction commit. That means
+		 * either nothing got written or a transaction
+		 * committed which caught the updates. If the
+		 * latter happened and the transaction hasn't
+		 * hit the disk yet, the inode will be still
+		 * be pinned. If it is, force the log.
+		 */
+
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+		if (xfs_ipincount(ip))