author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 15:20:36 -0700
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/xfs_vnodeops.c
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/xfs/xfs_vnodeops.c')
-rw-r--r-- fs/xfs/xfs_vnodeops.c | 4712
1 file changed, 4712 insertions, 0 deletions
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
new file mode 100644
index 00000000000..70092963ca9
--- /dev/null
+++ b/fs/xfs/xfs_vnodeops.c
@@ -0,0 +1,4712 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_macros.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_itable.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_alloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode_item.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_rw.h"
+#include "xfs_refcache.h"
+#include "xfs_error.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_quota.h"
+#include "xfs_utils.h"
+#include "xfs_trans_space.h"
+#include "xfs_dir_leaf.h"
+#include "xfs_mac.h"
+#include "xfs_log_priv.h"
+
+
+/*
+ * The maximum pathlen is 1024 bytes. Since the minimum file system
+ * blocksize is 512 bytes, we can get a max of 2 extents back from
+ * bmapi.
+ */
+#define SYMLINK_MAPS 2
+
+/*
+ * For xfs, we check that the file isn't too big to be opened by this kernel.
+ * No other open action is required for regular files. Devices are handled
+ * through the specfs file system, pipes through fifofs. Device and
+ * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
+ * when a new vnode is first looked up or created.
+ */
+STATIC int
+xfs_open(
+ bhv_desc_t *bdp,
+ cred_t *credp)
+{
+ int mode;
+ vnode_t *vp;
+ xfs_inode_t *ip;
+
+ vp = BHV_TO_VNODE(bdp);
+ ip = XFS_BHVTOI(bdp);
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return XFS_ERROR(EIO);
+
+ /*
+ * If it's a directory with any blocks, read-ahead block 0
+ * as we're almost certain to have the next operation be a read there.
+ */
+ if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) {
+ mode = xfs_ilock_map_shared(ip);
+ if (ip->i_d.di_nextents > 0)
+ (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
+ xfs_iunlock(ip, mode);
+ }
+ return 0;
+}
+
+
+/*
+ * xfs_getattr
+ */
+STATIC int
+xfs_getattr(
+ bhv_desc_t *bdp,
+ vattr_t *vap,
+ int flags,
+ cred_t *credp)
+{
+ xfs_inode_t *ip;
+ xfs_mount_t *mp;
+ vnode_t *vp;
+
+ vp = BHV_TO_VNODE(bdp);
+ vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
+ ip = XFS_BHVTOI(bdp);
+ mp = ip->i_mount;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ if (!(flags & ATTR_LAZY))
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+
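+ /*
+ * Fast path: if the caller asked only for the file size, we are done
+ * after copying it.
+ */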
+ vap->va_size = ip->i_d.di_size;
+ if (vap->va_mask == XFS_AT_SIZE)
+ goto all_done;
+
+ vap->va_nblocks =
+ XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+ vap->va_nodeid = ip->i_ino;
+#if XFS_BIG_INUMS
+ vap->va_nodeid += mp->m_inoadd;
+#endif
+ vap->va_nlink = ip->i_d.di_nlink;
+
+ /*
+ * Quick exit for non-stat callers
+ */
+ if ((vap->va_mask &
+ ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
+ XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
+ goto all_done;
+
+ /*
+ * Copy from in-core inode.
+ */
+ vap->va_type = vp->v_type;
+ vap->va_mode = ip->i_d.di_mode & MODEMASK;
+ vap->va_uid = ip->i_d.di_uid;
+ vap->va_gid = ip->i_d.di_gid;
+ vap->va_projid = ip->i_d.di_projid;
+
+ /*
+ * Check vnode type block/char vs. everything else.
+ * Do it with a bitmask because that's faster than checking
+ * for multiple values individually.
+ */
+ if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+ vap->va_rdev = 0;
+
+ if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+
+#if 0
+ /* Large block sizes confuse various
+ * user space programs, so letting the
+ * stripe size through is not a good
+ * idea for now.
+ */
+ vap->va_blocksize = mp->m_swidth ?
+ /*
+ * If the underlying volume is a stripe, then
+ * return the stripe width in bytes as the
+ * recommended I/O size.
+ */
+ (mp->m_swidth << mp->m_sb.sb_blocklog) :
+ /*
+ * Return the largest of the preferred buffer
+ * sizes since doing small I/Os into larger
+ * buffers causes buffers to be decommissioned.
+ * The value returned is in bytes.
+ */
+ (1 << (int)MAX(mp->m_readio_log,
+ mp->m_writeio_log));
+
+#else
+ vap->va_blocksize =
+ /*
+ * Return the largest of the preferred buffer
+ * sizes since doing small I/Os into larger
+ * buffers causes buffers to be decommissioned.
+ * The value returned is in bytes.
+ */
+ 1 << (int)MAX(mp->m_readio_log,
+ mp->m_writeio_log);
+#endif
+ } else {
+
+ /*
+ * If the file blocks are being allocated from a
+ * realtime partition, then return the inode's
+ * realtime extent size or the realtime volume's
+ * extent size.
+ */
+ vap->va_blocksize = ip->i_d.di_extsize ?
+ (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
+ (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
+ }
+ } else {
+ vap->va_rdev = ip->i_df.if_u2.if_rdev;
+ vap->va_blocksize = BLKDEV_IOSIZE;
+ }
+
+ vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
+ vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
+ vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
+ vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
+ vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
+ vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
+
+ /*
+ * Exit for stat callers. See if any of the rest of the fields
+ * to be filled in are needed.
+ */
+ if ((vap->va_mask &
+ (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
+ XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
+ goto all_done;
+
+ /*
+ * Convert di_flags to xflags.
+ */
+ vap->va_xflags = xfs_ip2xflags(ip);
+
+ /*
+ * Exit for inode revalidate. See if any of the rest of
+ * the fields to be filled in are needed.
+ */
+ if ((vap->va_mask &
+ (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
+ XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
+ goto all_done;
+
+ vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
+ vap->va_nextents =
+ (ip->i_df.if_flags & XFS_IFEXTENTS) ?
+ ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
+ ip->i_d.di_nextents;
+ if (ip->i_afp)
+ vap->va_anextents =
+ (ip->i_afp->if_flags & XFS_IFEXTENTS) ?
+ ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
+ ip->i_d.di_anextents;
+ else
+ vap->va_anextents = 0;
+ vap->va_gen = ip->i_d.di_gen;
+
+ all_done:
+ if (!(flags & ATTR_LAZY))
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ return 0;
+}
+
+
+/*
+ * xfs_setattr
+ */
+int
+xfs_setattr(
+ bhv_desc_t *bdp,
+ vattr_t *vap,
+ int flags,
+ cred_t *credp)
+{
+ xfs_inode_t *ip;
+ xfs_trans_t *tp;
+ xfs_mount_t *mp;
+ int mask;
+ int code;
+ uint lock_flags;
+ uint commit_flags=0;
+ uid_t uid=0, iuid=0;
+ gid_t gid=0, igid=0;
+ int timeflags = 0;
+ vnode_t *vp;
+ xfs_prid_t projid=0, iprojid=0;
+ int mandlock_before, mandlock_after;
+ struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
+ int file_owner;
+ int need_iolock = (flags & ATTR_DMI) == 0;
+
+ vp = BHV_TO_VNODE(bdp);
+ vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
+ if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
+ return XFS_ERROR(EROFS);
+
+ /*
+ * Cannot set certain attributes.
+ */
+ mask = vap->va_mask;
+ if (mask & XFS_AT_NOSET) {
+ return XFS_ERROR(EINVAL);
+ }
+
+ ip = XFS_BHVTOI(bdp);
+ mp = ip->i_mount;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ /*
+ * Timestamp updates do not need to be logged and hence do not
+ * need to be done within a transaction.
+ */
+ if (mask & XFS_AT_UPDTIMES) {
+ ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
+ timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
+ ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
+ ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
+ xfs_ichgtime(ip, timeflags);
+ return 0;
+ }
+
+ olddquot1 = olddquot2 = NULL;
+ udqp = gdqp = NULL;
+
+ /*
+ * If disk quotas are on, we make sure that the dquots exist on disk
+ * before we start any other transactions. Trying to do this later
+ * is messy. We don't bother taking a read lock to look at the IDs
+ * in the inode here, because we can't hold it across the trans_reserve.
+ * If the IDs do change before we take the ilock, we're covered
+ * because the i_*dquot fields will get updated anyway.
+ */
+ if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
+ uint qflags = 0;
+
+ if (mask & XFS_AT_UID) {
+ uid = vap->va_uid;
+ qflags |= XFS_QMOPT_UQUOTA;
+ } else {
+ uid = ip->i_d.di_uid;
+ }
+ if (mask & XFS_AT_GID) {
+ gid = vap->va_gid;
+ qflags |= XFS_QMOPT_GQUOTA;
+ } else {
+ gid = ip->i_d.di_gid;
+ }
+ /*
+ * We take a reference when we initialize udqp and gdqp,
+ * so it is important that we never blindly double trip on
+ * the same variable. See xfs_create() for an example.
+ */
+ ASSERT(udqp == NULL);
+ ASSERT(gdqp == NULL);
+ code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp);
+ if (code)
+ return (code);
+ }
+
+ /*
+ * For the other attributes, we acquire the inode lock and
+ * first do an error checking pass.
+ */
+ tp = NULL;
+ lock_flags = XFS_ILOCK_EXCL;
+ if (!(mask & XFS_AT_SIZE)) {
+ if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
+ (mp->m_flags & XFS_MOUNT_WSYNC)) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+ commit_flags = 0;
+ if ((code = xfs_trans_reserve(tp, 0,
+ XFS_ICHANGE_LOG_RES(mp), 0,
+ 0, 0))) {
+ lock_flags = 0;
+ goto error_return;
+ }
+ }
+ } else {
+ if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) &&
+ !(flags & ATTR_DMI)) {
+ int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
+ code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
+ vap->va_size, 0, dmflags, NULL);
+ if (code) {
+ lock_flags = 0;
+ goto error_return;
+ }
+ }
+ if (need_iolock)
+ lock_flags |= XFS_IOLOCK_EXCL;
+ }
+
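+ /* Take the inode lock(s) chosen above and begin the error-checking pass. */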
+ xfs_ilock(ip, lock_flags);
+
+ /* boolean: are we the file owner? */
+ file_owner = (current_fsuid(credp) == ip->i_d.di_uid);
+
+ /*
+ * Change various properties of a file.
+ * Only the owner or users with CAP_FOWNER
+ * capability may do these things.
+ */
+ if (mask &
+ (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
+ XFS_AT_GID|XFS_AT_PROJID)) {
+ /*
+ * CAP_FOWNER overrides the following restrictions:
+ *
+ * The user ID of the calling process must be equal
+ * to the file owner ID, except in cases where the
+ * CAP_FSETID capability is applicable.
+ */
+ if (!file_owner && !capable(CAP_FOWNER)) {
+ code = XFS_ERROR(EPERM);
+ goto error_return;
+ }
+
+ /*
+ * CAP_FSETID overrides the following restrictions:
+ *
+ * The effective user ID of the calling process shall match
+ * the file owner when setting the set-user-ID and
+ * set-group-ID bits on that file.
+ *
+ * The effective group ID or one of the supplementary group
+ * IDs of the calling process shall match the group owner of
+ * the file when setting the set-group-ID bit on that file.
+ */
+ if (mask & XFS_AT_MODE) {
+ mode_t m = 0;
+
+ if ((vap->va_mode & S_ISUID) && !file_owner)
+ m |= S_ISUID;
+ if ((vap->va_mode & S_ISGID) &&
+ !in_group_p((gid_t)ip->i_d.di_gid))
+ m |= S_ISGID;
+#if 0
+ /* Linux allows this, Irix doesn't. */
+ if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR)
+ m |= S_ISVTX;
+#endif
+ if (m && !capable(CAP_FSETID))
+ vap->va_mode &= ~m;
+ }
+ }
+
+ /*
+ * Change file ownership. Must be the owner or privileged.
+ * If the system was configured with the "restricted_chown"
+ * option, the owner is not permitted to give away the file,
+ * and can change the group id only to a group of which he
+ * or she is a member.
+ */
+ if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+ /*
+ * These IDs could have changed since we last looked at them.
+ * But we're assured that if the ownership did change
+ * while we didn't have the inode locked, the inode's dquot(s)
+ * would have changed as well.
+ */
+ iuid = ip->i_d.di_uid;
+ iprojid = ip->i_d.di_projid;
+ igid = ip->i_d.di_gid;
+ gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
+ uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
+ projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
+ iprojid;
+
+ /*
+ * CAP_CHOWN overrides the following restrictions:
+ *
+ * If _POSIX_CHOWN_RESTRICTED is defined, this capability
+ * shall override the restriction that a process cannot
+ * change the user ID of a file it owns and the restriction
+ * that the group ID supplied to the chown() function
+ * shall be equal to either the group ID or one of the
+ * supplementary group IDs of the calling process.
+ *
+ * XXX: How does restricted_chown affect projid?
+ */
+ if (restricted_chown &&
+ (iuid != uid || (igid != gid &&
+ !in_group_p((gid_t)gid))) &&
+ !capable(CAP_CHOWN)) {
+ code = XFS_ERROR(EPERM);
+ goto error_return;
+ }
+ /*
+ * Do a quota reservation only if uid or gid is actually
+ * going to change.
+ */
+ if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+ (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
+ ASSERT(tp);
+ code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+ capable(CAP_FOWNER) ?
+ XFS_QMOPT_FORCE_RES : 0);
+ if (code) /* out of quota */
+ goto error_return;
+ }
+ }
+
+ /*
+ * Truncate file. Must have write permission and not be a directory.
+ */
+ if (mask & XFS_AT_SIZE) {
+ /* Short circuit the truncate case for zero length files */
+ if ((vap->va_size == 0) &&
+ (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ lock_flags &= ~XFS_ILOCK_EXCL;
+ if (mask & XFS_AT_CTIME)
+ xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ code = 0;
+ goto error_return;
+ }
+
+ if (vp->v_type == VDIR) {
+ code = XFS_ERROR(EISDIR);
+ goto error_return;
+ } else if (vp->v_type != VREG) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
+ /*
+ * Make sure that the dquots are attached to the inode.
+ */
+ if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
+ goto error_return;
+ }
+
+ /*
+ * Change file access or modified times.
+ */
+ if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+ if (!file_owner) {
+ if ((flags & ATTR_UTIME) &&
+ !capable(CAP_FOWNER)) {
+ code = XFS_ERROR(EPERM);
+ goto error_return;
+ }
+ }
+ }
+
+ /*
+ * Change extent size or realtime flag.
+ */
+ if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
+ /*
+ * Can't change extent size if any extents are allocated.
+ */
+ if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+ (mask & XFS_AT_EXTSIZE) &&
+ ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+ vap->va_extsize) ) {
+ code = XFS_ERROR(EINVAL); /* EFBIG? */
+ goto error_return;
+ }
+
+ /*
+ * Can't set extent size unless the file is marked, or
+ * about to be marked as a realtime file.
+ *
+ * This check will be removed once fixed-size extents
+ * with buffered data writes are implemented.
+ *
+ */
+ if ((mask & XFS_AT_EXTSIZE) &&
+ ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+ vap->va_extsize) &&
+ (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
+ ((mask & XFS_AT_XFLAGS) &&
+ (vap->va_xflags & XFS_XFLAG_REALTIME))))) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
+
+ /*
+ * Can't change realtime flag if any extents are allocated.
+ */
+ if (ip->i_d.di_nextents && (mask & XFS_AT_XFLAGS) &&
+ (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
+ (vap->va_xflags & XFS_XFLAG_REALTIME)) {
+ code = XFS_ERROR(EINVAL); /* EFBIG? */
+ goto error_return;
+ }
+ /*
+ * Extent size must be a multiple of the appropriate block
+ * size, if set at all.
+ */
+ if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
+ xfs_extlen_t size;
+
+ if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
+ ((mask & XFS_AT_XFLAGS) &&
+ (vap->va_xflags & XFS_XFLAG_REALTIME))) {
+ size = mp->m_sb.sb_rextsize <<
+ mp->m_sb.sb_blocklog;
+ } else {
+ size = mp->m_sb.sb_blocksize;
+ }
+ if (vap->va_extsize % size) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
+ }
+ /*
+ * If realtime flag is set then must have realtime data.
+ */
+ if ((mask & XFS_AT_XFLAGS) &&
+ (vap->va_xflags & XFS_XFLAG_REALTIME)) {
+ if ((mp->m_sb.sb_rblocks == 0) ||
+ (mp->m_sb.sb_rextsize == 0) ||
+ (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+ code = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
+ }
+
+ /*
+ * Can't modify an immutable/append-only file unless
+ * we have appropriate permission.
+ */
+ if ((mask & XFS_AT_XFLAGS) &&
+ (ip->i_d.di_flags &
+ (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+ (vap->va_xflags &
+ (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+ !capable(CAP_LINUX_IMMUTABLE)) {
+ code = XFS_ERROR(EPERM);
+ goto error_return;
+ }
+ }
+
+ /*
+ * Now we can make the changes. Before we join the inode
+ * to the transaction, if XFS_AT_SIZE is set then take care of
+ * the part of the truncation that must be done without the
+ * inode lock. This needs to be done before joining the inode
+ * to the transaction, because the inode cannot be unlocked
+ * once it is a part of the transaction.
+ */
+ if (mask & XFS_AT_SIZE) {
+ code = 0;
+ if (vap->va_size > ip->i_d.di_size)
+ code = xfs_igrow_start(ip, vap->va_size, credp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ if (!code)
+ code = xfs_itruncate_data(ip, vap->va_size);
+ if (code) {
+ ASSERT(tp == NULL);
+ lock_flags &= ~XFS_ILOCK_EXCL;
+ ASSERT(lock_flags == XFS_IOLOCK_EXCL);
+ goto error_return;
+ }
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+ if ((code = xfs_trans_reserve(tp, 0,
+ XFS_ITRUNCATE_LOG_RES(mp), 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_ITRUNCATE_LOG_COUNT))) {
+ xfs_trans_cancel(tp, 0);
+ if (need_iolock)
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ return code;
+ }
+ commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ }
+
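+ /*
+ * Join the inode to the transaction. The extra hold keeps the commit
+ * from unlocking and releasing the inode, since we unlock it ourselves
+ * further down.
+ */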
+ if (tp) {
+ xfs_trans_ijoin(tp, ip, lock_flags);
+ xfs_trans_ihold(tp, ip);
+ }
+
+ /* determine whether mandatory locking mode changes */
+ mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
+
+ /*
+ * Truncate file. Must have write permission and not be a directory.
+ */
+ if (mask & XFS_AT_SIZE) {
+ if (vap->va_size > ip->i_d.di_size) {
+ xfs_igrow_finish(tp, ip, vap->va_size,
+ !(flags & ATTR_DMI));
+ } else if ((vap->va_size <= ip->i_d.di_size) ||
+ ((vap->va_size == 0) && ip->i_d.di_nextents)) {
+ /*
+ * signal a sync transaction unless
+ * we're truncating an already unlinked
+ * file on a wsync filesystem
+ */
+ code = xfs_itruncate_finish(&tp, ip,
+ (xfs_fsize_t)vap->va_size,
+ XFS_DATA_FORK,
+ ((ip->i_d.di_nlink != 0 ||
+ !(mp->m_flags & XFS_MOUNT_WSYNC))
+ ? 1 : 0));
+ if (code) {
+ goto abort_return;
+ }
+ }
+ /*
+ * Have to do this even if the file's size doesn't change.
+ */
+ timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+ }
+
+ /*
+ * Change file access modes.
+ */
+ if (mask & XFS_AT_MODE) {
+ ip->i_d.di_mode &= S_IFMT;
+ ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;
+
+ xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+ timeflags |= XFS_ICHGTIME_CHG;
+ }
+
+ /*
+ * Change file ownership. Must be the owner or privileged.
+ * If the system was configured with the "restricted_chown"
+ * option, the owner is not permitted to give away the file,
+ * and can change the group id only to a group of which he
+ * or she is a member.
+ */
+ if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+ /*
+ * CAP_FSETID overrides the following restrictions:
+ *
+ * The set-user-ID and set-group-ID bits of a file will be
+ * cleared upon successful return from chown()
+ */
+ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+ !capable(CAP_FSETID)) {
+ ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+ }
+
+ /*
+ * Change the ownerships and register quota modifications
+ * in the transaction.
+ */
+ if (iuid != uid) {
+ if (XFS_IS_UQUOTA_ON(mp)) {
+ ASSERT(mask & XFS_AT_UID);
+ ASSERT(udqp);
+ olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ &ip->i_udquot, udqp);
+ }
+ ip->i_d.di_uid = uid;
+ }
+ if (igid != gid) {
+ if (XFS_IS_GQUOTA_ON(mp)) {
+ ASSERT(mask & XFS_AT_GID);
+ ASSERT(gdqp);
+ olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ &ip->i_gdquot, gdqp);
+ }
+ ip->i_d.di_gid = gid;
+ }
+ if (iprojid != projid) {
+ ip->i_d.di_projid = projid;
+ /*
+ * We may have to rev the inode as well as
+ * the superblock version number since projids didn't
+ * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+ */
+ if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
+ xfs_bump_ino_vers2(tp, ip);
+ }
+
+ xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+ timeflags |= XFS_ICHGTIME_CHG;
+ }
+
+
+ /*
+ * Change file access or modified times.
+ */
+ if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+ if (mask & XFS_AT_ATIME) {
+ ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
+ ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
+ ip->i_update_core = 1;
+ timeflags &= ~XFS_ICHGTIME_ACC;
+ }
+ if (mask & XFS_AT_MTIME) {
+ ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
+ timeflags &= ~XFS_ICHGTIME_MOD;
+ timeflags |= XFS_ICHGTIME_CHG;
+ }
+ if (tp && (flags & ATTR_UTIME))
+ xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+ }
+
+ /*
+ * Change XFS-added attributes.
+ */
+ if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
+ if (mask & XFS_AT_EXTSIZE) {
+ /*
+ * Converting bytes to fs blocks.
+ */
+ ip->i_d.di_extsize = vap->va_extsize >>
+ mp->m_sb.sb_blocklog;
+ }
+ if (mask & XFS_AT_XFLAGS) {
+ uint di_flags;
+
+ /* can't set PREALLOC this way, just preserve it */
+ di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+ if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
+ di_flags |= XFS_DIFLAG_IMMUTABLE;
+ if (vap->va_xflags & XFS_XFLAG_APPEND)
+ di_flags |= XFS_DIFLAG_APPEND;
+ if (vap->va_xflags & XFS_XFLAG_SYNC)
+ di_flags |= XFS_DIFLAG_SYNC;
+ if (vap->va_xflags & XFS_XFLAG_NOATIME)
+ di_flags |= XFS_DIFLAG_NOATIME;
+ if (vap->va_xflags & XFS_XFLAG_NODUMP)
+ di_flags |= XFS_DIFLAG_NODUMP;
+ if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
+ di_flags |= XFS_DIFLAG_RTINHERIT;
+ if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
+ di_flags |= XFS_DIFLAG_NOSYMLINKS;
+ } else {
+ if (vap->va_xflags & XFS_XFLAG_REALTIME) {
+ di_flags |= XFS_DIFLAG_REALTIME;
+ ip->i_iocore.io_flags |= XFS_IOCORE_RT;
+ } else {
+ ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
+ }
+ }
+ ip->i_d.di_flags = di_flags;
+ }
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ timeflags |= XFS_ICHGTIME_CHG;
+ }
+
+ /*
+ * Change file inode change time only if XFS_AT_CTIME set
+ * AND we have been called by a DMI function.
+ */
+
+ if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
+ ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
+ ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
+ ip->i_update_core = 1;
+ timeflags &= ~XFS_ICHGTIME_CHG;
+ }
+
+ /*
+ * Send out timestamp changes that need to be set to the
+ * current time. Not done when called by a DMI function.
+ */
+ if (timeflags && !(flags & ATTR_DMI))
+ xfs_ichgtime(ip, timeflags);
+
+ XFS_STATS_INC(xs_ig_attrchg);
+
+ /*
+ * If this is a synchronous mount, make sure that the
+ * transaction goes to disk before returning to the user.
+ * This is slightly sub-optimal in that truncates require
+ * two sync transactions instead of one for wsync filesystems.
+ * One for the truncate and one for the timestamps since we
+ * don't want to change the timestamps unless we're sure the
+ * truncate worked. Truncates are less than 1% of the laddis
+ * mix so this probably isn't worth the trouble to optimize.
+ */
+ code = 0;
+ if (tp) {
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
+
+ code = xfs_trans_commit(tp, commit_flags, NULL);
+ }
+
+ /*
+ * If the (regular) file's mandatory locking mode changed, then
+ * notify the vnode. We do this under the inode lock to prevent
+ * racing calls to vop_vnode_change.
+ */
+ mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
+ if (mandlock_before != mandlock_after) {
+ VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING,
+ mandlock_after);
+ }
+
+ xfs_iunlock(ip, lock_flags);
+
+ /*
+ * Release any dquot(s) the inode had kept before chown.
+ */
+ XFS_QM_DQRELE(mp, olddquot1);
+ XFS_QM_DQRELE(mp, olddquot2);
+ XFS_QM_DQRELE(mp, udqp);
+ XFS_QM_DQRELE(mp, gdqp);
+
+ if (code) {
+ return code;
+ }
+
+ if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) &&
+ !(flags & ATTR_DMI)) {
+ (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
+ NULL, DM_RIGHT_NULL, NULL, NULL,
+ 0, 0, AT_DELAY_FLAG(flags));
+ }
+ return 0;
+
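+ /*
+ * Error exits: abort_return marks the transaction for abort and falls
+ * through to error_return, which cancels the transaction (if any),
+ * releases the dquot references, and drops any locks still held.
+ */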
+ abort_return:
+ commit_flags |= XFS_TRANS_ABORT;
+ /* FALLTHROUGH */
+ error_return:
+ XFS_QM_DQRELE(mp, udqp);
+ XFS_QM_DQRELE(mp, gdqp);
+ if (tp) {
+ xfs_trans_cancel(tp, commit_flags);
+ }
+ if (lock_flags != 0) {
+ xfs_iunlock(ip, lock_flags);
+ }
+ return code;
+}
+
+
+/*
+ * xfs_access
+ * Null conversion from vnode mode bits to inode mode bits, as in efs.
+ */
+STATIC int
+xfs_access(
+ bhv_desc_t *bdp,
+ int mode,
+ cred_t *credp)
+{
+ xfs_inode_t *ip;
+ int error;
+
+ vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__,
+ (inst_t *)__return_address);
+
+ ip = XFS_BHVTOI(bdp);
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ error = xfs_iaccess(ip, mode, credp);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ return error;
+}
+
+
+/*
+ * xfs_readlink
+ *
+ */
+STATIC int
+xfs_readlink(
+ bhv_desc_t *bdp,
+ uio_t *uiop,
+ int ioflags,
+ cred_t *credp)
+{
+ xfs_inode_t *ip;
+ int count;
+ xfs_off_t offset;
+ int pathlen;
+ vnode_t *vp;
+ int error = 0;
+ xfs_mount_t *mp;
+ int nmaps;
+ xfs_bmbt_irec_t mval[SYMLINK_MAPS];
+ xfs_daddr_t d;
+ int byte_cnt;
+ int n;
+ xfs_buf_t *bp;
+
+ vp = BHV_TO_VNODE(bdp);
+ vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
+ ip = XFS_BHVTOI(bdp);
+ mp = ip->i_mount;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+ ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
+
+ offset = uiop->uio_offset;
+ count = uiop->uio_resid;
+
+ if (offset < 0) {
+ error = XFS_ERROR(EINVAL);
+ goto error_return;
+ }
+ if (count <= 0) {
+ error = 0;
+ goto error_return;
+ }
+
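+ /* Don't update the access time for invisible (IO_INVIS) I/O. */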
+ if (!(ioflags & IO_INVIS)) {
+ xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+ }
+
+ /*
+ * See if the symlink is stored inline.
+ */
+ pathlen = (int)ip->i_d.di_size;
+
+ if (ip->i_df.if_flags & XFS_IFINLINE) {
+ error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
+ }
+ else {
+ /*
+ * Symlink not inline. Call bmap to get it in.
+ */
+ nmaps = SYMLINK_MAPS;
+
+ error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen),
+ 0, NULL, 0, mval, &nmaps, NULL);
+
+ if (error) {
+ goto error_return;
+ }
+
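+ /*
+ * Read each mapped block of the symlink from disk and copy the
+ * contents out to the caller.
+ */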
+ for (n = 0; n < nmaps; n++) {
+ d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
+ byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
+ bp = xfs_buf_read(mp->m_ddev_targp, d,
+ BTOBB(byte_cnt), 0);
+ error = XFS_BUF_GETERROR(bp);
+ if (error) {
+ xfs_ioerror_alert("xfs_readlink",
+ ip->i_mount, bp, XFS_BUF_ADDR(bp));
+ xfs_buf_relse(bp);
+ goto error_return;
+ }
+ if (pathlen < byte_cnt)
+ byte_cnt = pathlen;
+ pathlen -= byte_cnt;
+
+ error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
+ xfs_buf_relse (bp);
+ }
+
+ }
+
+
+error_return:
+
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ return error;
+}
+
+
+/*
+ * xfs_fsync
+ *
+ * This is called to sync the inode and its data out to disk.
+ * We need to hold the I/O lock while flushing the data, and
+ * the inode lock while flushing the inode. The inode lock CANNOT
+ * be held while flushing the data, so we acquire it after we're
+ * done with that.
+ */
+STATIC int
+xfs_fsync(
+ bhv_desc_t *bdp,
+ int flag,
+ cred_t *credp,
+ xfs_off_t start,
+ xfs_off_t stop)
+{
+ xfs_inode_t *ip;
+ xfs_trans_t *tp;
+ int error;
+
+ vn_trace_entry(BHV_TO_VNODE(bdp),
+ __FUNCTION__, (inst_t *)__return_address);
+
+ ip = XFS_BHVTOI(bdp);
+
+ ASSERT(start >= 0 && stop >= -1);
+
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return XFS_ERROR(EIO);
+
+ /*
+ * We always need to make sure that the required inode state
+ * is safe on disk. The vnode might be clean but we still might
+ * need to force the log because of committed transactions that
+ * haven't hit the disk yet.
+ * Likewise, there could be unflushed non-transactional
+ * changes to the inode core that have to go to disk.
+ *
+ * The following code depends on one assumption: that
+ * any transaction that changes an inode logs the core
+ * because it has to change some field in the inode core
+ * (typically nextents or nblocks). That assumption
+ * implies that any transactions against an inode will
+ * catch any non-transactional updates. If inode-altering
+ * transactions exist that violate this assumption, the
+ * code breaks. Right now, it figures that if the involved
+ * update_* field is clear and the inode is unpinned, the
+ * inode is clean. Either it's been flushed or it's been
+ * committed and the commit has hit the disk unpinning the inode.
+ * (Note that xfs_inode_item_format() called at commit clears
+ * the update_* fields.)
+ */
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+ /* If we are flushing data then we care about update_size
+ * being set, otherwise we care about update_core
+ */
+ if ((flag & FSYNC_DATA) ?
+ (ip->i_update_size == 0) :
+ (ip->i_update_core == 0)) {
+ /*
+ * Timestamps/size haven't changed since last inode
+ * flush or inode transaction commit. That means
+ * either nothing got written or a transaction
+ * committed which caught the updates. If the
+ * latter happened and the transaction hasn't
+ * hit the disk yet, the inode will be still
+ * be pinned. If it is, force the log.
+ */
+
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+ if (xfs_ipincount(ip))