| author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
| commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
| tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/xfs/xfs_vnodeops.c | |
Linux-2.6.12-rc2 (v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/xfs/xfs_vnodeops.c')
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 4712
1 files changed, 4712 insertions, 0 deletions
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
new file mode 100644
index 00000000000..70092963ca9
--- /dev/null
+++ b/fs/xfs/xfs_vnodeops.c
@@ -0,0 +1,4712 @@
+/*
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+#include "xfs_macros.h"
+#include "xfs_types.h"
+#include "xfs_inum.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir.h"
+#include "xfs_dir2.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_itable.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_alloc.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dir_sf.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode_item.h"
+#include "xfs_inode.h"
+#include "xfs_bmap.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_rw.h"
+#include "xfs_refcache.h"
+#include "xfs_error.h"
+#include "xfs_bit.h"
+#include "xfs_rtalloc.h"
+#include "xfs_quota.h"
+#include "xfs_utils.h"
+#include "xfs_trans_space.h"
+#include "xfs_dir_leaf.h"
+#include "xfs_mac.h"
+#include "xfs_log_priv.h"
+
+
+/*
+ * The maximum pathlen is 1024 bytes. Since the minimum file system
+ * blocksize is 512 bytes, we can get a max of 2 extents back from
+ * bmapi.
+ */
+#define SYMLINK_MAPS 2
+
+/*
+ * For xfs, we check that the file isn't too big to be opened by this kernel.
+ * No other open action is required for regular files. Devices are handled
+ * through the specfs file system, pipes through fifofs. Device and
+ * fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
+ * when a new vnode is first looked up or created.
+ */
+STATIC int
+xfs_open(
+	bhv_desc_t	*bdp,
+	cred_t		*credp)
+{
+	int		mode;
+	vnode_t		*vp;
+	xfs_inode_t	*ip;
+
+	vp = BHV_TO_VNODE(bdp);
+	ip = XFS_BHVTOI(bdp);
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return XFS_ERROR(EIO);
+
+	/*
+	 * If it's a directory with any blocks, read-ahead block 0
+	 * as we're almost certain to have the next operation be a read there.
+	 */
+	if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) {
+		mode = xfs_ilock_map_shared(ip);
+		if (ip->i_d.di_nextents > 0)
+			(void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
+		xfs_iunlock(ip, mode);
+	}
+	return 0;
+}
+
+
+/*
+ * xfs_getattr
+ */
+STATIC int
+xfs_getattr(
+	bhv_desc_t	*bdp,
+	vattr_t		*vap,
+	int		flags,
+	cred_t		*credp)
+{
+	xfs_inode_t	*ip;
+	xfs_mount_t	*mp;
+	vnode_t		*vp;
+
+	vp = BHV_TO_VNODE(bdp);
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
+	ip = XFS_BHVTOI(bdp);
+	mp = ip->i_mount;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	if (!(flags & ATTR_LAZY))
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+	vap->va_size = ip->i_d.di_size;
+	if (vap->va_mask == XFS_AT_SIZE)
+		goto all_done;
+
+	vap->va_nblocks =
+		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
+	vap->va_nodeid = ip->i_ino;
+#if XFS_BIG_INUMS
+	vap->va_nodeid += mp->m_inoadd;
+#endif
+	vap->va_nlink = ip->i_d.di_nlink;
+
+	/*
+	 * Quick exit for non-stat callers
+	 */
+	if ((vap->va_mask &
+	    ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
+	      XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
+		goto all_done;
+
+	/*
+	 * Copy from in-core inode.
+	 */
+	vap->va_type = vp->v_type;
+	vap->va_mode = ip->i_d.di_mode & MODEMASK;
+	vap->va_uid = ip->i_d.di_uid;
+	vap->va_gid = ip->i_d.di_gid;
+	vap->va_projid = ip->i_d.di_projid;
+
+	/*
+	 * Check vnode type block/char vs. everything else.
+	 * Do it with bitmask because that's faster than looking
+	 * for multiple values individually.
+	 */
+	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+		vap->va_rdev = 0;
+
+		if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
+
+#if 0
+			/* Large block sizes confuse various
+			 * user space programs, so letting the
+			 * stripe size through is not a good
+			 * idea for now.
+			 */
+			vap->va_blocksize = mp->m_swidth ?
+				/*
+				 * If the underlying volume is a stripe, then
+				 * return the stripe width in bytes as the
+				 * recommended I/O size.
+				 */
+				(mp->m_swidth << mp->m_sb.sb_blocklog) :
+				/*
+				 * Return the largest of the preferred buffer
+				 * sizes since doing small I/Os into larger
+				 * buffers causes buffers to be decommissioned.
+				 * The value returned is in bytes.
+				 */
+				(1 << (int)MAX(mp->m_readio_log,
+				mp->m_writeio_log));
+
+#else
+			vap->va_blocksize =
+				/*
+				 * Return the largest of the preferred buffer
+				 * sizes since doing small I/Os into larger
+				 * buffers causes buffers to be decommissioned.
+				 * The value returned is in bytes.
+				 */
+				1 << (int)MAX(mp->m_readio_log,
+				    mp->m_writeio_log);
+#endif
+		} else {
+
+			/*
+			 * If the file blocks are being allocated from a
+			 * realtime partition, then return the inode's
+			 * realtime extent size or the realtime volume's
+			 * extent size.
+			 */
+			vap->va_blocksize = ip->i_d.di_extsize ?
+				(ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
+				(mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
+		}
+	} else {
+		vap->va_rdev = ip->i_df.if_u2.if_rdev;
+		vap->va_blocksize = BLKDEV_IOSIZE;
+	}
+
+	vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
+	vap->va_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
+	vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
+	vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
+	vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
+	vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
+
+	/*
+	 * Exit for stat callers. See if any of the rest of the fields
+	 * to be filled in are needed.
+	 */
+	if ((vap->va_mask &
+	    (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
+	     XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
+		goto all_done;
+
+	/*
+	 * Convert di_flags to xflags.
+	 */
+	vap->va_xflags = xfs_ip2xflags(ip);
+
+	/*
+	 * Exit for inode revalidate. See if any of the rest of
+	 * the fields to be filled in are needed.
+	 */
+	if ((vap->va_mask &
+	    (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
+	     XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
+		goto all_done;
+
+	vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
+	vap->va_nextents =
+		(ip->i_df.if_flags & XFS_IFEXTENTS) ?
+			ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
+			ip->i_d.di_nextents;
+	if (ip->i_afp)
+		vap->va_anextents =
+			(ip->i_afp->if_flags & XFS_IFEXTENTS) ?
+				ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
+				ip->i_d.di_anextents;
+	else
+		vap->va_anextents = 0;
+	vap->va_gen = ip->i_d.di_gen;
+
+ all_done:
+	if (!(flags & ATTR_LAZY))
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return 0;
+}
+
+
+/*
+ * xfs_setattr
+ */
+int
+xfs_setattr(
+	bhv_desc_t		*bdp,
+	vattr_t			*vap,
+	int			flags,
+	cred_t			*credp)
+{
+	xfs_inode_t		*ip;
+	xfs_trans_t		*tp;
+	xfs_mount_t		*mp;
+	int			mask;
+	int			code;
+	uint			lock_flags;
+	uint			commit_flags=0;
+	uid_t			uid=0, iuid=0;
+	gid_t			gid=0, igid=0;
+	int			timeflags = 0;
+	vnode_t			*vp;
+	xfs_prid_t		projid=0, iprojid=0;
+	int			mandlock_before, mandlock_after;
+	struct xfs_dquot	*udqp, *gdqp, *olddquot1, *olddquot2;
+	int			file_owner;
+	int			need_iolock = (flags & ATTR_DMI) == 0;
+
+	vp = BHV_TO_VNODE(bdp);
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
+	if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
+		return XFS_ERROR(EROFS);
+
+	/*
+	 * Cannot set certain attributes.
+	 */
+	mask = vap->va_mask;
+	if (mask & XFS_AT_NOSET) {
+		return XFS_ERROR(EINVAL);
+	}
+
+	ip = XFS_BHVTOI(bdp);
+	mp = ip->i_mount;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	/*
+	 * Timestamps do not need to be logged and hence do not
+	 * need to be done within a transaction.
+	 */
+	if (mask & XFS_AT_UPDTIMES) {
+		ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
+		timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
+			    ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
+			    ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
+		xfs_ichgtime(ip, timeflags);
+		return 0;
+	}
+
+	olddquot1 = olddquot2 = NULL;
+	udqp = gdqp = NULL;
+
+	/*
+	 * If disk quotas is on, we make sure that the dquots do exist on disk,
+	 * before we start any other transactions. Trying to do this later
+	 * is messy. We don't care to take a readlock to look at the ids
+	 * in inode here, because we can't hold it across the trans_reserve.
+	 * If the IDs do change before we take the ilock, we're covered
+	 * because the i_*dquot fields will get updated anyway.
+	 */
+	if (XFS_IS_QUOTA_ON(mp) && (mask & (XFS_AT_UID|XFS_AT_GID))) {
+		uint	qflags = 0;
+
+		if (mask & XFS_AT_UID) {
+			uid = vap->va_uid;
+			qflags |= XFS_QMOPT_UQUOTA;
+		} else {
+			uid = ip->i_d.di_uid;
+		}
+		if (mask & XFS_AT_GID) {
+			gid = vap->va_gid;
+			qflags |= XFS_QMOPT_GQUOTA;
+		} else {
+			gid = ip->i_d.di_gid;
+		}
+		/*
+		 * We take a reference when we initialize udqp and gdqp,
+		 * so it is important that we never blindly double trip on
+		 * the same variable. See xfs_create() for an example.
+		 */
+		ASSERT(udqp == NULL);
+		ASSERT(gdqp == NULL);
+		code = XFS_QM_DQVOPALLOC(mp, ip, uid,gid, qflags, &udqp, &gdqp);
+		if (code)
+			return (code);
+	}
+
+	/*
+	 * For the other attributes, we acquire the inode lock and
+	 * first do an error checking pass.
+	 */
+	tp = NULL;
+	lock_flags = XFS_ILOCK_EXCL;
+	if (!(mask & XFS_AT_SIZE)) {
+		if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) ||
+		    (mp->m_flags & XFS_MOUNT_WSYNC)) {
+			tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+			commit_flags = 0;
+			if ((code = xfs_trans_reserve(tp, 0,
+						     XFS_ICHANGE_LOG_RES(mp), 0,
+						     0, 0))) {
+				lock_flags = 0;
+				goto error_return;
+			}
+		}
+	} else {
+		if (DM_EVENT_ENABLED (vp->v_vfsp, ip, DM_EVENT_TRUNCATE) &&
+		    !(flags & ATTR_DMI)) {
+			int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
+			code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, vp,
+				vap->va_size, 0, dmflags, NULL);
+			if (code) {
+				lock_flags = 0;
+				goto error_return;
+			}
+		}
+		if (need_iolock)
+			lock_flags |= XFS_IOLOCK_EXCL;
+	}
+
+	xfs_ilock(ip, lock_flags);
+
+	/* boolean: are we the file owner? */
+	file_owner = (current_fsuid(credp) == ip->i_d.di_uid);
+
+	/*
+	 * Change various properties of a file.
+	 * Only the owner or users with CAP_FOWNER
+	 * capability may do these things.
+	 */
+	if (mask &
+	    (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
+	     XFS_AT_GID|XFS_AT_PROJID)) {
+		/*
+		 * CAP_FOWNER overrides the following restrictions:
+		 *
+		 * The user ID of the calling process must be equal
+		 * to the file owner ID, except in cases where the
+		 * CAP_FSETID capability is applicable.
+		 */
+		if (!file_owner && !capable(CAP_FOWNER)) {
+			code = XFS_ERROR(EPERM);
+			goto error_return;
+		}
+
+		/*
+		 * CAP_FSETID overrides the following restrictions:
+		 *
+		 * The effective user ID of the calling process shall match
+		 * the file owner when setting the set-user-ID and
+		 * set-group-ID bits on that file.
+		 *
+		 * The effective group ID or one of the supplementary group
+		 * IDs of the calling process shall match the group owner of
+		 * the file when setting the set-group-ID bit on that file
+		 */
+		if (mask & XFS_AT_MODE) {
+			mode_t m = 0;
+
+			if ((vap->va_mode & S_ISUID) && !file_owner)
+				m |= S_ISUID;
+			if ((vap->va_mode & S_ISGID) &&
+			    !in_group_p((gid_t)ip->i_d.di_gid))
+				m |= S_ISGID;
+#if 0
+			/* Linux allows this, Irix doesn't. */
+			if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR)
+				m |= S_ISVTX;
+#endif
+			if (m && !capable(CAP_FSETID))
+				vap->va_mode &= ~m;
+		}
+	}
+
+	/*
+	 * Change file ownership. Must be the owner or privileged.
+	 * If the system was configured with the "restricted_chown"
+	 * option, the owner is not permitted to give away the file,
+	 * and can change the group id only to a group of which he
+	 * or she is a member.
+	 */
+	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+		/*
+		 * These IDs could have changed since we last looked at them.
+		 * But, we're assured that if the ownership did change
+		 * while we didn't have the inode locked, inode's dquot(s)
+		 * would have changed also.
+		 */
+		iuid = ip->i_d.di_uid;
+		iprojid = ip->i_d.di_projid;
+		igid = ip->i_d.di_gid;
+		gid = (mask & XFS_AT_GID) ? vap->va_gid : igid;
+		uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid;
+		projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
+			 iprojid;
+
+		/*
+		 * CAP_CHOWN overrides the following restrictions:
+		 *
+		 * If _POSIX_CHOWN_RESTRICTED is defined, this capability
+		 * shall override the restriction that a process cannot
+		 * change the user ID of a file it owns and the restriction
+		 * that the group ID supplied to the chown() function
+		 * shall be equal to either the group ID or one of the
+		 * supplementary group IDs of the calling process.
+		 *
+		 * XXX: How does restricted_chown affect projid?
+		 */
+		if (restricted_chown &&
+		    (iuid != uid || (igid != gid &&
+				     !in_group_p((gid_t)gid))) &&
+		    !capable(CAP_CHOWN)) {
+			code = XFS_ERROR(EPERM);
+			goto error_return;
+		}
+		/*
+		 * Do a quota reservation only if uid or gid is actually
+		 * going to change.
+		 */
+		if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+		    (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
+			ASSERT(tp);
+			code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+						capable(CAP_FOWNER) ?
+						XFS_QMOPT_FORCE_RES : 0);
+			if (code)	/* out of quota */
+				goto error_return;
+		}
+	}
+
+	/*
+	 * Truncate file. Must have write permission and not be a directory.
+	 */
+	if (mask & XFS_AT_SIZE) {
+		/* Short circuit the truncate case for zero length files */
+		if ((vap->va_size == 0) &&
+		    (ip->i_d.di_size == 0) && (ip->i_d.di_nextents == 0)) {
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			lock_flags &= ~XFS_ILOCK_EXCL;
+			if (mask & XFS_AT_CTIME)
+				xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+			code = 0;
+			goto error_return;
+		}
+
+		if (vp->v_type == VDIR) {
+			code = XFS_ERROR(EISDIR);
+			goto error_return;
+		} else if (vp->v_type != VREG) {
+			code = XFS_ERROR(EINVAL);
+			goto error_return;
+		}
+		/*
+		 * Make sure that the dquots are attached to the inode.
+		 */
+		if ((code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED)))
+			goto error_return;
+	}
+
+	/*
+	 * Change file access or modified times.
+	 */
+	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+		if (!file_owner) {
+			if ((flags & ATTR_UTIME) &&
+			    !capable(CAP_FOWNER)) {
+				code = XFS_ERROR(EPERM);
+				goto error_return;
+			}
+		}
+	}
+
+	/*
+	 * Change extent size or realtime flag.
+	 */
+	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
+		/*
+		 * Can't change extent size if any extents are allocated.
+		 */
+		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
+		    (mask & XFS_AT_EXTSIZE) &&
+		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+		     vap->va_extsize) ) {
+			code = XFS_ERROR(EINVAL);	/* EFBIG? */
+			goto error_return;
+		}
+
+		/*
+		 * Can't set extent size unless the file is marked, or
+		 * about to be marked as a realtime file.
+		 *
+		 * This check will be removed when fixed size extents
+		 * with buffered data writes is implemented.
+		 *
+		 */
+		if ((mask & XFS_AT_EXTSIZE) &&
+		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
+		     vap->va_extsize) &&
+		    (!((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
+		       ((mask & XFS_AT_XFLAGS) &&
+			(vap->va_xflags & XFS_XFLAG_REALTIME))))) {
+			code = XFS_ERROR(EINVAL);
+			goto error_return;
+		}
+
+		/*
+		 * Can't change realtime flag if any extents are allocated.
+		 */
+		if (ip->i_d.di_nextents && (mask & XFS_AT_XFLAGS) &&
+		    (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
+		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
+			code = XFS_ERROR(EINVAL);	/* EFBIG? */
+			goto error_return;
+		}
+		/*
+		 * Extent size must be a multiple of the appropriate block
+		 * size, if set at all.
+		 */
+		if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
+			xfs_extlen_t	size;
+
+			if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ||
+			    ((mask & XFS_AT_XFLAGS) &&
+			     (vap->va_xflags & XFS_XFLAG_REALTIME))) {
+				size = mp->m_sb.sb_rextsize <<
+				       mp->m_sb.sb_blocklog;
+			} else {
+				size = mp->m_sb.sb_blocksize;
+			}
+			if (vap->va_extsize % size) {
+				code = XFS_ERROR(EINVAL);
+				goto error_return;
+			}
+		}
+		/*
+		 * If realtime flag is set then must have realtime data.
+		 */
+		if ((mask & XFS_AT_XFLAGS) &&
+		    (vap->va_xflags & XFS_XFLAG_REALTIME)) {
+			if ((mp->m_sb.sb_rblocks == 0) ||
+			    (mp->m_sb.sb_rextsize == 0) ||
+			    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
+				code = XFS_ERROR(EINVAL);
+				goto error_return;
+			}
+		}
+
+		/*
+		 * Can't modify an immutable/append-only file unless
+		 * we have appropriate permission.
+		 */
+		if ((mask & XFS_AT_XFLAGS) &&
+		    (ip->i_d.di_flags &
+				(XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
+		     (vap->va_xflags &
+				(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+		    !capable(CAP_LINUX_IMMUTABLE)) {
+			code = XFS_ERROR(EPERM);
+			goto error_return;
+		}
+	}
+
+	/*
+	 * Now we can make the changes. Before we join the inode
+	 * to the transaction, if XFS_AT_SIZE is set then take care of
+	 * the part of the truncation that must be done without the
+	 * inode lock. This needs to be done before joining the inode
+	 * to the transaction, because the inode cannot be unlocked
+	 * once it is a part of the transaction.
+	 */
+	if (mask & XFS_AT_SIZE) {
+		code = 0;
+		if (vap->va_size > ip->i_d.di_size)
+			code = xfs_igrow_start(ip, vap->va_size, credp);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		if (!code)
+			code = xfs_itruncate_data(ip, vap->va_size);
+		if (code) {
+			ASSERT(tp == NULL);
+			lock_flags &= ~XFS_ILOCK_EXCL;
+			ASSERT(lock_flags == XFS_IOLOCK_EXCL);
+			goto error_return;
+		}
+		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
+		if ((code = xfs_trans_reserve(tp, 0,
+					     XFS_ITRUNCATE_LOG_RES(mp), 0,
+					     XFS_TRANS_PERM_LOG_RES,
+					     XFS_ITRUNCATE_LOG_COUNT))) {
+			xfs_trans_cancel(tp, 0);
+			if (need_iolock)
+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			return code;
+		}
+		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+	}
+
+	if (tp) {
+		xfs_trans_ijoin(tp, ip, lock_flags);
+		xfs_trans_ihold(tp, ip);
+	}
+
+	/* determine whether mandatory locking mode changes */
+	mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
+
+	/*
+	 * Truncate file. Must have write permission and not be a directory.
+	 */
+	if (mask & XFS_AT_SIZE) {
+		if (vap->va_size > ip->i_d.di_size) {
+			xfs_igrow_finish(tp, ip, vap->va_size,
+			    !(flags & ATTR_DMI));
+		} else if ((vap->va_size <= ip->i_d.di_size) ||
+			   ((vap->va_size == 0) && ip->i_d.di_nextents)) {
+			/*
+			 * signal a sync transaction unless
+			 * we're truncating an already unlinked
+			 * file on a wsync filesystem
+			 */
+			code = xfs_itruncate_finish(&tp, ip,
+					    (xfs_fsize_t)vap->va_size,
+					    XFS_DATA_FORK,
+					    ((ip->i_d.di_nlink != 0 ||
+					      !(mp->m_flags & XFS_MOUNT_WSYNC))
+					     ? 1 : 0));
+			if (code) {
+				goto abort_return;
+			}
+		}
+		/*
+		 * Have to do this even if the file's size doesn't change.
+		 */
+		timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+	}
+
+	/*
+	 * Change file access modes.
+	 */
+	if (mask & XFS_AT_MODE) {
+		ip->i_d.di_mode &= S_IFMT;
+		ip->i_d.di_mode |= vap->va_mode & ~S_IFMT;
+
+		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+		timeflags |= XFS_ICHGTIME_CHG;
+	}
+
+	/*
+	 * Change file ownership. Must be the owner or privileged.
+	 * If the system was configured with the "restricted_chown"
+	 * option, the owner is not permitted to give away the file,
+	 * and can change the group id only to a group of which he
+	 * or she is a member.
+	 */
+	if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) {
+		/*
+		 * CAP_FSETID overrides the following restrictions:
+		 *
+		 * The set-user-ID and set-group-ID bits of a file will be
+		 * cleared upon successful return from chown()
+		 */
+		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+		    !capable(CAP_FSETID)) {
+			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+		}
+
+		/*
+		 * Change the ownerships and register quota modifications
+		 * in the transaction.
+		 */
+		if (iuid != uid) {
+			if (XFS_IS_UQUOTA_ON(mp)) {
+				ASSERT(mask & XFS_AT_UID);
+				ASSERT(udqp);
+				olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_udquot, udqp);
+			}
+			ip->i_d.di_uid = uid;
+		}
+		if (igid != gid) {
+			if (XFS_IS_GQUOTA_ON(mp)) {
+				ASSERT(mask & XFS_AT_GID);
+				ASSERT(gdqp);
+				olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+							&ip->i_gdquot, gdqp);
+			}
+			ip->i_d.di_gid = gid;
+		}
+		if (iprojid != projid) {
+			ip->i_d.di_projid = projid;
+			/*
+			 * We may have to rev the inode as well as
+			 * the superblock version number since projids didn't
+			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
+			 */
+			if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
+				xfs_bump_ino_vers2(tp, ip);
+		}
+
+		xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+		timeflags |= XFS_ICHGTIME_CHG;
+	}
+
+
+	/*
+	 * Change file access or modified times.
+	 */
+	if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) {
+		if (mask & XFS_AT_ATIME) {
+			ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec;
+			ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec;
+			ip->i_update_core = 1;
+			timeflags &= ~XFS_ICHGTIME_ACC;
+		}
+		if (mask & XFS_AT_MTIME) {
+			ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec;
+			ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec;
+			timeflags &= ~XFS_ICHGTIME_MOD;
+			timeflags |= XFS_ICHGTIME_CHG;
+		}
+		if (tp && (flags & ATTR_UTIME))
+			xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+	}
+
+	/*
+	 * Change XFS-added attributes.
+	 */
+	if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
+		if (mask & XFS_AT_EXTSIZE) {
+			/*
+			 * Converting bytes to fs blocks.
+			 */
+			ip->i_d.di_extsize = vap->va_extsize >>
+				mp->m_sb.sb_blocklog;
+		}
+		if (mask & XFS_AT_XFLAGS) {
+			uint	di_flags;
+
+			/* can't set PREALLOC this way, just preserve it */
+			di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
+			if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
+				di_flags |= XFS_DIFLAG_IMMUTABLE;
+			if (vap->va_xflags & XFS_XFLAG_APPEND)
+				di_flags |= XFS_DIFLAG_APPEND;
+			if (vap->va_xflags & XFS_XFLAG_SYNC)
+				di_flags |= XFS_DIFLAG_SYNC;
+			if (vap->va_xflags & XFS_XFLAG_NOATIME)
+				di_flags |= XFS_DIFLAG_NOATIME;
+			if (vap->va_xflags & XFS_XFLAG_NODUMP)
+				di_flags |= XFS_DIFLAG_NODUMP;
+			if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+				if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
+					di_flags |= XFS_DIFLAG_RTINHERIT;
+				if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
+					di_flags |= XFS_DIFLAG_NOSYMLINKS;
+			} else {
+				if (vap->va_xflags & XFS_XFLAG_REALTIME) {
+					di_flags |= XFS_DIFLAG_REALTIME;
+					ip->i_iocore.io_flags |= XFS_IOCORE_RT;
+				} else {
+					ip->i_iocore.io_flags &= ~XFS_IOCORE_RT;
+				}
+			}
+			ip->i_d.di_flags = di_flags;
+		}
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+		timeflags |= XFS_ICHGTIME_CHG;
+	}
+
+	/*
+	 * Change file inode change time only if XFS_AT_CTIME set
+	 * AND we have been called by a DMI function.
+	 */
+
+	if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) {
+		ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec;
+		ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec;
+		ip->i_update_core = 1;
+		timeflags &= ~XFS_ICHGTIME_CHG;
+	}
+
+	/*
+	 * Send out timestamp changes that need to be set to the
+	 * current time.  Not done when called by a DMI function.
+	 */
+	if (timeflags && !(flags & ATTR_DMI))
+		xfs_ichgtime(ip, timeflags);
+
+	XFS_STATS_INC(xs_ig_attrchg);
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * transaction goes to disk before returning to the user.
+	 * This is slightly sub-optimal in that truncates require
+	 * two sync transactions instead of one for wsync filesytems.
+	 * One for the truncate and one for the timestamps since we
+	 * don't want to change the timestamps unless we're sure the
+	 * truncate worked. Truncates are less than 1% of the laddis
+	 * mix so this probably isn't worth the trouble to optimize.
+	 */
+	code = 0;
+	if (tp) {
+		if (mp->m_flags & XFS_MOUNT_WSYNC)
+			xfs_trans_set_sync(tp);
+
+		code = xfs_trans_commit(tp, commit_flags, NULL);
+	}
+
+	/*
+	 * If the (regular) file's mandatory locking mode changed, then
+	 * notify the vnode. We do this under the inode lock to prevent
+	 * racing calls to vop_vnode_change.
+	 */
+	mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
+	if (mandlock_before != mandlock_after) {
+		VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING,
+				 mandlock_after);
+	}
+
+	xfs_iunlock(ip, lock_flags);
+
+	/*
+	 * Release any dquot(s) the inode had kept before chown.
+	 */
+	XFS_QM_DQRELE(mp, olddquot1);
+	XFS_QM_DQRELE(mp, olddquot2);
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
+
+	if (code) {
+		return code;
+	}
+
+	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_ATTRIBUTE) &&
+	    !(flags & ATTR_DMI)) {
+		(void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, vp, DM_RIGHT_NULL,
+					NULL, DM_RIGHT_NULL, NULL, NULL,
+					0, 0, AT_DELAY_FLAG(flags));
+	}
+	return 0;
+
+ abort_return:
+	commit_flags |= XFS_TRANS_ABORT;
+	/* FALLTHROUGH */
+ error_return:
+	XFS_QM_DQRELE(mp, udqp);
+	XFS_QM_DQRELE(mp, gdqp);
+	if (tp) {
+		xfs_trans_cancel(tp, commit_flags);
+	}
+	if (lock_flags != 0) {
+		xfs_iunlock(ip, lock_flags);
+	}
+	return code;
+}
+
+
+/*
+ * xfs_access
+ * Null conversion from vnode mode bits to inode mode bits, as in efs.
+ */
+STATIC int
+xfs_access(
+	bhv_desc_t	*bdp,
+	int		mode,
+	cred_t		*credp)
+{
+	xfs_inode_t	*ip;
+	int		error;
+
+	vn_trace_entry(BHV_TO_VNODE(bdp), __FUNCTION__,
+					   (inst_t *)__return_address);
+
+	ip = XFS_BHVTOI(bdp);
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	error = xfs_iaccess(ip, mode, credp);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return error;
+}
+
+
+/*
+ * xfs_readlink
+ *
+ */
+STATIC int
+xfs_readlink(
+	bhv_desc_t	*bdp,
+	uio_t		*uiop,
+	int		ioflags,
+	cred_t		*credp)
+{
+	xfs_inode_t	*ip;
+	int		count;
+	xfs_off_t	offset;
+	int		pathlen;
+	vnode_t		*vp;
+	int		error = 0;
+	xfs_mount_t	*mp;
+	int		nmaps;
+	xfs_bmbt_irec_t	mval[SYMLINK_MAPS];
+	xfs_daddr_t	d;
+	int		byte_cnt;
+	int		n;
+	xfs_buf_t	*bp;
+
+	vp = BHV_TO_VNODE(bdp);
+	vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
+
+	ip = XFS_BHVTOI(bdp);
+	mp = ip->i_mount;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFLNK);
+
+	offset = uiop->uio_offset;
+	count = uiop->uio_resid;
+
+	if (offset < 0) {
+		error = XFS_ERROR(EINVAL);
+		goto error_return;
+	}
+	if (count <= 0) {
+		error = 0;
+		goto error_return;
+	}
+
+	if (!(ioflags & IO_INVIS)) {
+		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
+	}
+
+	/*
+	 * See if the symlink is stored inline.
+	 */
+	pathlen = (int)ip->i_d.di_size;
+
+	if (ip->i_df.if_flags & XFS_IFINLINE) {
+		error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
+	}
+	else {
+		/*
+		 * Symlink not inline. Call bmap to get it in.
+		 */
+		nmaps = SYMLINK_MAPS;
+
+		error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen),
+				  0, NULL, 0, mval, &nmaps, NULL);
+
+		if (error) {
+			goto error_return;
+		}
+
+		for (n = 0; n < nmaps; n++) {
+			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
+			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
+			bp = xfs_buf_read(mp->m_ddev_targp, d,
+				      BTOBB(byte_cnt), 0);
+			error = XFS_BUF_GETERROR(bp);
+			if (error) {
+				xfs_ioerror_alert("xfs_readlink",
+					  ip->i_mount, bp, XFS_BUF_ADDR(bp));
+				xfs_buf_relse(bp);
+				goto error_return;
+			}
+			if (pathlen < byte_cnt)
+				byte_cnt = pathlen;
+			pathlen -= byte_cnt;
+
+			error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
+			xfs_buf_relse (bp);
+		}
+
+	}
+
+
+error_return:
+
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	return error;
+}
+
+
+/*
+ * xfs_fsync
+ *
+ * This is called to sync the inode and its data out to disk.
+ * We need to hold the I/O lock while flushing the data, and
+ * the inode lock while flushing the inode. The inode lock CANNOT
+ * be held while flushing the data, so acquire after we're done
+ * with that.
+ */
+STATIC int
+xfs_fsync(
+	bhv_desc_t	*bdp,
+	int		flag,
+	cred_t		*credp,
+	xfs_off_t	start,
+	xfs_off_t	stop)
+{
+	xfs_inode_t	*ip;
+	xfs_trans_t	*tp;
+	int		error;
+
+	vn_trace_entry(BHV_TO_VNODE(bdp),
+			__FUNCTION__, (inst_t *)__return_address);
+
+	ip = XFS_BHVTOI(bdp);
+
+	ASSERT(start >= 0 && stop >= -1);
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return XFS_ERROR(EIO);
+
+	/*
+	 * We always need to make sure that the required inode state
+	 * is safe on disk. The vnode might be clean but because
+	 * of committed transactions that haven't hit the disk yet.
+	 * Likewise, there could be unflushed non-transactional
+	 * changes to the inode core that have to go to disk.
+	 *
+	 * The following code depends on one assumption: that
+	 * any transaction that changes an inode logs the core
+	 * because it has to change some field in the inode core
+	 * (typically nextents or nblocks). That assumption
+	 * implies that any transactions against an inode will
+	 * catch any non-transactional updates. If inode-altering
+	 * transactions exist that violate this assumption, the
+	 * code breaks. Right now, it figures that if the involved
+	 * update_* field is clear and the inode is unpinned, the
+	 * inode is clean. Either it's been flushed or it's been
+	 * committed and the commit has hit the disk unpinning the inode.
+	 * (Note that xfs_inode_item_format() called at commit clears
+	 * the update_* fields.)
+	 */
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+
+	/* If we are flushing data then we care about update_size
+	 * being set, otherwise we care about update_core
+	 */
+	if ((flag & FSYNC_DATA) ?
+			(ip->i_update_size == 0) :
+			(ip->i_update_core == 0)) {
+		/*
+		 * Timestamps/size haven't changed since last inode
+		 * flush or inode transaction commit. That means
+		 * either nothing got written or a transaction
+		 * committed which caught the updates. If the
+		 * latter happened and the transaction hasn't
+		 * hit the disk yet, the inode will be still
+		 * be pinned. If it is, force the log.
+		 */
+
+		xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+		if (xfs_ipincount(ip))