Diffstat (limited to 'fs/jfs'): 48 files changed, 4093 insertions(+), 3621 deletions(-)
diff --git a/fs/jfs/Kconfig b/fs/jfs/Kconfig new file mode 100644 index 00000000000..57cef19951d --- /dev/null +++ b/fs/jfs/Kconfig @@ -0,0 +1,50 @@ +config JFS_FS + tristate "JFS filesystem support" + select NLS + select CRC32 + help + This is a port of IBM's Journaled Filesystem . More information is + available in the file <file:Documentation/filesystems/jfs.txt>. + + If you do not intend to use the JFS filesystem, say N. + +config JFS_POSIX_ACL + bool "JFS POSIX Access Control Lists" + depends on JFS_FS + select FS_POSIX_ACL + help + Posix Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the Posix ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N + +config JFS_SECURITY + bool "JFS Security Labels" + depends on JFS_FS + help + Security labels support alternative access control models + implemented by security modules like SELinux. This option + enables an extended attribute handler for file security + labels in the jfs filesystem. + + If you are not using a security module that requires using + extended attributes for file security labels, say N. + +config JFS_DEBUG + bool "JFS debugging" + depends on JFS_FS + help + If you are experiencing any problems with the JFS filesystem, say + Y here. This will result in additional debugging messages to be + written to the system log. Under normal circumstances, this + results in very little overhead. + +config JFS_STATISTICS + bool "JFS statistics" + depends on JFS_FS + help + Enabling this option will cause statistics from the JFS file system + to be made available to the user in the /proc/fs/jfs/ directory. diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile index 6f1e0e95587..d20d4737b3e 100644 --- a/fs/jfs/Makefile +++ b/fs/jfs/Makefile @@ -6,10 +6,11 @@ obj-$(CONFIG_JFS_FS) += jfs.o jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \ jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \ - jfs_unicode.o jfs_dtree.o jfs_inode.o \ + jfs_unicode.o jfs_dtree.o jfs_inode.o jfs_discard.o \ jfs_extent.o symlink.o jfs_metapage.o \ - jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o resize.o xattr.o + jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \ + resize.o xattr.o ioctl.o jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o -EXTRA_CFLAGS += -D_JFS_4K +ccflags-y := -D_JFS_4K diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 461e4934ca7..0c8ca830b11 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -5,53 +5,50 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/sched.h> +#include <linux/slab.h> #include <linux/fs.h> -#include <linux/quotaops.h> #include <linux/posix_acl_xattr.h> #include "jfs_incore.h" #include "jfs_txnmgr.h" #include "jfs_xattr.h" #include "jfs_acl.h" -static struct posix_acl *jfs_get_acl(struct inode *inode, int type) +struct posix_acl *jfs_get_acl(struct inode *inode, int type) { struct posix_acl *acl; char *ea_name; - struct jfs_inode_info *ji = JFS_IP(inode); - struct posix_acl **p_acl; int size; char *value = NULL; + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + switch(type) { case ACL_TYPE_ACCESS: ea_name = POSIX_ACL_XATTR_ACCESS; - p_acl = &ji->i_acl; break; case ACL_TYPE_DEFAULT: ea_name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &ji->i_default_acl; break; default: return ERR_PTR(-EINVAL); } - if (*p_acl != JFS_ACL_NOT_CACHED) - return posix_acl_dup(*p_acl); - size = __jfs_getxattr(inode, ea_name, NULL, 0); if (size > 0) { @@ -62,53 +59,53 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) } if (size < 0) { - if (size == -ENODATA) { - *p_acl = NULL; + if (size == -ENODATA) acl = NULL; - } else + else acl = ERR_PTR(size); } else { - acl = posix_acl_from_xattr(value, size); - if (!IS_ERR(acl)) - *p_acl = posix_acl_dup(acl); + acl = posix_acl_from_xattr(&init_user_ns, value, size); } kfree(value); + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); return acl; } -static int jfs_set_acl(tid_t tid, struct inode *inode, int type, +static int __jfs_set_acl(tid_t tid, struct inode *inode, int type, struct posix_acl *acl) { char *ea_name; - struct jfs_inode_info *ji = JFS_IP(inode); - struct posix_acl **p_acl; int rc; int size = 0; char *value = NULL; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - switch(type) { - case ACL_TYPE_ACCESS: - ea_name = POSIX_ACL_XATTR_ACCESS; - p_acl = &ji->i_acl; - break; - case ACL_TYPE_DEFAULT: - ea_name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &ji->i_default_acl; - if (!S_ISDIR(inode->i_mode)) - return acl ? 
-EACCES : 0; - break; - default: - return -EINVAL; + switch (type) { + case ACL_TYPE_ACCESS: + ea_name = POSIX_ACL_XATTR_ACCESS; + if (acl) { + rc = posix_acl_equiv_mode(acl, &inode->i_mode); + if (rc < 0) + return rc; + inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + if (rc == 0) + acl = NULL; + } + break; + case ACL_TYPE_DEFAULT: + ea_name = POSIX_ACL_XATTR_DEFAULT; + break; + default: + return -EINVAL; } + if (acl) { size = posix_acl_xattr_size(acl->a_count); value = kmalloc(size, GFP_KERNEL); if (!value) return -ENOMEM; - rc = posix_acl_to_xattr(acl, value, size); + rc = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (rc < 0) goto out; } @@ -116,128 +113,49 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type, out: kfree(value); - if (!rc) { - if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED)) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); - } - return rc; -} - -static int jfs_check_acl(struct inode *inode, int mask) -{ - struct jfs_inode_info *ji = JFS_IP(inode); - - if (ji->i_acl == JFS_ACL_NOT_CACHED) { - struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - posix_acl_release(acl); - } - - if (ji->i_acl) - return posix_acl_permission(inode, ji->i_acl, mask); - return -EAGAIN; -} - -int jfs_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return generic_permission(inode, mask, jfs_check_acl); -} - -int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) -{ - struct posix_acl *acl = NULL; - struct posix_acl *clone; - mode_t mode; - int rc = 0; - - if (S_ISLNK(inode->i_mode)) - return 0; - - acl = jfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - - if (acl) { - if (S_ISDIR(inode->i_mode)) { - rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); - if (rc) - goto cleanup; - } - clone = posix_acl_clone(acl, GFP_KERNEL); - if (!clone) { - rc = -ENOMEM; - goto cleanup; - } - mode = inode->i_mode; - rc = posix_acl_create_masq(clone, &mode); - if (rc >= 0) { - inode->i_mode = mode; - if (rc > 0) - rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, - clone); - } - posix_acl_release(clone); -cleanup: - posix_acl_release(acl); - } else - inode->i_mode &= ~current->fs->umask; + if (!rc) + set_cached_acl(inode, type, acl); return rc; } -static int jfs_acl_chmod(struct inode *inode) +int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct posix_acl *acl, *clone; int rc; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - - rc = posix_acl_chmod_masq(clone, inode->i_mode); - if (!rc) { - tid_t tid = txBegin(inode->i_sb, 0); - down(&JFS_IP(inode)->commit_sem); - rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone); - if (!rc) - rc = txCommit(tid, 1, &inode, 0); - txEnd(tid); - up(&JFS_IP(inode)->commit_sem); - } - - posix_acl_release(clone); + tid_t tid; + + tid = txBegin(inode->i_sb, 0); + mutex_lock(&JFS_IP(inode)->commit_mutex); + rc = __jfs_set_acl(tid, inode, type, acl); + if (!rc) + rc = txCommit(tid, 1, &inode, 0); + txEnd(tid); + mutex_unlock(&JFS_IP(inode)->commit_mutex); return rc; } -int jfs_setattr(struct dentry *dentry, struct iattr *iattr) +int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) { - struct inode *inode = dentry->d_inode; - int rc; + struct posix_acl *default_acl, *acl; + int rc = 0; - 
rc = inode_change_ok(inode, iattr); + rc = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); if (rc) return rc; - if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { - if (DQUOT_TRANSFER(inode, iattr)) - return -EDQUOT; + if (default_acl) { + rc = __jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, default_acl); + posix_acl_release(default_acl); } - rc = inode_setattr(inode, iattr); + if (acl) { + if (!rc) + rc = __jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); + } - if (!rc && (iattr->ia_valid & ATTR_MODE)) - rc = jfs_acl_chmod(inode); + JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) | + inode->i_mode; return rc; } diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h index ab7cd0567c9..fa92f7f1d0d 100644 --- a/fs/jfs/endian24.h +++ b/fs/jfs/endian24.h @@ -1,5 +1,5 @@ /* - * Copyright (c) International Business Machines Corp., 2001 + * Copyright (C) International Business Machines Corp., 2001 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -29,7 +29,7 @@ __u32 __x = (x); \ ((__u32)( \ ((__x & (__u32)0x000000ffUL) << 16) | \ - (__x & (__u32)0x0000ff00UL) | \ + (__x & (__u32)0x0000ff00UL) | \ ((__x & (__u32)0x00ff0000UL) >> 16) )); \ }) diff --git a/fs/jfs/file.c b/fs/jfs/file.c index c2c19c9ed9a..33aa0cc1f8b 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -4,20 +4,23 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/mm.h> #include <linux/fs.h> +#include <linux/posix_acl.h> +#include <linux/quotaops.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_dmap.h" @@ -26,19 +29,26 @@ #include "jfs_acl.h" #include "jfs_debug.h" -int jfs_fsync(struct file *file, struct dentry *dentry, int datasync) +int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; int rc = 0; + rc = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (rc) + return rc; + + mutex_lock(&inode->i_mutex); if (!(inode->i_state & I_DIRTY) || (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); + mutex_unlock(&inode->i_mutex); return rc; } rc |= jfs_commit_inode(inode, 1); + mutex_unlock(&inode->i_mutex); return rc ? 
-EIO : 0; } @@ -47,7 +57,7 @@ static int jfs_open(struct inode *inode, struct file *file) { int rc; - if ((rc = generic_file_open(inode, file))) + if ((rc = dquot_file_open(inode, file))) return rc; /* @@ -64,9 +74,9 @@ static int jfs_open(struct inode *inode, struct file *file) struct jfs_inode_info *ji = JFS_IP(inode); spin_lock_irq(&ji->ag_lock); if (ji->active_ag == -1) { - ji->active_ag = ji->agno; - atomic_inc( - &JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]); + struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb); + ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb); + atomic_inc( &jfs_sb->bmap->db_active[ji->active_ag]); } spin_unlock_irq(&ji->ag_lock); } @@ -88,29 +98,70 @@ static int jfs_release(struct inode *inode, struct file *file) return 0; } -struct inode_operations jfs_file_inode_operations = { - .truncate = jfs_truncate, +int jfs_setattr(struct dentry *dentry, struct iattr *iattr) +{ + struct inode *inode = dentry->d_inode; + int rc; + + rc = inode_change_ok(inode, iattr); + if (rc) + return rc; + + if (is_quota_modification(inode, iattr)) + dquot_initialize(inode); + if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || + (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { + rc = dquot_transfer(inode, iattr); + if (rc) + return rc; + } + + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(inode)) { + inode_dio_wait(inode); + + rc = inode_newsize_ok(inode, iattr->ia_size); + if (rc) + return rc; + + truncate_setsize(inode, iattr->ia_size); + jfs_truncate(inode); + } + + setattr_copy(inode, iattr); + mark_inode_dirty(inode); + + if (iattr->ia_valid & ATTR_MODE) + rc = posix_acl_chmod(inode, inode->i_mode); + return rc; +} + +const struct inode_operations jfs_file_inode_operations = { .setxattr = jfs_setxattr, .getxattr = jfs_getxattr, .listxattr = jfs_listxattr, .removexattr = jfs_removexattr, -#ifdef CONFIG_JFS_POSIX_ACL .setattr = jfs_setattr, - .permission = jfs_permission, +#ifdef CONFIG_JFS_POSIX_ACL + .get_acl = jfs_get_acl, + .set_acl = jfs_set_acl, #endif }; -struct file_operations jfs_file_operations = { +const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, - .write = generic_file_write, - .read = generic_file_read, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, - .readv = generic_file_readv, - .writev = generic_file_writev, - .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = iter_file_splice_write, .fsync = jfs_fsync, .release = jfs_release, + .unlocked_ioctl = jfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = jfs_compat_ioctl, +#endif }; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 9f942ca8e4e..bd3df1ca3c9 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -4,16 +4,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -22,6 +22,8 @@ #include <linux/buffer_head.h> #include <linux/pagemap.h> #include <linux/quotaops.h> +#include <linux/writeback.h> +#include <linux/aio.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -31,11 +33,21 @@ #include "jfs_debug.h" -void jfs_read_inode(struct inode *inode) +struct inode *jfs_iget(struct super_block *sb, unsigned long ino) { - if (diRead(inode)) { - make_bad_inode(inode); - return; + struct inode *inode; + int ret; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ret = diRead(inode); + if (ret < 0) { + iget_failed(inode); + return ERR_PTR(ret); } if (S_ISREG(inode->i_mode)) { @@ -49,12 +61,20 @@ void jfs_read_inode(struct inode *inode) if (inode->i_size >= IDATASIZE) { inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &jfs_aops; - } else - inode->i_op = &jfs_symlink_inode_operations; + } else { + inode->i_op = &jfs_fast_symlink_inode_operations; + /* + * The inline data should be null-terminated, but + * don't let on-disk corruption crash the kernel + */ + JFS_IP(inode)->i_inline[inode->i_size] = '\0'; + } } else { inode->i_op = &jfs_file_inode_operations; init_special_inode(inode, inode->i_mode, inode->i_rdev); } + unlock_new_inode(inode); + return inode; } /* @@ -89,22 +109,24 @@ int jfs_commit_inode(struct inode *inode, int wait) } tid = txBegin(inode->i_sb, COMMIT_INODE); - down(&JFS_IP(inode)->commit_sem); + mutex_lock(&JFS_IP(inode)->commit_mutex); /* - * Retest inode state after taking commit_sem + * Retest inode state after taking commit_mutex */ if (inode->i_nlink && test_cflag(COMMIT_Dirty, inode)) rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0); txEnd(tid); - up(&JFS_IP(inode)->commit_sem); + mutex_unlock(&JFS_IP(inode)->commit_mutex); return rc; } -int jfs_write_inode(struct inode *inode, int wait) +int jfs_write_inode(struct inode *inode, struct writeback_control *wbc) { - if (test_cflag(COMMIT_Nolink, inode)) + int wait = wbc->sync_mode == WB_SYNC_ALL; + + if (inode->i_nlink == 0) return 0; /* * If COMMIT_DIRTY is not set, the inode isn't really dirty. @@ -124,31 +146,35 @@ int jfs_write_inode(struct inode *inode, int wait) return 0; } -void jfs_delete_inode(struct inode *inode) +void jfs_evict_inode(struct inode *inode) { - jfs_info("In jfs_delete_inode, inode = 0x%p", inode); + jfs_info("In jfs_evict_inode, inode = 0x%p", inode); - if (!is_bad_inode(inode) && - (JFS_IP(inode)->fileset == FILESYSTEM_I)) { - truncate_inode_pages(&inode->i_data, 0); + if (!inode->i_nlink && !is_bad_inode(inode)) { + dquot_initialize(inode); - if (test_cflag(COMMIT_Freewmap, inode)) - jfs_free_zero_link(inode); + if (JFS_IP(inode)->fileset == FILESYSTEM_I) { + truncate_inode_pages_final(&inode->i_data); - diFree(inode); + if (test_cflag(COMMIT_Freewmap, inode)) + jfs_free_zero_link(inode); - /* - * Free the inode from the quota allocation. - */ - DQUOT_INIT(inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - } + diFree(inode); + /* + * Free the inode from the quota allocation. 
+ */ + dquot_initialize(inode); + dquot_free_inode(inode); + } + } else { + truncate_inode_pages_final(&inode->i_data); + } clear_inode(inode); + dquot_drop(inode); } -void jfs_dirty_inode(struct inode *inode) +void jfs_dirty_inode(struct inode *inode, int flags) { static int noisy = 5; @@ -167,27 +193,26 @@ void jfs_dirty_inode(struct inode *inode) set_cflag(COMMIT_Dirty, inode); } -static int -jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, - struct buffer_head *bh_result, int create) +int jfs_get_block(struct inode *ip, sector_t lblock, + struct buffer_head *bh_result, int create) { s64 lblock64 = lblock; int rc = 0; xad_t xad; s64 xaddr; int xflag; - s32 xlen = max_blocks; + s32 xlen = bh_result->b_size >> ip->i_blkbits; /* * Take appropriate lock on inode */ if (create) - IWRITE_LOCK(ip); + IWRITE_LOCK(ip, RDWRLOCK_NORMAL); else - IREAD_LOCK(ip); + IREAD_LOCK(ip, RDWRLOCK_NORMAL); if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) && - (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) && + (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) && xaddr) { if (xflag & XAD_NOTRECORDED) { if (!create) @@ -227,7 +252,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, #ifdef _JFS_4K if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad))) goto unlock; - rc = extAlloc(ip, xlen, lblock64, &xad, FALSE); + rc = extAlloc(ip, xlen, lblock64, &xad, false); if (rc) goto unlock; @@ -254,15 +279,9 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks, return rc; } -static int jfs_get_block(struct inode *ip, sector_t lblock, - struct buffer_head *bh_result, int create) -{ - return jfs_get_blocks(ip, lblock, 1, bh_result, create); -} - static int jfs_writepage(struct page *page, struct writeback_control *wbc) { - return nobh_writepage(page, jfs_get_block, wbc); + return block_write_full_page(page, jfs_get_block, wbc); } static int jfs_writepages(struct address_space *mapping, @@ -282,10 +301,28 @@ static int jfs_readpages(struct file *file, struct address_space *mapping, return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); } -static int jfs_prepare_write(struct file *file, - struct page *page, unsigned from, unsigned to) +static void jfs_write_failed(struct address_space *mapping, loff_t to) { - return nobh_prepare_write(page, from, to, jfs_get_block); + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, inode->i_size); + jfs_truncate(inode); + } +} + +static int jfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + int ret; + + ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, + jfs_get_block); + if (unlikely(ret)) + jfs_write_failed(mapping, pos + len); + + return ret; } static sector_t jfs_bmap(struct address_space *mapping, sector_t block) @@ -294,23 +331,38 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block) } static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host; + size_t count = iov_iter_count(iter); + ssize_t ret; + + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block); + + /* + * In case of error extending write may have 
instantiated a few + * blocks outside i_size. Trim these off again. + */ + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + count; + + if (end > isize) + jfs_write_failed(mapping, end); + } - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, jfs_get_blocks, NULL); + return ret; } -struct address_space_operations jfs_aops = { +const struct address_space_operations jfs_aops = { .readpage = jfs_readpage, .readpages = jfs_readpages, .writepage = jfs_writepage, .writepages = jfs_writepages, - .sync_page = block_sync_page, - .prepare_write = jfs_prepare_write, - .commit_write = nobh_commit_write, + .write_begin = jfs_write_begin, + .write_end = nobh_write_end, .bmap = jfs_bmap, .direct_IO = jfs_direct_IO, }; @@ -335,18 +387,18 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length) tid = txBegin(ip->i_sb, 0); /* - * The commit_sem cannot be taken before txBegin. + * The commit_mutex cannot be taken before txBegin. * txBegin may block and there is a chance the inode * could be marked dirty and need to be committed * before txBegin unblocks */ - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); newsize = xtTruncate(tid, ip, length, COMMIT_TRUNCATE | COMMIT_PWMAP); if (newsize < 0) { txEnd(tid); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); break; } @@ -355,7 +407,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length) txCommit(tid, 1, &ip, 0); txEnd(tid); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); } while (newsize > length); /* Truncate isn't always atomic */ } @@ -363,9 +415,9 @@ void jfs_truncate(struct inode *ip) { jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); - nobh_truncate_page(ip->i_mapping, ip->i_size); + nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); - IWRITE_LOCK(ip); + IWRITE_LOCK(ip, RDWRLOCK_NORMAL); jfs_truncate_nolock(ip, ip->i_size); IWRITE_UNLOCK(ip); } diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c new file mode 100644 index 00000000000..93a1232894f --- /dev/null +++ b/fs/jfs/ioctl.c @@ -0,0 +1,189 @@ +/* + * linux/fs/jfs/ioctl.c + * + * Copyright (C) 2006 Herbert Poetzl + * adapted from Remy Card's ext2/ioctl.c + */ + +#include <linux/fs.h> +#include <linux/ctype.h> +#include <linux/capability.h> +#include <linux/mount.h> +#include <linux/time.h> +#include <linux/sched.h> +#include <linux/blkdev.h> +#include <asm/current.h> +#include <asm/uaccess.h> + +#include "jfs_filsys.h" +#include "jfs_debug.h" +#include "jfs_incore.h" +#include "jfs_dinode.h" +#include "jfs_inode.h" +#include "jfs_dmap.h" +#include "jfs_discard.h" + +static struct { + long jfs_flag; + long ext2_flag; +} jfs_map[] = { + {JFS_NOATIME_FL, FS_NOATIME_FL}, + {JFS_DIRSYNC_FL, FS_DIRSYNC_FL}, + {JFS_SYNC_FL, FS_SYNC_FL}, + {JFS_SECRM_FL, FS_SECRM_FL}, + {JFS_UNRM_FL, FS_UNRM_FL}, + {JFS_APPEND_FL, FS_APPEND_FL}, + {JFS_IMMUTABLE_FL, FS_IMMUTABLE_FL}, + {0, 0}, +}; + +static long jfs_map_ext2(unsigned long flags, int from) +{ + int index=0; + long mapped=0; + + while (jfs_map[index].jfs_flag) { + if (from) { + if (jfs_map[index].ext2_flag & flags) + mapped |= jfs_map[index].jfs_flag; + } else { + if (jfs_map[index].jfs_flag & flags) + mapped |= jfs_map[index].ext2_flag; + } + index++; + } + return mapped; +} + + +long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct jfs_inode_info *jfs_inode = JFS_IP(inode); + unsigned int flags; + + 
switch (cmd) { + case JFS_IOC_GETFLAGS: + jfs_get_inode_flags(jfs_inode); + flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; + flags = jfs_map_ext2(flags, 0); + return put_user(flags, (int __user *) arg); + case JFS_IOC_SETFLAGS: { + unsigned int oldflags; + int err; + + err = mnt_want_write_file(filp); + if (err) + return err; + + if (!inode_owner_or_capable(inode)) { + err = -EACCES; + goto setflags_out; + } + if (get_user(flags, (int __user *) arg)) { + err = -EFAULT; + goto setflags_out; + } + + flags = jfs_map_ext2(flags, 1); + if (!S_ISDIR(inode->i_mode)) + flags &= ~JFS_DIRSYNC_FL; + + /* Is it quota file? Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + err = -EPERM; + goto setflags_out; + } + + /* Lock against other parallel changes of flags */ + mutex_lock(&inode->i_mutex); + + jfs_get_inode_flags(jfs_inode); + oldflags = jfs_inode->mode2; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + */ + if ((oldflags & JFS_IMMUTABLE_FL) || + ((flags ^ oldflags) & + (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + err = -EPERM; + goto setflags_out; + } + } + + flags = flags & JFS_FL_USER_MODIFIABLE; + flags |= oldflags & ~JFS_FL_USER_MODIFIABLE; + jfs_inode->mode2 = flags; + + jfs_set_inode_flags(inode); + mutex_unlock(&inode->i_mutex); + inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(inode); +setflags_out: + mnt_drop_write_file(filp); + return err; + } + + case FITRIM: + { + struct super_block *sb = inode->i_sb; + struct request_queue *q = bdev_get_queue(sb->s_bdev); + struct fstrim_range range; + s64 ret = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!blk_queue_discard(q)) { + jfs_warn("FITRIM not supported on device"); + return -EOPNOTSUPP; + } + + if (copy_from_user(&range, (struct fstrim_range __user *)arg, + sizeof(range))) + return -EFAULT; + + range.minlen = max_t(unsigned int, range.minlen, + q->limits.discard_granularity); + + ret = jfs_ioc_trim(inode, &range); + if (ret < 0) + return ret; + + if (copy_to_user((struct fstrim_range __user *)arg, &range, + sizeof(range))) + return -EFAULT; + + return 0; + } + + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + /* While these ioctl numbers defined with 'long' and have different + * numbers than the 64bit ABI, + * the actual implementation only deals with ints and is compatible. + */ + switch (cmd) { + case JFS_IOC_GETFLAGS32: + cmd = JFS_IOC_GETFLAGS; + break; + case JFS_IOC_SETFLAGS32: + cmd = JFS_IOC_SETFLAGS; + break; + case FITRIM: + cmd = FITRIM; + break; + } + return jfs_ioctl(filp, cmd, arg); +} +#endif diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index a76293767c7..489f993b7b1 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -1,18 +1,18 @@ /* - * Copyright (c) International Business Machines Corp., 2002 + * Copyright (C) International Business Machines Corp., 2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_ACL @@ -20,9 +20,9 @@ #ifdef CONFIG_JFS_POSIX_ACL -int jfs_permission(struct inode *, int, struct nameidata *); +struct posix_acl *jfs_get_acl(struct inode *inode, int type); +int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); int jfs_init_acl(tid_t, struct inode *, struct inode *); -int jfs_setattr(struct dentry *, struct iattr *); #else diff --git a/fs/jfs/jfs_btree.h b/fs/jfs/jfs_btree.h index 7f3e9ac454f..79c61805bd3 100644 --- a/fs/jfs/jfs_btree.h +++ b/fs/jfs/jfs_btree.h @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_BTREE diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c index 4caea6b43b9..dd824d9b0b1 100644 --- a/fs/jfs/jfs_debug.c +++ b/fs/jfs/jfs_debug.c @@ -4,16 +4,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -21,66 +21,29 @@ #include <linux/ctype.h> #include <linux/module.h> #include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <asm/uaccess.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_debug.h" -#ifdef CONFIG_JFS_DEBUG -void dump_mem(char *label, void *data, int length) -{ - int i, j; - int *intptr = data; - char *charptr = data; - char buf[10], line[80]; - - printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length, - data); - for (i = 0; i < length; i += 16) { - line[0] = 0; - for (j = 0; (j < 4) && (i + j * 4 < length); j++) { - sprintf(buf, " %08x", intptr[i / 4 + j]); - strcat(line, buf); - } - buf[0] = ' '; - buf[2] = 0; - for (j = 0; (j < 16) && (i + j < length); j++) { - buf[1] = - isprint(charptr[i + j]) ? 
charptr[i + j] : '.'; - strcat(line, buf); - } - printk("%s\n", line); - } -} -#endif - #ifdef PROC_FS_JFS /* see jfs_debug.h */ static struct proc_dir_entry *base; #ifdef CONFIG_JFS_DEBUG -static int loglevel_read(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int jfs_loglevel_proc_show(struct seq_file *m, void *v) { - int len; - - len = sprintf(page, "%d\n", jfsloglevel); - - len -= off; - *start = page + off; - - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; + seq_printf(m, "%d\n", jfsloglevel); + return 0; +} - return len; +static int jfs_loglevel_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_loglevel_proc_show, NULL); } -static int loglevel_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t jfs_loglevel_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) { char c; @@ -93,41 +56,43 @@ static int loglevel_write(struct file *file, const char __user *buffer, jfsloglevel = c - '0'; return count; } + +static const struct file_operations jfs_loglevel_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_loglevel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = jfs_loglevel_proc_write, +}; #endif static struct { const char *name; - read_proc_t *read_fn; - write_proc_t *write_fn; + const struct file_operations *proc_fops; } Entries[] = { #ifdef CONFIG_JFS_STATISTICS - { "lmstats", jfs_lmstats_read, }, - { "txstats", jfs_txstats_read, }, - { "xtstat", jfs_xtstat_read, }, - { "mpstat", jfs_mpstat_read, }, + { "lmstats", &jfs_lmstats_proc_fops, }, + { "txstats", &jfs_txstats_proc_fops, }, + { "xtstat", &jfs_xtstat_proc_fops, }, + { "mpstat", &jfs_mpstat_proc_fops, }, #endif #ifdef CONFIG_JFS_DEBUG - { "TxAnchor", jfs_txanchor_read, }, - { "loglevel", loglevel_read, loglevel_write } + { "TxAnchor", &jfs_txanchor_proc_fops, }, + { "loglevel", &jfs_loglevel_proc_fops } #endif }; -#define NPROCENT (sizeof(Entries)/sizeof(Entries[0])) +#define NPROCENT ARRAY_SIZE(Entries) void jfs_proc_init(void) { int i; - if (!(base = proc_mkdir("jfs", proc_root_fs))) + if (!(base = proc_mkdir("fs/jfs", NULL))) return; - base->owner = THIS_MODULE; - for (i = 0; i < NPROCENT; i++) { - struct proc_dir_entry *p; - if ((p = create_proc_entry(Entries[i].name, 0, base))) { - p->read_proc = Entries[i].read_fn; - p->write_proc = Entries[i].write_fn; - } - } + for (i = 0; i < NPROCENT; i++) + proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); } void jfs_proc_clean(void) @@ -137,7 +102,7 @@ void jfs_proc_clean(void) if (base) { for (i = 0; i < NPROCENT; i++) remove_proc_entry(Entries[i].name, base); - remove_proc_entry("jfs", proc_root_fs); + remove_proc_entry("fs/jfs", NULL); } } diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h index ddffbbd4d95..eafd1300a00 100644 --- a/fs/jfs/jfs_debug.h +++ b/fs/jfs/jfs_debug.h @@ -39,10 +39,6 @@ extern void jfs_proc_clean(void); /* * assert with traditional printf/panic */ -#ifdef CONFIG_KERNEL_ASSERTS -/* kgdb stuff */ -#define assert(p) KERNEL_ASSERT(#p, p) -#else #define assert(p) do { \ if (!(p)) { \ printk(KERN_CRIT "BUG at %s:%d assert(%s)\n", \ @@ -50,7 +46,6 @@ extern void jfs_proc_clean(void); BUG(); \ } \ } while (0) -#endif /* * debug ON @@ -67,8 +62,7 @@ extern void jfs_proc_clean(void); extern int jfsloglevel; -extern void dump_mem(char *label, void *data, int length); -extern int jfs_txanchor_read(char *, char **, off_t, int, int 
*, void *); +extern const struct file_operations jfs_txanchor_proc_fops; /* information message: e.g., configuration, major event */ #define jfs_info(fmt, arg...) do { \ @@ -99,7 +93,6 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); * --------- */ #else /* CONFIG_JFS_DEBUG */ -#define dump_mem(label,data,length) do {} while (0) #define ASSERT(p) do {} while (0) #define jfs_info(fmt, arg...) do {} while (0) #define jfs_debug(fmt, arg...) do {} while (0) @@ -112,10 +105,10 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *); * ---------- */ #ifdef CONFIG_JFS_STATISTICS -extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *); -extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *); -extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *); -extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *); +extern const struct file_operations jfs_lmstats_proc_fops; +extern const struct file_operations jfs_txstats_proc_fops; +extern const struct file_operations jfs_mpstat_proc_fops; +extern const struct file_operations jfs_xtstat_proc_fops; #define INCREMENT(x) ((x)++) #define DECREMENT(x) ((x)--) diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h index 580a3258449..395c4c0d0f0 100644 --- a/fs/jfs/jfs_dinode.h +++ b/fs/jfs/jfs_dinode.h @@ -1,41 +1,41 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2001 + * Copyright (C) International Business Machines Corp., 2000-2001 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_DINODE #define _H_JFS_DINODE /* - * jfs_dinode.h: on-disk inode manager + * jfs_dinode.h: on-disk inode manager */ -#define INODESLOTSIZE 128 -#define L2INODESLOTSIZE 7 -#define log2INODESIZE 9 /* log2(bytes per dinode) */ +#define INODESLOTSIZE 128 +#define L2INODESLOTSIZE 7 +#define log2INODESIZE 9 /* log2(bytes per dinode) */ /* - * on-disk inode : 512 bytes + * on-disk inode : 512 bytes * * note: align 64-bit fields on 8-byte boundary. */ struct dinode { /* - * I. base area (128 bytes) - * ------------------------ + * I. base area (128 bytes) + * ------------------------ * * define generic/POSIX attributes */ @@ -70,16 +70,16 @@ struct dinode { __le32 di_acltype; /* 4: Type of ACL */ /* - * Extension Areas. + * Extension Areas. * - * Historically, the inode was partitioned into 4 128-byte areas, - * the last 3 being defined as unions which could have multiple - * uses. The first 96 bytes had been completely unused until - * an index table was added to the directory. It is now more - * useful to describe the last 3/4 of the inode as a single - * union. 
We would probably be better off redesigning the - * entire structure from scratch, but we don't want to break - * commonality with OS/2's JFS at this time. + * Historically, the inode was partitioned into 4 128-byte areas, + * the last 3 being defined as unions which could have multiple + * uses. The first 96 bytes had been completely unused until + * an index table was added to the directory. It is now more + * useful to describe the last 3/4 of the inode as a single + * union. We would probably be better off redesigning the + * entire structure from scratch, but we don't want to break + * commonality with OS/2's JFS at this time. */ union { struct { @@ -95,7 +95,7 @@ struct dinode { } _dir; /* (384) */ #define di_dirtable u._dir._table #define di_dtroot u._dir._dtroot -#define di_parent di_dtroot.header.idotdot +#define di_parent di_dtroot.header.idotdot #define di_DASD di_dtroot.header.DASD struct { @@ -127,25 +127,50 @@ struct dinode { #define di_inlinedata u._file._u2._special._u #define di_rdev u._file._u2._special._u._rdev #define di_fastsymlink u._file._u2._special._u._fastsymlink -#define di_inlineea u._file._u2._special._inlineea +#define di_inlineea u._file._u2._special._inlineea } u; }; /* extended mode bits (on-disk inode di_mode) */ -#define IFJOURNAL 0x00010000 /* journalled file */ -#define ISPARSE 0x00020000 /* sparse file enabled */ -#define INLINEEA 0x00040000 /* inline EA area free */ +#define IFJOURNAL 0x00010000 /* journalled file */ +#define ISPARSE 0x00020000 /* sparse file enabled */ +#define INLINEEA 0x00040000 /* inline EA area free */ #define ISWAPFILE 0x00800000 /* file open for pager swap space */ /* more extended mode bits: attributes for OS/2 */ #define IREADONLY 0x02000000 /* no write access to file */ -#define IARCHIVE 0x40000000 /* file archive bit */ -#define ISYSTEM 0x08000000 /* system file */ #define IHIDDEN 0x04000000 /* hidden file */ -#define IRASH 0x4E000000 /* mask for changeable attributes */ -#define INEWNAME 0x80000000 /* non-8.3 filename format */ +#define ISYSTEM 0x08000000 /* system file */ + #define IDIRECTORY 0x20000000 /* directory (shadow of real bit) */ +#define IARCHIVE 0x40000000 /* file archive bit */ +#define INEWNAME 0x80000000 /* non-8.3 filename format */ + +#define IRASH 0x4E000000 /* mask for changeable attributes */ #define ATTRSHIFT 25 /* bits to shift to move attribute specification to mode position */ +/* extended attributes for Linux */ + +#define JFS_NOATIME_FL 0x00080000 /* do not update atime */ + +#define JFS_DIRSYNC_FL 0x00100000 /* dirsync behaviour */ +#define JFS_SYNC_FL 0x00200000 /* Synchronous updates */ +#define JFS_SECRM_FL 0x00400000 /* Secure deletion */ +#define JFS_UNRM_FL 0x00800000 /* allow for undelete */ + +#define JFS_APPEND_FL 0x01000000 /* writes to file may only append */ +#define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */ + +#define JFS_FL_USER_VISIBLE 0x03F80000 +#define JFS_FL_USER_MODIFIABLE 0x03F80000 +#define JFS_FL_INHERIT 0x03C80000 + +/* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */ +#define JFS_IOC_GETFLAGS _IOR('f', 1, long) +#define JFS_IOC_SETFLAGS _IOW('f', 2, long) + +#define JFS_IOC_GETFLAGS32 _IOR('f', 1, int) +#define JFS_IOC_SETFLAGS32 _IOW('f', 2, int) + #endif /*_H_JFS_DINODE */ diff --git a/fs/jfs/jfs_discard.c b/fs/jfs/jfs_discard.c new file mode 100644 index 00000000000..dfcd5030455 --- /dev/null +++ b/fs/jfs/jfs_discard.c @@ -0,0 +1,121 @@ +/* + * Copyright (C) Tino Reichardt, 2012 + * + * This program is free software; you can redistribute it and/or modify 
+ * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/blkdev.h> + +#include "jfs_incore.h" +#include "jfs_superblock.h" +#include "jfs_discard.h" +#include "jfs_dmap.h" +#include "jfs_debug.h" + + +/* + * NAME: jfs_issue_discard() + * + * FUNCTION: TRIM the specified block range on device, if supported + * + * PARAMETERS: + * ip - pointer to in-core inode + * blkno - starting block number to be trimmed (0..N) + * nblocks - number of blocks to be trimmed + * + * RETURN VALUES: + * none + * + * serialization: IREAD_LOCK(ipbmap) held on entry/exit; + */ +void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks) +{ + struct super_block *sb = ip->i_sb; + int r = 0; + + r = sb_issue_discard(sb, blkno, nblocks, GFP_NOFS, 0); + if (unlikely(r != 0)) { + jfs_err("JFS: sb_issue_discard" \ + "(%p, %llu, %llu, GFP_NOFS, 0) = %d => failed!\n", + sb, (unsigned long long)blkno, + (unsigned long long)nblocks, r); + } + + jfs_info("JFS: sb_issue_discard" \ + "(%p, %llu, %llu, GFP_NOFS, 0) = %d\n", + sb, (unsigned long long)blkno, + (unsigned long long)nblocks, r); + + return; +} + +/* + * NAME: jfs_ioc_trim() + * + * FUNCTION: attempt to discard (TRIM) all free blocks from the + * filesystem. 
+ * + * PARAMETERS: + * ip - pointer to in-core inode; + * range - the range, given by user space + * + * RETURN VALUES: + * 0 - success + * -EIO - i/o error + */ +int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range) +{ + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; + struct super_block *sb = ipbmap->i_sb; + int agno, agno_end; + u64 start, end, minlen; + u64 trimmed = 0; + + /** + * convert byte values to block size of filesystem: + * start: First Byte to trim + * len: number of Bytes to trim from start + * minlen: minimum extent length in Bytes + */ + start = range->start >> sb->s_blocksize_bits; + end = start + (range->len >> sb->s_blocksize_bits) - 1; + minlen = range->minlen >> sb->s_blocksize_bits; + if (minlen == 0) + minlen = 1; + + if (minlen > bmp->db_agsize || + start >= bmp->db_mapsize || + range->len < sb->s_blocksize) + return -EINVAL; + + if (end >= bmp->db_mapsize) + end = bmp->db_mapsize - 1; + + /** + * we trim all ag's within the range + */ + agno = BLKTOAG(start, JFS_SBI(ip->i_sb)); + agno_end = BLKTOAG(end, JFS_SBI(ip->i_sb)); + while (agno <= agno_end) { + trimmed += dbDiscardAG(ip, agno, minlen); + agno++; + } + range->len = trimmed << sb->s_blocksize_bits; + + return 0; +} diff --git a/fs/jfs/jfs_discard.h b/fs/jfs/jfs_discard.h new file mode 100644 index 00000000000..40d1ee6081a --- /dev/null +++ b/fs/jfs/jfs_discard.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) Tino Reichardt, 2012 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _H_JFS_DISCARD +#define _H_JFS_DISCARD + +struct fstrim_range; + +extern void jfs_issue_discard(struct inode *ip, u64 blkno, u64 nblocks); +extern int jfs_ioc_trim(struct inode *ip, struct fstrim_range *range); + +#endif /* _H_JFS_DISCARD */ diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 68000a50ceb..2d514c7affc 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1,22 +1,24 @@ /* * Copyright (C) International Business Machines Corp., 2000-2004 + * Portions Copyright (C) Tino Reichardt, 2012 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> +#include <linux/slab.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_dmap.h" @@ -24,34 +26,35 @@ #include "jfs_lock.h" #include "jfs_metapage.h" #include "jfs_debug.h" +#include "jfs_discard.h" /* * SERIALIZATION of the Block Allocation Map. * * the working state of the block allocation map is accessed in * two directions: - * + * * 1) allocation and free requests that start at the dmap * level and move up through the dmap control pages (i.e. * the vast majority of requests). - * - * 2) allocation requests that start at dmap control page + * + * 2) allocation requests that start at dmap control page * level and work down towards the dmaps. - * - * the serialization scheme used here is as follows. * - * requests which start at the bottom are serialized against each - * other through buffers and each requests holds onto its buffers - * as it works it way up from a single dmap to the required level + * the serialization scheme used here is as follows. + * + * requests which start at the bottom are serialized against each + * other through buffers and each requests holds onto its buffers + * as it works it way up from a single dmap to the required level * of dmap control page. * requests that start at the top are serialized against each other * and request that start from the bottom by the multiple read/single * write inode lock of the bmap inode. requests starting at the top * take this lock in write mode while request starting at the bottom * take the lock in read mode. a single top-down request may proceed - * exclusively while multiple bottoms-up requests may proceed - * simultaneously (under the protection of busy buffers). - * + * exclusively while multiple bottoms-up requests may proceed + * simultaneously (under the protection of busy buffers). + * * in addition to information found in dmaps and dmap control pages, * the working state of the block allocation map also includes read/ * write information maintained in the bmap descriptor (i.e. total @@ -59,14 +62,14 @@ * a single exclusive lock (BMAP_LOCK) is used to guard this information * in the face of multiple-bottoms up requests. * (lock ordering: IREAD_LOCK, BMAP_LOCK); - * + * * accesses to the persistent state of the block allocation map (limited * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. */ -#define BMAP_LOCK_INIT(bmp) init_MUTEX(&bmp->db_bmaplock) -#define BMAP_LOCK(bmp) down(&bmp->db_bmaplock) -#define BMAP_UNLOCK(bmp) up(&bmp->db_bmaplock) +#define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock) +#define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock) +#define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock) /* * forward references @@ -103,7 +106,6 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, int nblocks); static int dbMaxBud(u8 * cp); -s64 dbMapFileSizeToMapSize(struct inode *ipbmap); static int blkstol2(s64 nb); static int cntlz(u32 value); @@ -120,12 +122,12 @@ static int dbGetL2AGSize(s64 nblocks); /* * buddy table * - * table used for determining buddy sizes within characters of + * table used for determining buddy sizes within characters of * dmap bitmap words. 
the characters themselves serve as indexes * into the table, with the table elements yielding the maximum * binary buddy of free bits within the character. */ -static s8 budtab[256] = { +static const s8 budtab[256] = { 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -144,9 +146,8 @@ static s8 budtab[256] = { 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 }; - /* - * NAME: dbMount() + * NAME: dbMount() * * FUNCTION: initializate the block allocation map. * @@ -154,12 +155,12 @@ static s8 budtab[256] = { * the in-core descriptor is initialized from disk. * * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. + * ipbmap - pointer to in-core inode for the block map. * * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient memory - * -EIO - i/o error + * 0 - success + * -ENOMEM - insufficient memory + * -EIO - i/o error */ int dbMount(struct inode *ipbmap) { @@ -195,7 +196,7 @@ int dbMount(struct inode *ipbmap) bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); - bmp->db_agheigth = le32_to_cpu(dbmp_le->dn_agheigth); + bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight); bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); @@ -223,20 +224,20 @@ int dbMount(struct inode *ipbmap) /* - * NAME: dbUnmount() + * NAME: dbUnmount() * * FUNCTION: terminate the block allocation map in preparation for * file system unmount. * - * the in-core bmap descriptor is written to disk and + * the in-core bmap descriptor is written to disk and * the memory for this descriptor is freed. * * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. + * ipbmap - pointer to in-core inode for the block map. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error */ int dbUnmount(struct inode *ipbmap, int mounterror) { @@ -287,7 +288,7 @@ int dbSync(struct inode *ipbmap) dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); - dbmp_le->dn_agheigth = cpu_to_le32(bmp->db_agheigth); + dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight); dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); @@ -302,17 +303,15 @@ int dbSync(struct inode *ipbmap) /* * write out dirty pages of bmap */ - filemap_fdatawrite(ipbmap->i_mapping); - filemap_fdatawait(ipbmap->i_mapping); + filemap_write_and_wait(ipbmap->i_mapping); diWriteSpecial(ipbmap, 0); return (0); } - /* - * NAME: dbFree() + * NAME: dbFree() * * FUNCTION: free the specified block range from the working block * allocation map. @@ -321,13 +320,13 @@ int dbSync(struct inode *ipbmap) * at a time. * * PARAMETERS: - * ip - pointer to in-core inode; - * blkno - starting block number to be freed. - * nblocks - number of blocks to be freed. + * ip - pointer to in-core inode; + * blkno - starting block number to be freed. + * nblocks - number of blocks to be freed. 
* * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error */ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) { @@ -337,8 +336,9 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) s64 lblkno, rem; struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; + struct super_block *sb = ipbmap->i_sb; - IREAD_LOCK(ipbmap); + IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); /* block to be freed better be within the mapsize. */ if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { @@ -346,11 +346,17 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", (unsigned long long) blkno, (unsigned long long) nblocks); - jfs_error(ip->i_sb, - "dbFree: block to be freed is outside the map"); + jfs_error(ip->i_sb, "block to be freed is outside the map\n"); return -EIO; } + /** + * TRIM the blocks, when mounted with discard option + */ + if (JFS_SBI(sb)->flag & JFS_DISCARD) + if (JFS_SBI(sb)->minblks_trim <= nblocks) + jfs_issue_discard(ipbmap, blkno, nblocks); + /* * free the blocks a dmap at a time. */ @@ -377,7 +383,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) /* free the blocks. */ if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { - jfs_error(ip->i_sb, "dbFree: error in block map\n"); + jfs_error(ip->i_sb, "error in block map\n"); release_metapage(mp); IREAD_UNLOCK(ipbmap); return (rc); @@ -396,23 +402,23 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) /* * NAME: dbUpdatePMap() * - * FUNCTION: update the allocation state (free or allocate) of the + * FUNCTION: update the allocation state (free or allocate) of the * specified block range in the persistent block allocation map. - * + * * the blocks will be updated in the persistent map one * dmap at a time. * * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. - * free - TRUE if block range is to be freed from the persistent - * map; FALSE if it is to be allocated. - * blkno - starting block number of the range. - * nblocks - number of contiguous blocks in the range. - * tblk - transaction block; + * ipbmap - pointer to in-core inode for the block map. + * free - 'true' if block range is to be freed from the persistent + * map; 'false' if it is to be allocated. + * blkno - starting block number of the range. + * nblocks - number of contiguous blocks in the range. + * tblk - transaction block; * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error */ int dbUpdatePMap(struct inode *ipbmap, @@ -434,8 +440,7 @@ dbUpdatePMap(struct inode *ipbmap, printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", (unsigned long long) blkno, (unsigned long long) nblocks); - jfs_error(ipbmap->i_sb, - "dbUpdatePMap: blocks are outside the map"); + jfs_error(ipbmap->i_sb, "blocks are outside the map\n"); return -EIO; } @@ -476,7 +481,7 @@ dbUpdatePMap(struct inode *ipbmap, /* update the bits of the dmap words. the first and last * words may only have a subset of their bits updated. if * this is the case, we'll work against that word (i.e. - * partial first and/or last) only in a single pass. a + * partial first and/or last) only in a single pass. a * single pass will also be used to update all words that * are to have all their bits updated. 
*/ @@ -533,10 +538,10 @@ dbUpdatePMap(struct inode *ipbmap, lastlblkno = lblkno; + LOGSYNC_LOCK(log, flags); if (mp->lsn != 0) { /* inherit older/smaller lsn */ logdiff(diffp, mp->lsn, log); - LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; @@ -549,20 +554,17 @@ dbUpdatePMap(struct inode *ipbmap, logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert bp after tblock in logsync list */ - LOGSYNC_LOCK(log, flags); - log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log, flags); } + LOGSYNC_UNLOCK(log, flags); } /* write the last buffer. */ @@ -577,7 +579,7 @@ dbUpdatePMap(struct inode *ipbmap, /* * NAME: dbNextAG() * - * FUNCTION: find the preferred allocation group for new allocations. + * FUNCTION: find the preferred allocation group for new allocations. * * Within the allocation groups, we maintain a preferred * allocation group which consists of a group with at least @@ -593,10 +595,10 @@ dbUpdatePMap(struct inode *ipbmap, * empty ags around for large allocations. * * PARAMETERS: - * ipbmap - pointer to in-core inode for the block map. + * ipbmap - pointer to in-core inode for the block map. * * RETURN VALUES: - * the preferred allocation group number. + * the preferred allocation group number. */ int dbNextAG(struct inode *ipbmap) { @@ -660,17 +662,17 @@ unlock: /* * NAME: dbAlloc() * - * FUNCTION: attempt to allocate a specified number of contiguous free + * FUNCTION: attempt to allocate a specified number of contiguous free * blocks from the working allocation block map. * * the block allocation policy uses hints and a multi-step * approach. * - * for allocation requests smaller than the number of blocks + * for allocation requests smaller than the number of blocks * per dmap, we first try to allocate the new blocks * immediately following the hint. if these blocks are not * available, we try to allocate blocks near the hint. if - * no blocks near the hint are available, we next try to + * no blocks near the hint are available, we next try to * allocate within the same dmap as contains the hint. * * if no blocks are available in the dmap or the allocation @@ -684,16 +686,16 @@ unlock: * size or requests that specify no hint value. * * PARAMETERS: - * ip - pointer to in-core inode; - * hint - allocation hint. - * nblocks - number of contiguous blocks in the range. - * results - on successful return, set to the starting block number + * ip - pointer to in-core inode; + * hint - allocation hint. + * nblocks - number of contiguous blocks in the range. + * results - on successful return, set to the starting block number * of the newly allocated contiguous range. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error */ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) { @@ -710,26 +712,19 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) /* assert that nblocks is valid */ assert(nblocks > 0); -#ifdef _STILL_TO_PORT - /* DASD limit check F226941 */ - if (OVER_LIMIT(ip, nblocks)) - return -ENOSPC; -#endif /* _STILL_TO_PORT */ - /* get the log2 number of blocks to be allocated. - * if the number of blocks is not a log2 multiple, + * if the number of blocks is not a log2 multiple, * it will be rounded up to the next log2 multiple. 
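dbAlloc() below starts by converting the request into l2nb = BLKSTOL2(nblocks), the log2 of the block count rounded up to the next power of two when the count is not one already (blkstol2() later in this file does the same job). A minimal stand-alone version, for illustration only:

#include <stdio.h>
#include <stdint.h>

/* log2 of nb, rounded up to the next power of two when nb is not one already */
static int blks_to_l2(int64_t nb)
{
        int l2 = 0;
        int64_t p2 = 1;

        while (p2 < nb) {               /* smallest power of two >= nb */
                p2 <<= 1;
                l2++;
        }
        return l2;
}

int main(void)
{
        /* 8 -> 3 (exact), 5 -> 3 (rounded up), 1 -> 0 */
        printf("%d %d %d\n", blks_to_l2(8), blks_to_l2(5), blks_to_l2(1));
        return 0;
}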
*/ l2nb = BLKSTOL2(nblocks); bmp = JFS_SBI(ip->i_sb)->bmap; -//retry: /* serialize w.r.t.extendfs() */ mapSize = bmp->db_mapsize; /* the hint should be within the map */ if (hint >= mapSize) { - jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map"); + jfs_error(ip->i_sb, "the hint is outside the map\n"); return -EIO; } @@ -737,7 +732,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) * allocation group size, try to allocate anywhere. */ if (l2nb > bmp->db_agl2size) { - IWRITE_LOCK(ipbmap); + IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); rc = dbAllocAny(bmp, nblocks, l2nb, results); @@ -766,7 +761,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) * allocation group. */ if ((blkno & (bmp->db_agsize - 1)) == 0) - /* check if the AG is currenly being written to. + /* check if the AG is currently being written to. * if so, call dbNextAG() to find a non-busy * AG with sufficient free space. */ @@ -778,7 +773,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) * the hint using a tiered strategy. */ if (nblocks <= BPERDMAP) { - IREAD_LOCK(ipbmap); + IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); /* get the buffer for the dmap containing the hint. */ @@ -848,7 +843,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) /* try to satisfy the allocation request with blocks within * the same allocation group as the hint. */ - IWRITE_LOCK(ipbmap); + IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC) goto write_unlock; @@ -860,7 +855,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) * Let dbNextAG recommend a preferred allocation group */ agno = dbNextAG(ipbmap); - IWRITE_LOCK(ipbmap); + IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); /* Try to allocate within this allocation group. if that fails, try to * allocate anywhere in the map. @@ -883,17 +878,17 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) /* * NAME: dbAllocExact() * - * FUNCTION: try to allocate the requested extent; + * FUNCTION: try to allocate the requested extent; * * PARAMETERS: - * ip - pointer to in-core inode; - * blkno - extent address; - * nblocks - extent length; + * ip - pointer to in-core inode; + * blkno - extent address; + * nblocks - extent length; * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error */ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) { @@ -904,13 +899,13 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) s64 lblkno; struct metapage *mp; - IREAD_LOCK(ipbmap); + IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); /* * validate extent request: * * note: defragfs policy: - * max 64 blocks will be moved. + * max 64 blocks will be moved. * allocation request size must be satisfied from a single dmap. */ if (nblocks <= 0 || nblocks > BPERDMAP || blkno >= bmp->db_mapsize) { @@ -950,7 +945,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) /* * NAME: dbReAlloc() * - * FUNCTION: attempt to extend a current allocation by a specified + * FUNCTION: attempt to extend a current allocation by a specified * number of blocks. * * this routine attempts to satisfy the allocation request @@ -963,21 +958,21 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks) * number of blocks required. * * PARAMETERS: - * ip - pointer to in-core inode requiring allocation. - * blkno - starting block of the current allocation. 
- * nblocks - number of contiguous blocks within the current + * ip - pointer to in-core inode requiring allocation. + * blkno - starting block of the current allocation. + * nblocks - number of contiguous blocks within the current * allocation. - * addnblocks - number of blocks to add to the allocation. - * results - on successful return, set to the starting block number + * addnblocks - number of blocks to add to the allocation. + * results - on successful return, set to the starting block number * of the existing allocation if the existing allocation * was extended in place or to a newly allocated contiguous * range if the existing allocation could not be extended * in place. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error */ int dbReAlloc(struct inode *ip, @@ -1008,7 +1003,7 @@ dbReAlloc(struct inode *ip, /* * NAME: dbExtend() * - * FUNCTION: attempt to extend a current allocation by a specified + * FUNCTION: attempt to extend a current allocation by a specified * number of blocks. * * this routine attempts to satisfy the allocation request @@ -1017,16 +1012,16 @@ dbReAlloc(struct inode *ip, * immediately following the current allocation. * * PARAMETERS: - * ip - pointer to in-core inode requiring allocation. - * blkno - starting block of the current allocation. - * nblocks - number of contiguous blocks within the current + * ip - pointer to in-core inode requiring allocation. + * blkno - starting block of the current allocation. + * nblocks - number of contiguous blocks within the current * allocation. - * addnblocks - number of blocks to add to the allocation. + * addnblocks - number of blocks to add to the allocation. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error */ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) { @@ -1054,14 +1049,13 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) */ extblkno = lastblkno + 1; - IREAD_LOCK(ipbmap); + IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); /* better be within the file system */ bmp = sbi->bmap; if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { IREAD_UNLOCK(ipbmap); - jfs_error(ip->i_sb, - "dbExtend: the block is outside the filesystem"); + jfs_error(ip->i_sb, "the block is outside the filesystem\n"); return -EIO; } @@ -1105,7 +1099,6 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) /* we were not successful */ release_metapage(mp); - return (rc); } @@ -1113,19 +1106,19 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) /* * NAME: dbAllocNext() * - * FUNCTION: attempt to allocate the blocks of the specified block + * FUNCTION: attempt to allocate the blocks of the specified block * range within a dmap. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap. - * blkno - starting block number of the range. - * nblocks - number of contiguous free blocks of the range. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap. + * blkno - starting block number of the range. + * nblocks - number of contiguous free blocks of the range. 
* * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) held on entry/exit; */ @@ -1138,8 +1131,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, u32 mask; if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocNext: Corrupt dmap page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); return -EIO; } @@ -1216,7 +1208,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, * by this leaf. */ l2size = - min((int)leaf[word], NLSTOL2BSZ(nwords)); + min_t(int, leaf[word], NLSTOL2BSZ(nwords)); /* determine how many words were handled. */ @@ -1237,7 +1229,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, /* * NAME: dbAllocNear() * - * FUNCTION: attempt to allocate a number of contiguous free blocks near + * FUNCTION: attempt to allocate a number of contiguous free blocks near * a specified block (hint) within a dmap. * * starting with the dmap leaf that covers the hint, we'll @@ -1246,18 +1238,18 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, * the desired free space. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap. - * blkno - block number to allocate near. - * nblocks - actual number of contiguous free blocks desired. - * l2nb - log2 number of contiguous free blocks desired. - * results - on successful return, set to the starting block number + * bmp - pointer to bmap descriptor + * dp - pointer to dmap. + * blkno - block number to allocate near. + * nblocks - actual number of contiguous free blocks desired. + * l2nb - log2 number of contiguous free blocks desired. + * results - on successful return, set to the starting block number * of the newly allocated range. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) held on entry/exit; */ @@ -1269,8 +1261,7 @@ dbAllocNear(struct bmap * bmp, s8 *leaf; if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocNear: Corrupt dmap page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); return -EIO; } @@ -1320,7 +1311,7 @@ dbAllocNear(struct bmap * bmp, /* * NAME: dbAllocAG() * - * FUNCTION: attempt to allocate the specified number of contiguous + * FUNCTION: attempt to allocate the specified number of contiguous * free blocks within the specified allocation group. * * unless the allocation group size is equal to the number @@ -1337,7 +1328,7 @@ dbAllocNear(struct bmap * bmp, * or two sub-trees, depending on the allocation group size. * we search the top nodes of these subtrees left to right for * sufficient free space. if sufficient free space is found, - * the subtree is searched to find the leftmost leaf that + * the subtree is searched to find the leftmost leaf that * has free space. once we have made it to the leaf, we * move the search to the next lower level dmap control page * corresponding to this leaf. we continue down the dmap control @@ -1357,17 +1348,17 @@ dbAllocNear(struct bmap * bmp, * the allocation group. * * PARAMETERS: - * bmp - pointer to bmap descriptor + * bmp - pointer to bmap descriptor * agno - allocation group number. - * nblocks - actual number of contiguous free blocks desired. 
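Several hunks in this file swap min() for min_t(), e.g. min_t(int, leaf[word], NLSTOL2BSZ(nwords)) above. The kernel's min() is strict about its operands sharing one type, and a naive macro that mixes signed and unsigned operands can pick the wrong value; min_t(type, a, b) casts both sides to an explicit type first. A userspace illustration of the pitfall being avoided (my own macro names, not the kernel's):

#include <stdio.h>

/* naive min: lets the usual arithmetic conversions pick the common type */
#define NAIVE_MIN(a, b)         ((a) < (b) ? (a) : (b))
/* min_t-style: force both operands to one explicit type before comparing */
#define MIN_T(type, a, b)       ((type)(a) < (type)(b) ? (type)(a) : (type)(b))

int main(void)
{
        signed char leaf = -1;          /* e.g. a NOFREE dmtree leaf */
        unsigned int limit = 5;

        /* -1 is converted to a huge unsigned value, so the "minimum" is 5 */
        printf("naive: %u\n", (unsigned int)NAIVE_MIN(leaf, limit));
        /* casting both to int keeps -1 as the smaller value */
        printf("min_t: %d\n", MIN_T(int, leaf, limit));
        return 0;
}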
- * l2nb - log2 number of contiguous free blocks desired. - * results - on successful return, set to the starting block number + * nblocks - actual number of contiguous free blocks desired. + * l2nb - log2 number of contiguous free blocks desired. + * results - on successful return, set to the starting block number * of the newly allocated range. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * note: IWRITE_LOCK(ipmap) held on entry/exit; */ @@ -1385,8 +1376,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) */ if (l2nb > bmp->db_agl2size) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: allocation request is larger than the " - "allocation group size"); + "allocation request is larger than the allocation group size\n"); return -EIO; } @@ -1402,7 +1392,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * that fully describes the allocation group since the allocation * group is already fully described by a dmap. in this case, we * just call dbAllocCtl() to search the dmap tree and allocate the - * required space if available. + * required space if available. * * if the allocation group is completely free, dbAllocCtl() is * also called to allocate the required space. this is done for @@ -1421,7 +1411,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) (unsigned long long) blkno, (unsigned long long) nblocks); jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: dbAllocCtl failed in free AG"); + "dbAllocCtl failed in free AG\n"); } return (rc); } @@ -1437,8 +1427,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) budmin = dcp->budmin; if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: Corrupt dmapctl page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } @@ -1451,10 +1440,10 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * tree index of this allocation group within the control page. */ agperlev = - (1 << (L2LPERCTL - (bmp->db_agheigth << 1))) / bmp->db_agwidth; + (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth; ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); - /* dmap control page trees fan-out by 4 and a single allocation + /* dmap control page trees fan-out by 4 and a single allocation * group may be described by 1 or 2 subtrees within the ag level * dmap control page, depending upon the ag size. examine the ag's * subtrees for sufficient free space, starting with the leftmost @@ -1470,7 +1459,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * the subtree to find the leftmost leaf that describes this * free space. 
*/ - for (k = bmp->db_agheigth; k > 0; k--) { + for (k = bmp->db_agheight; k > 0; k--) { for (n = 0, m = (ti << 2) + 1; n < 4; n++) { if (l2nb <= dcp->stree[m + n]) { ti = m + n; @@ -1479,7 +1468,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) } if (n == 4) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: failed descending stree"); + "failed descending stree\n"); release_metapage(mp); return -EIO; } @@ -1511,7 +1500,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) if (l2nb < budmin) { /* search the lower level dmap control pages to get - * the starting block number of the the dmap that + * the starting block number of the dmap that * contains or starts off the free space. */ if ((rc = @@ -1519,8 +1508,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) &blkno))) { if (rc == -ENOSPC) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: control page " - "inconsistent"); + "control page inconsistent\n"); return -EIO; } return (rc); @@ -1532,7 +1520,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); if (rc == -ENOSPC) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: unable to allocate blocks"); + "unable to allocate blocks\n"); rc = -EIO; } return (rc); @@ -1550,7 +1538,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) /* * NAME: dbAllocAny() * - * FUNCTION: attempt to allocate the specified number of contiguous + * FUNCTION: attempt to allocate the specified number of contiguous * free blocks anywhere in the file system. * * dbAllocAny() attempts to find the sufficient free space by @@ -1560,16 +1548,16 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) * desired free space is allocated. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * nblocks - actual number of contiguous free blocks desired. - * l2nb - log2 number of contiguous free blocks desired. - * results - on successful return, set to the starting block number + * bmp - pointer to bmap descriptor + * nblocks - actual number of contiguous free blocks desired. + * l2nb - log2 number of contiguous free blocks desired. + * results - on successful return, set to the starting block number * of the newly allocated range. 
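The dbAllocAG() descent above treats the dmapctl summary tree as a flat array: m = (ti << 2) + 1 suggests the four children of node i live at indexes 4*i+1 through 4*i+4, and the inner loop takes the leftmost child whose value promises 2^l2nb free blocks. A compact userspace model of that descent over a toy tree (sizes and leaf values invented for the example):

#include <stdio.h>

#define FIRST_LEAF      5       /* root at 0, level-1 nodes at 1..4, leaves at 5..20 */

/* stree-style flat 4-ary tree: each node holds the log2 size of the largest
 * free chunk beneath it (leaves last, -1 = nothing free) */
static signed char stree[] = {
        4,                      /* root */
        2, 3, -1, 4,            /* level-1 nodes */
        -1, -1, 2, 1,   3, -1, 0, 0,   -1, -1, -1, -1,   4, 2, 1, -1,
};

/* descend to the leftmost leaf that can supply 2^l2nb blocks, or -1 */
static int find_leaf(int l2nb)
{
        int ti = 0;

        if (stree[ti] < l2nb)
                return -1;
        while (ti < FIRST_LEAF) {
                int m = (ti << 2) + 1;          /* the four children start here */
                int n;

                for (n = 0; n < 4; n++)
                        if (l2nb <= stree[m + n]) {
                                ti = m + n;     /* take the leftmost fitting child */
                                break;
                        }
                if (n == 4)
                        return -1;              /* tree and children disagree */
        }
        return ti - FIRST_LEAF;
}

int main(void)
{
        printf("2^2 blocks -> leaf %d\n", find_leaf(2));        /* 2 */
        printf("2^4 blocks -> leaf %d\n", find_leaf(4));        /* 12 */
        return 0;
}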
* * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -1591,8 +1579,7 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) */ rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); if (rc == -ENOSPC) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAny: unable to allocate blocks"); + jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n"); return -EIO; } return (rc); @@ -1600,11 +1587,121 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) /* + * NAME: dbDiscardAG() + * + * FUNCTION: attempt to discard (TRIM) all free blocks of specific AG + * + * algorithm: + * 1) allocate blocks, as large as possible and save them + * while holding IWRITE_LOCK on ipbmap + * 2) trim all these saved block/length values + * 3) mark the blocks free again + * + * benefit: + * - we work only on one ag at some time, minimizing how long we + * need to lock ipbmap + * - reading / writing the fs is possible most time, even on + * trimming + * + * downside: + * - we write two times to the dmapctl and dmap pages + * - but for me, this seems the best way, better ideas? + * /TR 2012 + * + * PARAMETERS: + * ip - pointer to in-core inode + * agno - ag to trim + * minlen - minimum value of contiguous blocks + * + * RETURN VALUES: + * s64 - actual number of blocks trimmed + */ +s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) +{ + struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; + struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; + s64 nblocks, blkno; + u64 trimmed = 0; + int rc, l2nb; + struct super_block *sb = ipbmap->i_sb; + + struct range2trim { + u64 blkno; + u64 nblocks; + } *totrim, *tt; + + /* max blkno / nblocks pairs to trim */ + int count = 0, range_cnt; + u64 max_ranges; + + /* prevent others from writing new stuff here, while trimming */ + IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); + + nblocks = bmp->db_agfree[agno]; + max_ranges = nblocks; + do_div(max_ranges, minlen); + range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); + totrim = kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); + if (totrim == NULL) { + jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n"); + IWRITE_UNLOCK(ipbmap); + return 0; + } + + tt = totrim; + while (nblocks >= minlen) { + l2nb = BLKSTOL2(nblocks); + + /* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */ + rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno); + if (rc == 0) { + tt->blkno = blkno; + tt->nblocks = nblocks; + tt++; count++; + + /* the whole ag is free, trim now */ + if (bmp->db_agfree[agno] == 0) + break; + + /* give a hint for the next while */ + nblocks = bmp->db_agfree[agno]; + continue; + } else if (rc == -ENOSPC) { + /* search for next smaller log2 block */ + l2nb = BLKSTOL2(nblocks) - 1; + nblocks = 1 << l2nb; + } else { + /* Trim any already allocated blocks */ + jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n"); + break; + } + + /* check, if our trim array is full */ + if (unlikely(count >= range_cnt - 1)) + break; + } + IWRITE_UNLOCK(ipbmap); + + tt->nblocks = 0; /* mark the current end */ + for (tt = totrim; tt->nblocks != 0; tt++) { + /* when mounted with online discard, dbFree() will + * call jfs_issue_discard() itself */ + if (!(JFS_SBI(sb)->flag & JFS_DISCARD)) + jfs_issue_discard(ip, tt->blkno, tt->nblocks); + dbFree(ip, tt->blkno, tt->nblocks); + trimmed += tt->nblocks; + } + kfree(totrim); + + return 
trimmed; +} + +/* * NAME: dbFindCtl() * - * FUNCTION: starting at a specified dmap control page level and block + * FUNCTION: starting at a specified dmap control page level and block * number, search down the dmap control levels for a range of - * contiguous free blocks large enough to satisfy an allocation + * contiguous free blocks large enough to satisfy an allocation * request for the specified number of free blocks. * * if sufficient contiguous free blocks are found, this routine @@ -1613,17 +1710,17 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) * is sufficient in size. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * level - starting dmap control page level. - * l2nb - log2 number of contiguous free blocks desired. - * *blkno - on entry, starting block number for conducting the search. + * bmp - pointer to bmap descriptor + * level - starting dmap control page level. + * l2nb - log2 number of contiguous free blocks desired. + * *blkno - on entry, starting block number for conducting the search. * on successful return, the first block within a dmap page * that contains or starts a range of contiguous free blocks. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -1637,7 +1734,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) /* starting at the specified dmap control page level and block * number, search down the dmap control levels for the starting - * block number of a dmap page that contains or starts off + * block number of a dmap page that contains or starts off * sufficient free blocks. */ for (lev = level, b = *blkno; lev >= 0; lev--) { @@ -1653,13 +1750,13 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { jfs_error(bmp->db_ipbmap->i_sb, - "dbFindCtl: Corrupt dmapctl page"); + "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } /* search the tree within the dmap control page for - * sufficent free space. if sufficient free space is found, + * sufficient free space. if sufficient free space is found, * dbFindLeaf() returns the index of the leaf at which * free space was found. */ @@ -1674,14 +1771,14 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) if (rc) { if (lev != level) { jfs_error(bmp->db_ipbmap->i_sb, - "dbFindCtl: dmap inconsistent"); + "dmap inconsistent\n"); return -EIO; } return -ENOSPC; } /* adjust the block number to reflect the location within - * the dmap control page (i.e. the leaf) at which free + * the dmap control page (i.e. the leaf) at which free * space was found. */ b += (((s64) leafidx) << budmin); @@ -1703,13 +1800,13 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) /* * NAME: dbAllocCtl() * - * FUNCTION: attempt to allocate a specified number of contiguous - * blocks starting within a specific dmap. - * + * FUNCTION: attempt to allocate a specified number of contiguous + * blocks starting within a specific dmap. + * * this routine is called by higher level routines that search * the dmap control pages above the actual dmaps for contiguous * free space. 
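dbDiscardAG() above collects the extents it manages to grab into a blkno/length array before trimming and re-freeing them; the array is sized at one entry per minlen-sized run of the AG's free space, plus one, capped at 32K entries so the allocation stays bounded. A userspace sketch of just that sizing step:

#include <stdio.h>
#include <stdint.h>

/* model of how dbDiscardAG() sizes its blkno/length array: at most one entry
 * per minlen-sized run of free space, plus one, capped at 32K entries */
static int range_count(uint64_t agfree, uint64_t minlen)
{
        uint64_t max_ranges = agfree / minlen;  /* stands in for do_div() */
        uint64_t cap = 32 * 1024;

        return (int)(max_ranges + 1 < cap ? max_ranges + 1 : cap);
}

int main(void)
{
        printf("%d\n", range_count(1 << 20, 64));       /* 16385 entries */
        printf("%d\n", range_count(1 << 30, 8));        /* capped at 32768 */
        return 0;
}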
the result of successful searches by these - * routines are the starting block numbers within dmaps, with + * routines are the starting block numbers within dmaps, with * the dmaps themselves containing the desired contiguous free * space or starting a contiguous free space of desired size * that is made up of the blocks of one or more dmaps. these @@ -1730,18 +1827,18 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) * first dmap (i.e. blkno). * * PARAMETERS: - * bmp - pointer to bmap descriptor - * nblocks - actual number of contiguous free blocks to allocate. - * l2nb - log2 number of contiguous free blocks to allocate. - * blkno - starting block number of the dmap to start the allocation + * bmp - pointer to bmap descriptor + * nblocks - actual number of contiguous free blocks to allocate. + * l2nb - log2 number of contiguous free blocks to allocate. + * blkno - starting block number of the dmap to start the allocation * from. - * results - on successful return, set to the starting block number + * results - on successful return, set to the starting block number * of the newly allocated range. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -1798,14 +1895,14 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) if (dp->tree.stree[ROOT] != L2BPERDMAP) { release_metapage(mp); jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: the dmap is not all free"); + "the dmap is not all free\n"); rc = -EIO; goto backout; } /* determine how many blocks to allocate from this dmap. */ - nb = min(n, (s64)BPERDMAP); + nb = min_t(s64, n, BPERDMAP); /* allocate the blocks from the dmap. */ @@ -1845,7 +1942,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) * to indicate that we have leaked blocks. */ jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: I/O Error: Block Leakage."); + "I/O Error: Block Leakage\n"); continue; } dp = (struct dmap *) mp->data; @@ -1857,8 +1954,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) * to indicate that we have leaked blocks. */ release_metapage(mp); - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: Block Leakage."); + jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n"); continue; } @@ -1874,27 +1970,27 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) /* * NAME: dbAllocDmapLev() * - * FUNCTION: attempt to allocate a specified number of contiguous blocks + * FUNCTION: attempt to allocate a specified number of contiguous blocks * from a specified dmap. - * + * * this routine checks if the contiguous blocks are available. * if so, nblocks of blocks are allocated; otherwise, ENOSPC is * returned. * * PARAMETERS: - * mp - pointer to bmap descriptor - * dp - pointer to dmap to attempt to allocate blocks from. - * l2nb - log2 number of contiguous block desired. - * nblocks - actual number of contiguous block desired. - * results - on successful return, set to the starting block number + * mp - pointer to bmap descriptor + * dp - pointer to dmap to attempt to allocate blocks from. + * l2nb - log2 number of contiguous block desired. + * nblocks - actual number of contiguous block desired. + * results - on successful return, set to the starting block number * of the newly allocated range. 
* * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient disk resources - * -EIO - i/o error + * 0 - success + * -ENOSPC - insufficient disk resources + * -EIO - i/o error * - * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or + * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; */ static int @@ -1937,7 +2033,7 @@ dbAllocDmapLev(struct bmap * bmp, /* * NAME: dbAllocDmap() * - * FUNCTION: adjust the disk allocation map to reflect the allocation + * FUNCTION: adjust the disk allocation map to reflect the allocation * of a specified block range within a dmap. * * this routine allocates the specified blocks from the dmap @@ -1950,14 +2046,14 @@ dbAllocDmapLev(struct bmap * bmp, * covers this dmap. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to allocate the block range from. - * blkno - starting block number of the block to be allocated. - * nblocks - number of blocks to be allocated. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to allocate the block range from. + * blkno - starting block number of the block to be allocated. + * nblocks - number of blocks to be allocated. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -1993,7 +2089,7 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, /* * NAME: dbFreeDmap() * - * FUNCTION: adjust the disk allocation map to reflect the allocation + * FUNCTION: adjust the disk allocation map to reflect the allocation * of a specified block range within a dmap. * * this routine frees the specified blocks from the dmap through @@ -2001,18 +2097,18 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, * causes the maximum string of free blocks within the dmap to * change (i.e. the value of the root of the dmap's dmtree), this * routine will cause this change to be reflected up through the - * appropriate levels of the dmap control pages by a call to + * appropriate levels of the dmap control pages by a call to * dbAdjCtl() for the L0 dmap control page that covers this dmap. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to free the block range from. - * blkno - starting block number of the block to be freed. - * nblocks - number of blocks to be freed. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to free the block range from. + * blkno - starting block number of the block to be freed. + * nblocks - number of blocks to be freed. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -2036,7 +2132,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, /* root changed. bubble the change up to the dmap control pages. * if the adjustment of the upper level control pages fails, - * backout the deallocation. + * backout the deallocation. */ if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; @@ -2059,7 +2155,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, /* * NAME: dbAllocBits() * - * FUNCTION: allocate a specified block range from a dmap. + * FUNCTION: allocate a specified block range from a dmap. 
* * this routine updates the dmap to reflect the working * state allocation of the specified block range. it directly @@ -2069,10 +2165,10 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, * dmap's dmtree, as a whole, to reflect the allocated range. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to allocate bits from. - * blkno - starting block number of the bits to be allocated. - * nblocks - number of bits to be allocated. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to allocate bits from. + * blkno - starting block number of the bits to be allocated. + * nblocks - number of bits to be allocated. * * RETURN VALUES: none * @@ -2153,10 +2249,9 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * the allocated words. */ for (; nwords > 0; nwords -= nw) { - if (leaf[word] < BUDMIN) { + if (leaf[word] < BUDMIN) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocBits: leaf page " - "corrupt"); + "leaf page corrupt\n"); break; } @@ -2165,7 +2260,8 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * of bits being allocated and the l2 number * of bits currently described by this leaf. */ - size = min((int)leaf[word], NLSTOL2BSZ(nwords)); + size = min_t(int, leaf[word], + NLSTOL2BSZ(nwords)); /* update the leaf to reflect the allocation. * in addition to setting the leaf value to @@ -2183,7 +2279,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, } /* update the free count for this dmap */ - dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); + le32_add_cpu(&dp->nfree, -nblocks); BMAP_LOCK(bmp); @@ -2206,7 +2302,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, /* * NAME: dbFreeBits() * - * FUNCTION: free a specified block range from a dmap. + * FUNCTION: free a specified block range from a dmap. * * this routine updates the dmap to reflect the working * state allocation of the specified block range. it directly @@ -2216,10 +2312,10 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * dmtree, as a whole, to reflect the deallocated range. * * PARAMETERS: - * bmp - pointer to bmap descriptor - * dp - pointer to dmap to free bits from. - * blkno - starting block number of the bits to be freed. - * nblocks - number of bits to be freed. + * bmp - pointer to bmap descriptor + * dp - pointer to dmap to free bits from. + * blkno - starting block number of the bits to be freed. + * nblocks - number of bits to be freed. * * RETURN VALUES: 0 for success * @@ -2249,7 +2345,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * words (i.e. partial first and/or last) on an individual basis * (a single pass), freeing the bits of interest by hand and updating * the leaf corresponding to the dmap word. a single pass will be used - * for all dmap words fully contained within the specified range. + * for all dmap words fully contained within the specified range. * within this pass, the bits of all fully contained dmap words will * be marked as free in a single shot and the leaves will be updated. a * single leaf may describe the free space of multiple dmap words, @@ -2271,7 +2367,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, */ if (nb < DBWORD) { /* free (zero) the appropriate bits within this - * dmap word. + * dmap word. 
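The open-coded dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks) updates in these hunks become le32_add_cpu(&dp->nfree, -nblocks), which performs the same load/convert/add/convert/store sequence in one helper. A portable userspace equivalent, purely for illustration (my own le32 type, not the kernel's __le32):

#include <stdio.h>
#include <stdint.h>

/* little-endian 32-bit value kept as raw bytes, independent of host order */
typedef struct { uint8_t b[4]; } le32;

static uint32_t le32_to_cpu(le32 v)
{
        return (uint32_t)v.b[0] | ((uint32_t)v.b[1] << 8) |
               ((uint32_t)v.b[2] << 16) | ((uint32_t)v.b[3] << 24);
}

static le32 cpu_to_le32(uint32_t x)
{
        le32 v = { { x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff, (x >> 24) & 0xff } };
        return v;
}

/* same shape as the kernel helper: load, add in cpu order, store back */
static void le32_add_cpu(le32 *var, int32_t val)
{
        *var = cpu_to_le32(le32_to_cpu(*var) + (uint32_t)val);
}

int main(void)
{
        le32 nfree = cpu_to_le32(100);

        le32_add_cpu(&nfree, -25);              /* like allocating 25 blocks */
        printf("nfree = %u\n", le32_to_cpu(nfree));     /* 75 */
        return 0;
}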
*/ dp->wmap[word] &= cpu_to_le32(~(ONES << (DBWORD - nb) @@ -2327,11 +2423,11 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, /* update the free count for this dmap. */ - dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); + le32_add_cpu(&dp->nfree, nblocks); BMAP_LOCK(bmp); - /* update the free count for the allocation group and + /* update the free count for the allocation group and * map. */ agno = blkno >> bmp->db_agl2size; @@ -2382,7 +2478,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * or deallocation resulted in the root change. this range * is respresented by a single leaf of the current dmapctl * and the leaf will be updated with this value, possibly - * causing a binary buddy system within the leaves to be + * causing a binary buddy system within the leaves to be * split or joined. the update may also cause the dmapctl's * dmtree to be updated. * @@ -2392,19 +2488,19 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, * the new root value and the next dmap control page level to * be adjusted. * PARAMETERS: - * bmp - pointer to bmap descriptor - * blkno - the first block of a block range within a dmap. it is + * bmp - pointer to bmap descriptor + * blkno - the first block of a block range within a dmap. it is * the allocation or deallocation of this block range that * requires the dmap control page to be adjusted. - * newval - the new value of the lower level dmap or dmap control + * newval - the new value of the lower level dmap or dmap control * page root. - * alloc - TRUE if adjustment is due to an allocation. - * level - current level of dmap control page (i.e. L0, L1, L2) to + * alloc - 'true' if adjustment is due to an allocation. + * level - current level of dmap control page (i.e. L0, L1, L2) to * be adjusted. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error * * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; */ @@ -2428,8 +2524,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) dcp = (struct dmapctl *) mp->data; if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAdjCtl: Corrupt dmapctl page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } @@ -2448,7 +2543,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) /* check if this is a control page update for an allocation. * if so, update the leaf to reflect the new leaf value using - * dbSplit(); otherwise (deallocation), use dbJoin() to udpate + * dbSplit(); otherwise (deallocation), use dbJoin() to update * the leaf with the new value. 
in addition to updating the * leaf, dbSplit() will also split the binary buddy system of * the leaves, if required, and bubble new values within the @@ -2530,8 +2625,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) assert(level == bmp->db_maxlevel); if (bmp->db_maxfreebud != oldroot) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAdjCtl: the maximum free buddy is " - "not the old root"); + "the maximum free buddy is not the old root\n"); } bmp->db_maxfreebud = dcp->stree[ROOT]; } @@ -2548,16 +2642,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) /* * NAME: dbSplit() * - * FUNCTION: update the leaf of a dmtree with a new value, splitting + * FUNCTION: update the leaf of a dmtree with a new value, splitting * the leaf from the binary buddy system of the dmtree's * leaves, as required. * * PARAMETERS: - * tp - pointer to the tree containing the leaf. - * leafno - the number of the leaf to be updated. - * splitsz - the size the binary buddy system starting at the leaf + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * splitsz - the size the binary buddy system starting at the leaf * must be split to, specified as the log2 number of blocks. - * newval - the new value for the leaf. + * newval - the new value for the leaf. * * RETURN VALUES: none * @@ -2594,7 +2688,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) } } - /* adjust the dmap tree to reflect the specified leaf's new + /* adjust the dmap tree to reflect the specified leaf's new * value. */ dbAdjTree(tp, leafno, newval); @@ -2604,7 +2698,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) /* * NAME: dbBackSplit() * - * FUNCTION: back split the binary buddy system of dmtree leaves + * FUNCTION: back split the binary buddy system of dmtree leaves * that hold a specified leaf until the specified leaf * starts its own binary buddy system. * @@ -2621,8 +2715,8 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval) * in which a previous join operation must be backed out. * * PARAMETERS: - * tp - pointer to the tree containing the leaf. - * leafno - the number of the leaf to be updated. + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. * * RETURN VALUES: none * @@ -2642,7 +2736,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno) /* the back split is accomplished by iteratively finding the leaf * that starts the buddy system that contains the specified leaf and * splitting that system in two. this iteration continues until - * the specified leaf becomes the start of a buddy system. + * the specified leaf becomes the start of a buddy system. * * determine maximum possible l2 size for the specified leaf. */ @@ -2696,14 +2790,14 @@ static int dbBackSplit(dmtree_t * tp, int leafno) /* * NAME: dbJoin() * - * FUNCTION: update the leaf of a dmtree with a new value, joining + * FUNCTION: update the leaf of a dmtree with a new value, joining * the leaf with other leaves of the dmtree into a multi-leaf * binary buddy system, as required. * * PARAMETERS: - * tp - pointer to the tree containing the leaf. - * leafno - the number of the leaf to be updated. - * newval - the new value for the leaf. + * tp - pointer to the tree containing the leaf. + * leafno - the number of the leaf to be updated. + * newval - the new value for the leaf. 
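dbJoin(), whose description ends above, folds a newly freed leaf back into the binary buddy system: as long as the leaf and its equal-sized buddy are both fully free, the left buddy of the pair absorbs both (its size grows by one) and the right buddy is marked NOFREE. A toy version over a flat leaf array; the leafno ^ size buddy pairing is my assumption about the usual buddy arithmetic, not something shown in this hunk:

#include <stdio.h>

#define NOFREE          (-1)
#define L2_LEAVES       4               /* 16 leaves in the toy array */
#define NLEAVES         (1 << L2_LEAVES)

/* join leaf 'leafno', now describing 2^newval free blocks, with its buddies
 * for as long as both halves of a pair are fully free */
static void join(signed char *leaf, int leafno, int newval)
{
        while (newval < L2_LEAVES) {
                int budsz = 1 << newval;        /* buddy distance at this size */
                int buddy = leafno ^ budsz;     /* assumed buddy pairing */

                if (leaf[buddy] != newval)      /* buddy not fully free: stop */
                        break;

                /* left buddy claims the combined chunk, right buddy drops out */
                if (buddy < leafno) {
                        leaf[leafno] = NOFREE;
                        leafno = buddy;
                } else {
                        leaf[buddy] = NOFREE;
                }
                newval++;
        }
        leaf[leafno] = (signed char)newval;
}

int main(void)
{
        /* leaves 0-1 already joined as a free pair, leaf 3 free on its own;
         * leaf 2 is the one being freed now */
        signed char leaf[NLEAVES] = { 1, NOFREE, NOFREE, 0,
                                      NOFREE, NOFREE, NOFREE, NOFREE,
                                      NOFREE, NOFREE, NOFREE, NOFREE,
                                      NOFREE, NOFREE, NOFREE, NOFREE };

        join(leaf, 2, 0);       /* 2 joins 3, then pair (2,3) joins pair (0,1) */
        for (int i = 0; i < 4; i++)
                printf("leaf[%d] = %d\n", i, leaf[i]);
        return 0;
}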
* * RETURN VALUES: none */ @@ -2754,7 +2848,7 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) /* check which (leafno or buddy) is the left buddy. * the left buddy gets to claim the blocks resulting * from the join while the right gets to claim none. - * the left buddy is also eligable to participate in + * the left buddy is also eligible to participate in * a join at the next higher level while the right * is not. * @@ -2789,15 +2883,15 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval) /* * NAME: dbAdjTree() * - * FUNCTION: update a leaf of a dmtree with a new value, adjusting + * FUNCTION: update a leaf of a dmtree with a new value, adjusting * the dmtree, as required, to reflect the new leaf value. * the combination of any buddies must already be done before * this is called. * * PARAMETERS: - * tp - pointer to the tree to be adjusted. - * leafno - the number of the leaf to be updated. - * newval - the new value for the leaf. + * tp - pointer to the tree to be adjusted. + * leafno - the number of the leaf to be updated. + * newval - the new value for the leaf. * * RETURN VALUES: none */ @@ -2856,8 +2950,8 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval) /* * NAME: dbFindLeaf() * - * FUNCTION: search a dmtree_t for sufficient free blocks, returning - * the index of a leaf describing the free blocks if + * FUNCTION: search a dmtree_t for sufficient free blocks, returning + * the index of a leaf describing the free blocks if * sufficient free blocks are found. * * the search starts at the top of the dmtree_t tree and @@ -2865,15 +2959,15 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval) * free space. * * PARAMETERS: - * tp - pointer to the tree to be searched. - * l2nb - log2 number of free blocks to search for. + * tp - pointer to the tree to be searched. + * l2nb - log2 number of free blocks to search for. * leafidx - return pointer to be set to the index of the leaf * describing at least l2nb free blocks if sufficient * free blocks are found. * * RETURN VALUES: - * 0 - success - * -ENOSPC - insufficient free blocks. + * 0 - success + * -ENOSPC - insufficient free blocks. */ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) { @@ -2920,18 +3014,18 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx) /* * NAME: dbFindBits() * - * FUNCTION: find a specified number of binary buddy free bits within a + * FUNCTION: find a specified number of binary buddy free bits within a * dmap bitmap word value. * * this routine searches the bitmap value for (1 << l2nb) free * bits at (1 << l2nb) alignments within the value. * * PARAMETERS: - * word - dmap bitmap word value. - * l2nb - number of free bits specified as a log2 number. + * word - dmap bitmap word value. + * l2nb - number of free bits specified as a log2 number. * * RETURN VALUES: - * starting bit number of free bits. + * starting bit number of free bits. */ static int dbFindBits(u32 word, int l2nb) { @@ -2967,14 +3061,14 @@ static int dbFindBits(u32 word, int l2nb) /* * NAME: dbMaxBud(u8 *cp) * - * FUNCTION: determine the largest binary buddy string of free + * FUNCTION: determine the largest binary buddy string of free * bits within 32-bits of the map. * * PARAMETERS: - * cp - pointer to the 32-bit value. + * cp - pointer to the 32-bit value. * * RETURN VALUES: - * largest binary buddy of free bits within a dmap word. + * largest binary buddy of free bits within a dmap word. 
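dbFindBits(), described above, scans a single 32-bit map word for (1 << l2nb) free bits starting on a (1 << l2nb)-aligned boundary. A brute-force userspace version; treating bit 0 as the most significant bit (with set bits meaning allocated) is my assumption about the dmap orientation:

#include <stdio.h>
#include <stdint.h>

#define DBWORD  32      /* bits per dmap word */

/* starting bit of an aligned run of 2^l2nb free (zero) bits in 'word',
 * counting bit 0 from the most significant end, or -1 if none exists
 * (0 <= l2nb <= 4 here) */
static int find_bits(uint32_t word, int l2nb)
{
        int nb = 1 << l2nb;
        uint32_t free_bits = ~word;                     /* 1 now means "free" */
        uint32_t mask = 0xffffffffu << (DBWORD - nb);   /* nb high bits */

        for (int bitno = 0; mask != 0; bitno += nb, mask >>= nb)
                if ((free_bits & mask) == mask)
                        return bitno;
        return -1;
}

int main(void)
{
        /* 0xff00ffff: only bits 8..15 (from the MSB end) are free */
        printf("%d\n", find_bits(0xff00ffffu, 3));      /* 8  */
        printf("%d\n", find_bits(0xff00ffffu, 2));      /* 8  */
        printf("%d\n", find_bits(0xff00ffffu, 4));      /* -1 */
        return 0;
}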
*/ static int dbMaxBud(u8 * cp) { @@ -3004,14 +3098,14 @@ static int dbMaxBud(u8 * cp) /* * NAME: cnttz(uint word) * - * FUNCTION: determine the number of trailing zeros within a 32-bit + * FUNCTION: determine the number of trailing zeros within a 32-bit * value. * * PARAMETERS: - * value - 32-bit value to be examined. + * value - 32-bit value to be examined. * * RETURN VALUES: - * count of trailing zeros + * count of trailing zeros */ static int cnttz(u32 word) { @@ -3029,14 +3123,14 @@ static int cnttz(u32 word) /* * NAME: cntlz(u32 value) * - * FUNCTION: determine the number of leading zeros within a 32-bit + * FUNCTION: determine the number of leading zeros within a 32-bit * value. * * PARAMETERS: - * value - 32-bit value to be examined. + * value - 32-bit value to be examined. * * RETURN VALUES: - * count of leading zeros + * count of leading zeros */ static int cntlz(u32 value) { @@ -3054,14 +3148,14 @@ static int cntlz(u32 value) * NAME: blkstol2(s64 nb) * * FUNCTION: convert a block count to its log2 value. if the block - * count is not a l2 multiple, it is rounded up to the next + * count is not a l2 multiple, it is rounded up to the next * larger l2 multiple. * * PARAMETERS: - * nb - number of blocks + * nb - number of blocks * * RETURN VALUES: - * log2 number of blocks + * log2 number of blocks */ static int blkstol2(s64 nb) { @@ -3094,7 +3188,7 @@ static int blkstol2(s64 nb) /* - * NAME: dbAllocBottomUp() + * NAME: dbAllocBottomUp() * * FUNCTION: alloc the specified block range from the working block * allocation map. @@ -3103,13 +3197,13 @@ static int blkstol2(s64 nb) * at a time. * * PARAMETERS: - * ip - pointer to in-core inode; - * blkno - starting block number to be freed. - * nblocks - number of blocks to be freed. + * ip - pointer to in-core inode; + * blkno - starting block number to be freed. + * nblocks - number of blocks to be freed. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * 0 - success + * -EIO - i/o error */ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) { @@ -3120,7 +3214,7 @@ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; - IREAD_LOCK(ipbmap); + IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); /* block to be allocated better be within the mapsize. */ ASSERT(nblocks <= bmp->db_mapsize - blkno); @@ -3171,7 +3265,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, { int rc; int dbitno, word, rembits, nb, nwords, wbitno, agno; - s8 oldroot, *leaf; + s8 oldroot; struct dmaptree *tp = (struct dmaptree *) & dp->tree; /* save the current value of the root (i.e. maximum free string) @@ -3179,9 +3273,6 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, */ oldroot = tp->stree[ROOT]; - /* pick up a pointer to the leaves of the dmap tree */ - leaf = tp->stree + LEAFIND; - /* determine the bit number and word within the dmap of the * starting block. 
*/ @@ -3237,7 +3328,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, } /* update the free count for this dmap */ - dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) - nblocks); + le32_add_cpu(&dp->nfree, -nblocks); /* reconstruct summary tree */ dbInitDmapTree(dp); @@ -3245,7 +3336,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, BMAP_LOCK(bmp); /* if this allocation group is completely free, - * update the highest active allocation group number + * update the highest active allocation group number * if this allocation group is the new max. */ agno = blkno >> bmp->db_agl2size; @@ -3277,24 +3368,24 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, * NAME: dbExtendFS() * * FUNCTION: extend bmap from blkno for nblocks; - * dbExtendFS() updates bmap ready for dbAllocBottomUp(); + * dbExtendFS() updates bmap ready for dbAllocBottomUp(); * * L2 * | * L1---------------------------------L1 - * | | - * L0---------L0---------L0 L0---------L0---------L0 - * | | | | | | - * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; + * | | + * L0---------L0---------L0 L0---------L0---------L0 + * | | | | | | + * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm * - * <---old---><----------------------------extend-----------------------> + * <---old---><----------------------------extend-----------------------> */ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) { struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb); int nbperpage = sbi->nbperpage; - int i, i0 = TRUE, j, j0 = TRUE, k, n; + int i, i0 = true, j, j0 = true, k, n; s64 newsize; s64 p; struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL; @@ -3311,7 +3402,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) (long long) blkno, (long long) nblocks, (long long) newsize); /* - * initialize bmap control page. + * initialize bmap control page. * * all the data in bmap control page should exclude * the mkfs hidden dmap page. @@ -3334,7 +3425,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 
1 : 0; /* - * reconfigure db_agfree[] + * reconfigure db_agfree[] * from old AG configuration to new AG configuration; * * coalesce contiguous k (newAGSize/oldAGSize) AGs; @@ -3348,7 +3439,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) for (i = 0, n = 0; i < agno; n++) { bmp->db_agfree[n] = 0; /* init collection point */ - /* coalesce cotiguous k AGs; */ + /* coalesce contiguous k AGs; */ for (j = 0; j < k && i < agno; j++, i++) { /* merge AGi to AGn */ bmp->db_agfree[n] += bmp->db_agfree[i]; @@ -3366,7 +3457,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) bmp->db_maxag = bmp->db_maxag / k; /* - * extend bmap + * extend bmap * * update bit maps and corresponding level control pages; * global control page db_nfree, db_agfree[agno], db_maxfreebud; @@ -3376,7 +3467,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) p = BMAPBLKNO + nbperpage; /* L2 page */ l2mp = read_metapage(ipbmap, p, PSIZE, 0); if (!l2mp) { - jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read"); + jfs_error(ipbmap->i_sb, "L2 page could not be read\n"); return -EIO; } l2dcp = (struct dmapctl *) l2mp->data; @@ -3402,7 +3493,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE; l1leaf = l1dcp->stree + CTLLEAFIND + j; p = BLKTOL0(blkno, sbi->l2nbperpage); - j0 = FALSE; + j0 = false; } else { /* assign/init L1 page */ l1mp = get_metapage(ipbmap, p, PSIZE, 0); @@ -3414,7 +3505,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) /* compute start L0 */ j = 0; l1leaf = l1dcp->stree + CTLLEAFIND; - p += nbperpage; /* 1st L0 of L1.k */ + p += nbperpage; /* 1st L0 of L1.k */ } /* @@ -3436,7 +3527,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) l0leaf = l0dcp->stree + CTLLEAFIND + i; p = BLKTODMAP(blkno, sbi->l2nbperpage); - i0 = FALSE; + i0 = false; } else { /* assign/init L0 page */ l0mp = get_metapage(ipbmap, p, PSIZE, 0); @@ -3473,7 +3564,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) if (mp == NULL) goto errout; - n = min(nblocks, (s64)BPERDMAP); + n = min_t(s64, nblocks, BPERDMAP); } dp = (struct dmap *) mp->data; @@ -3495,7 +3586,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) } /* for each dmap in a L0 */ /* - * build current L0 page from its leaves, and + * build current L0 page from its leaves, and * initialize corresponding parent L1 leaf */ *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i); @@ -3519,7 +3610,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) } /* for each L0 in a L1 */ /* - * build current L1 page from its leaves, and + * build current L1 page from its leaves, and * initialize corresponding parent L2 leaf */ *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j); @@ -3541,8 +3632,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) } } /* for each L1 in a L2 */ - jfs_error(ipbmap->i_sb, - "dbExtendFS: function has not returned as expected"); + jfs_error(ipbmap->i_sb, "function has not returned as expected\n"); errout: if (l0mp) release_metapage(l0mp); @@ -3552,7 +3642,7 @@ errout: return -EIO; /* - * finalize bmap control page + * finalize bmap control page */ finalize: @@ -3571,10 +3661,10 @@ void dbFinalizeBmap(struct inode *ipbmap) int i, n; /* - * finalize bmap control page + * finalize bmap control page */ //finalize: - /* + /* * compute db_agpref: preferred ag to allocate from * (the leftmost ag with average free space in it); */ @@ -3612,23 +3702,23 @@ void dbFinalizeBmap(struct inode *ipbmap) } if 
(bmp->db_agpref >= bmp->db_numag) { jfs_error(ipbmap->i_sb, - "cannot find ag with average freespace"); + "cannot find ag with average freespace\n"); } } /* - * compute db_aglevel, db_agheigth, db_width, db_agstart: - * an ag is covered in aglevel dmapctl summary tree, - * at agheight level height (from leaf) with agwidth number of nodes - * each, which starts at agstart index node of the smmary tree node + * compute db_aglevel, db_agheight, db_width, db_agstart: + * an ag is covered in aglevel dmapctl summary tree, + * at agheight level height (from leaf) with agwidth number of nodes + * each, which starts at agstart index node of the smmary tree node * array; */ bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); l2nl = bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); - bmp->db_agheigth = l2nl >> 1; - bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheigth << 1)); - for (i = 5 - bmp->db_agheigth, bmp->db_agstart = 0, n = 1; i > 0; + bmp->db_agheight = l2nl >> 1; + bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1)); + for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0; i--) { bmp->db_agstart += n; n <<= 2; @@ -3639,13 +3729,13 @@ void dbFinalizeBmap(struct inode *ipbmap) /* * NAME: dbInitDmap()/ujfs_idmap_page() - * + * * FUNCTION: initialize working/persistent bitmap of the dmap page * for the specified number of blocks: - * + * * at entry, the bitmaps had been initialized as free (ZEROS); - * The number of blocks will only account for the actually - * existing blocks. Blocks which don't actually exist in + * The number of blocks will only account for the actually + * existing blocks. Blocks which don't actually exist in * the aggregate will be marked as allocated (ONES); * * PARAMETERS: @@ -3671,9 +3761,8 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) goto initTree; } } else { - dp->nblocks = - cpu_to_le32(le32_to_cpu(dp->nblocks) + nblocks); - dp->nfree = cpu_to_le32(le32_to_cpu(dp->nfree) + nblocks); + le32_add_cpu(&dp->nblocks, nblocks); + le32_add_cpu(&dp->nfree, nblocks); } /* word number containing start block number */ @@ -3681,7 +3770,7 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) /* * free the bits corresponding to the block range (ZEROS): - * note: not all bits of the first and last words may be contained + * note: not all bits of the first and last words may be contained * within the block range. */ for (r = nblocks; r > 0; r -= nb, blkno += nb) { @@ -3713,7 +3802,7 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) } /* - * mark bits following the range to be freed (non-existing + * mark bits following the range to be freed (non-existing * blocks) as allocated (ONES) */ @@ -3745,11 +3834,11 @@ static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) /* * NAME: dbInitDmapTree()/ujfs_complete_dmap() - * + * * FUNCTION: initialize summary tree of the specified dmap: * * at entry, bitmap of the dmap has been initialized; - * + * * PARAMETERS: * dp - dmap to complete * blkno - starting block number for this dmap @@ -3773,7 +3862,7 @@ static int dbInitDmapTree(struct dmap * dp) /* init each leaf from corresponding wmap word: * note: leaf is set to NOFREE(-1) if all blocks of corresponding - * bitmap word are allocated. + * bitmap word are allocated. 
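dbInitDmap(), introduced above, zeroes (frees) only the bits for blocks that really exist and sets every bit past the end of the aggregate to ONES so those non-existent blocks can never be handed out. A small userspace model of that tail-marking over 32-bit words, using a shrunken dmap and an assumed MSB-first bit order:

#include <stdio.h>
#include <stdint.h>

#define DBWORD          32      /* bits per map word */
#define TOY_LPERDMAP    8       /* words in this toy dmap; the real one has far more */

/* zero the first nblocks bits (free, existing blocks) and set every remaining
 * bit (allocated, non-existent blocks), MSB-first in each word */
static void init_dmap_words(uint32_t *wmap, int nblocks)
{
        int full = nblocks / DBWORD;            /* words entirely inside the range */
        int rembits = nblocks % DBWORD;
        int w = 0;

        while (w < full)
                wmap[w++] = 0x00000000u;        /* all free */
        if (rembits)                            /* split word: trailing bits are ONES */
                wmap[w++] = 0xffffffffu >> rembits;
        while (w < TOY_LPERDMAP)
                wmap[w++] = 0xffffffffu;        /* wholly non-existent: all allocated */
}

int main(void)
{
        uint32_t wmap[TOY_LPERDMAP];

        init_dmap_words(wmap, 40);              /* 40 real blocks in a 256-bit toy dmap */
        for (int w = 0; w < TOY_LPERDMAP; w++)
                printf("wmap[%d] = 0x%08x\n", w, wmap[w]);
        return 0;
}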
*/ cp = tp->stree + le32_to_cpu(tp->leafidx); for (i = 0; i < LPERDMAP; i++) @@ -3786,10 +3875,10 @@ static int dbInitDmapTree(struct dmap * dp) /* * NAME: dbInitTree()/ujfs_adjtree() - * + * * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl. * - * at entry, the leaves of the tree has been initialized + * at entry, the leaves of the tree has been initialized * from corresponding bitmap word or root of summary tree * of the child control page; * configure binary buddy system at the leaf level, then @@ -3817,15 +3906,15 @@ static int dbInitTree(struct dmaptree * dtp) /* * configure the leaf levevl into binary buddy system * - * Try to combine buddies starting with a buddy size of 1 - * (i.e. two leaves). At a buddy size of 1 two buddy leaves - * can be combined if both buddies have a maximum free of l2min; - * the combination will result in the left-most buddy leaf having - * a maximum free of l2min+1. - * After processing all buddies for a given size, process buddies - * at the next higher buddy size (i.e. current size * 2) and - * the next maximum free (current free + 1). - * This continues until the maximum possible buddy combination + * Try to combine buddies starting with a buddy size of 1 + * (i.e. two leaves). At a buddy size of 1 two buddy leaves + * can be combined if both buddies have a maximum free of l2min; + * the combination will result in the left-most buddy leaf having + * a maximum free of l2min+1. + * After processing all buddies for a given size, process buddies + * at the next higher buddy size (i.e. current size * 2) and + * the next maximum free (current free + 1). + * This continues until the maximum possible buddy combination * yields maximum free. */ for (l2free = dtp->budmin, bsize = 1; l2free < l2max; @@ -3849,10 +3938,10 @@ static int dbInitTree(struct dmaptree * dtp) * bubble summary information of leaves up the tree. * * Starting at the leaf node level, the four nodes described by - * the higher level parent node are compared for a maximum free and - * this maximum becomes the value of the parent node. - * when all lower level nodes are processed in this fashion then - * move up to the next level (parent becomes a lower level node) and + * the higher level parent node are compared for a maximum free and + * this maximum becomes the value of the parent node. + * when all lower level nodes are processed in this fashion then + * move up to the next level (parent becomes a lower level node) and * continue the process for that level. */ for (child = le32_to_cpu(dtp->leafidx), @@ -3861,7 +3950,7 @@ static int dbInitTree(struct dmaptree * dtp) /* get index of 1st node of parent level */ parent = (child - 1) >> 2; - /* set the value of the parent node as the maximum + /* set the value of the parent node as the maximum * of the four nodes of the current level. */ for (i = 0, cp = tp + child, cp1 = tp + parent; @@ -3889,8 +3978,8 @@ static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) dcp->budmin = L2BPERDMAP + L2LPERCTL * level; /* - * initialize the leaves of current level that were not covered - * by the specified input block range (i.e. the leaves have no + * initialize the leaves of current level that were not covered + * by the specified input block range (i.e. the leaves have no * low level dmapctl or dmap). 
*/ cp = &dcp->stree[CTLLEAFIND + i]; @@ -3904,9 +3993,9 @@ static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) /* * NAME: dbGetL2AGSize()/ujfs_getagl2size() - * + * * FUNCTION: Determine log2(allocation group size) from aggregate size - * + * * PARAMETERS: * nblocks - Number of blocks in aggregate * @@ -3939,8 +4028,8 @@ static int dbGetL2AGSize(s64 nblocks) /* * NAME: dbMapFileSizeToMapSize() - * - * FUNCTION: compute number of blocks the block allocation map file + * + * FUNCTION: compute number of blocks the block allocation map file * can cover from the map file size; * * RETURNS: Number of blocks which can be covered by this block map file; @@ -3957,8 +4046,8 @@ static int dbGetL2AGSize(s64 nblocks) * convert number of map pages to the zero origin top dmapctl level */ #define BMAPPGTOLEV(npages) \ - (((npages) <= 3 + MAXL0PAGES) ? 0 \ - : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) + (((npages) <= 3 + MAXL0PAGES) ? 0 : \ + ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) s64 dbMapFileSizeToMapSize(struct inode * ipbmap) { @@ -3972,7 +4061,7 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap) npages = nblocks >> JFS_SBI(sb)->l2nbperpage; level = BMAPPGTOLEV(npages); - /* At each level, accumulate the number of dmap pages covered by + /* At each level, accumulate the number of dmap pages covered by * the number of full child levels below it; * repeat for the last incomplete child level. */ @@ -3985,8 +4074,8 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap) factor = (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); complete = (u32) npages / factor; - ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL - : ((i == 1) ? LPERCTL : 1)); + ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL : + ((i == 1) ? LPERCTL : 1)); /* pages in last/incomplete child */ npages = (u32) npages % factor; @@ -3994,7 +4083,7 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap) npages--; } - /* convert the number of dmaps into the number of blocks + /* convert the number of dmaps into the number of blocks * which can be covered by the dmaps; */ nblocks = ndmaps << L2BPERDMAP; diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h index 32e25884e7e..562b9a7e431 100644 --- a/fs/jfs/jfs_dmap.h +++ b/fs/jfs/jfs_dmap.h @@ -1,18 +1,18 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_DMAP @@ -27,7 +27,7 @@ #define L2LPERDMAP 8 /* l2 number of leaves per dmap tree */ #define DBWORD 32 /* # of blks covered by a map word */ #define L2DBWORD 5 /* l2 # of blks covered by a mword */ -#define BUDMIN L2DBWORD /* max free string in a map word */ +#define BUDMIN L2DBWORD /* max free string in a map word */ #define BPERDMAP (LPERDMAP * DBWORD) /* num of blks per dmap */ #define L2BPERDMAP 13 /* l2 num of blks per dmap */ #define CTLTREESIZE (1024+256+64+16+4+1) /* size of a dmapctl tree */ @@ -57,11 +57,11 @@ #define MAXMAPSIZE MAXL2SIZE /* maximum aggregate map size */ -/* +/* * determine the maximum free string for four (lower level) nodes * of the tree. */ -static __inline signed char TREEMAX(signed char *cp) +static inline signed char TREEMAX(signed char *cp) { signed char tmp1, tmp2; @@ -83,7 +83,7 @@ static __inline signed char TREEMAX(signed char *cp) * - 1 is added to account for the control page of the map. */ #define BLKTODMAP(b,s) \ - ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) + ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s)) /* * convert disk block number to the logical block number of the LEVEL 0 @@ -98,7 +98,7 @@ static __inline signed char TREEMAX(signed char *cp) * - 1 is added to account for the control page of the map. */ #define BLKTOL0(b,s) \ - (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) + (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s)) /* * convert disk block number to the logical block number of the LEVEL 1 @@ -120,9 +120,9 @@ static __inline signed char TREEMAX(signed char *cp) * at the specified level which describes the disk block. */ #define BLKTOCTL(b,s,l) \ - (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) + (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s))) -/* +/* * convert aggregate map size to the zero origin dmapctl level of the * top dmapctl. */ @@ -145,27 +145,27 @@ static __inline signed char TREEMAX(signed char *cp) * dmaptree must be consistent with dmapctl. 
*/ struct dmaptree { - __le32 nleafs; /* 4: number of tree leafs */ - __le32 l2nleafs; /* 4: l2 number of tree leafs */ - __le32 leafidx; /* 4: index of first tree leaf */ - __le32 height; /* 4: height of the tree */ + __le32 nleafs; /* 4: number of tree leafs */ + __le32 l2nleafs; /* 4: l2 number of tree leafs */ + __le32 leafidx; /* 4: index of first tree leaf */ + __le32 height; /* 4: height of the tree */ s8 budmin; /* 1: min l2 tree leaf value to combine */ - s8 stree[TREESIZE]; /* TREESIZE: tree */ - u8 pad[2]; /* 2: pad to word boundary */ -}; /* - 360 - */ + s8 stree[TREESIZE]; /* TREESIZE: tree */ + u8 pad[2]; /* 2: pad to word boundary */ +}; /* - 360 - */ /* * dmap page per 8K blocks bitmap */ struct dmap { - __le32 nblocks; /* 4: num blks covered by this dmap */ - __le32 nfree; /* 4: num of free blks in this dmap */ - __le64 start; /* 8: starting blkno for this dmap */ - struct dmaptree tree; /* 360: dmap tree */ - u8 pad[1672]; /* 1672: pad to 2048 bytes */ - __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */ - __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ -}; /* - 4096 - */ + __le32 nblocks; /* 4: num blks covered by this dmap */ + __le32 nfree; /* 4: num of free blks in this dmap */ + __le64 start; /* 8: starting blkno for this dmap */ + struct dmaptree tree; /* 360: dmap tree */ + u8 pad[1672]; /* 1672: pad to 2048 bytes */ + __le32 wmap[LPERDMAP]; /* 1024: bits of the working map */ + __le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */ +}; /* - 4096 - */ /* * disk map control page per level. @@ -173,14 +173,14 @@ struct dmap { * dmapctl must be consistent with dmaptree. */ struct dmapctl { - __le32 nleafs; /* 4: number of tree leafs */ - __le32 l2nleafs; /* 4: l2 number of tree leafs */ - __le32 leafidx; /* 4: index of the first tree leaf */ - __le32 height; /* 4: height of tree */ - s8 budmin; /* 1: minimum l2 tree leaf value */ - s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ - u8 pad[2714]; /* 2714: pad to 4096 */ -}; /* - 4096 - */ + __le32 nleafs; /* 4: number of tree leafs */ + __le32 l2nleafs; /* 4: l2 number of tree leafs */ + __le32 leafidx; /* 4: index of the first tree leaf */ + __le32 height; /* 4: height of tree */ + s8 budmin; /* 1: minimum l2 tree leaf value */ + s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */ + u8 pad[2714]; /* 2714: pad to 4096 */ +}; /* - 4096 - */ /* * common definition for dmaptree within dmap and dmapctl @@ -192,58 +192,58 @@ typedef union dmtree { /* macros for accessing fields within dmtree */ #define dmt_nleafs t1.nleafs -#define dmt_l2nleafs t1.l2nleafs -#define dmt_leafidx t1.leafidx -#define dmt_height t1.height -#define dmt_budmin t1.budmin -#define dmt_stree t1.stree +#define dmt_l2nleafs t1.l2nleafs +#define dmt_leafidx t1.leafidx +#define dmt_height t1.height +#define dmt_budmin t1.budmin +#define dmt_stree t1.stree -/* +/* * on-disk aggregate disk allocation map descriptor. 
*/ struct dbmap_disk { - __le64 dn_mapsize; /* 8: number of blocks in aggregate */ - __le64 dn_nfree; /* 8: num free blks in aggregate map */ - __le32 dn_l2nbperpage; /* 4: number of blks per page */ - __le32 dn_numag; /* 4: total number of ags */ - __le32 dn_maxlevel; /* 4: number of active ags */ - __le32 dn_maxag; /* 4: max active alloc group number */ - __le32 dn_agpref; /* 4: preferred alloc group (hint) */ - __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ - __le32 dn_agheigth; /* 4: height in dmapctl of the AG */ - __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ - __le32 dn_agstart; /* 4: start tree index at AG height */ - __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ - __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */ - __le64 dn_agsize; /* 8: num of blks per alloc group */ - s8 dn_maxfreebud; /* 1: max free buddy system */ - u8 pad[3007]; /* 3007: pad to 4096 */ -}; /* - 4096 - */ + __le64 dn_mapsize; /* 8: number of blocks in aggregate */ + __le64 dn_nfree; /* 8: num free blks in aggregate map */ + __le32 dn_l2nbperpage; /* 4: number of blks per page */ + __le32 dn_numag; /* 4: total number of ags */ + __le32 dn_maxlevel; /* 4: number of active ags */ + __le32 dn_maxag; /* 4: max active alloc group number */ + __le32 dn_agpref; /* 4: preferred alloc group (hint) */ + __le32 dn_aglevel; /* 4: dmapctl level holding the AG */ + __le32 dn_agheight; /* 4: height in dmapctl of the AG */ + __le32 dn_agwidth; /* 4: width in dmapctl of the AG */ + __le32 dn_agstart; /* 4: start tree index at AG height */ + __le32 dn_agl2size; /* 4: l2 num of blks per alloc group */ + __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */ + __le64 dn_agsize; /* 8: num of blks per alloc group */ + s8 dn_maxfreebud; /* 1: max free buddy system */ + u8 pad[3007]; /* 3007: pad to 4096 */ +}; /* - 4096 - */ struct dbmap { - s64 dn_mapsize; /* number of blocks in aggregate */ - s64 dn_nfree; /* num free blks in aggregate map */ - int dn_l2nbperpage; /* number of blks per page */ - int dn_numag; /* total number of ags */ - int dn_maxlevel; /* number of active ags */ - int dn_maxag; /* max active alloc group number */ - int dn_agpref; /* preferred alloc group (hint) */ - int dn_aglevel; /* dmapctl level holding the AG */ - int dn_agheigth; /* height in dmapctl of the AG */ - int dn_agwidth; /* width in dmapctl of the AG */ - int dn_agstart; /* start tree index at AG height */ - int dn_agl2size; /* l2 num of blks per alloc group */ - s64 dn_agfree[MAXAG]; /* per AG free count */ - s64 dn_agsize; /* num of blks per alloc group */ - signed char dn_maxfreebud; /* max free buddy system */ -}; /* - 4096 - */ -/* + s64 dn_mapsize; /* number of blocks in aggregate */ + s64 dn_nfree; /* num free blks in aggregate map */ + int dn_l2nbperpage; /* number of blks per page */ + int dn_numag; /* total number of ags */ + int dn_maxlevel; /* number of active ags */ + int dn_maxag; /* max active alloc group number */ + int dn_agpref; /* preferred alloc group (hint) */ + int dn_aglevel; /* dmapctl level holding the AG */ + int dn_agheight; /* height in dmapctl of the AG */ + int dn_agwidth; /* width in dmapctl of the AG */ + int dn_agstart; /* start tree index at AG height */ + int dn_agl2size; /* l2 num of blks per alloc group */ + s64 dn_agfree[MAXAG]; /* per AG free count */ + s64 dn_agsize; /* num of blks per alloc group */ + signed char dn_maxfreebud; /* max free buddy system */ +}; /* - 4096 - */ +/* * in-memory aggregate disk allocation map descriptor. 
*/ struct bmap { struct dbmap db_bmap; /* on-disk aggregate map descriptor */ struct inode *db_ipbmap; /* ptr to aggregate map incore inode */ - struct semaphore db_bmaplock; /* aggregate map lock */ + struct mutex db_bmaplock; /* aggregate map lock */ atomic_t db_active[MAXAG]; /* count of active, open files in AG */ u32 *db_DBmap; }; @@ -255,7 +255,7 @@ struct bmap { #define db_agsize db_bmap.dn_agsize #define db_agl2size db_bmap.dn_agl2size #define db_agwidth db_bmap.dn_agwidth -#define db_agheigth db_bmap.dn_agheigth +#define db_agheight db_bmap.dn_agheight #define db_agstart db_bmap.dn_agstart #define db_numag db_bmap.dn_numag #define db_maxlevel db_bmap.dn_maxlevel @@ -311,4 +311,6 @@ extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks); extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks); extern void dbFinalizeBmap(struct inode *ipbmap); extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap); +extern s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen); + #endif /* _H_JFS_DMAP */ diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 404f33eae50..984c2bbf4f6 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -78,7 +78,7 @@ * * case-insensitive search: * - * fold search key; + * fold search key; * * case-insensitive search of B-tree: * for internal entry, router key is already folded; @@ -93,7 +93,7 @@ * else * return no match; * - * serialization: + * serialization: * target directory inode lock is being held on entry/exit * of all main directory service routines. * @@ -102,6 +102,7 @@ #include <linux/fs.h> #include <linux/quotaops.h> +#include <linux/slab.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -123,21 +124,21 @@ struct dtsplit { #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) /* get page buffer for specified block address */ -#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ -{\ - BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ - if (!(RC))\ - {\ - if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ - ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ - {\ - BT_PUTPAGE(MP);\ - jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\ - MP = NULL;\ - RC = -EIO;\ - }\ - }\ -} +#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ +do { \ + BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot); \ + if (!(RC)) { \ + if (((P)->header.nextindex > \ + (((BN) == 0) ? 
DTROOTMAXSLOT : (P)->header.maxslot)) || \ + ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) { \ + BT_PUTPAGE(MP); \ + jfs_error((IP)->i_sb, \ + "DT_GETPAGE: dtree page corrupt\n"); \ + MP = NULL; \ + RC = -EIO; \ + } \ + } \ +} while (0) /* for consistency */ #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) @@ -284,11 +285,11 @@ static struct dir_table_slot *find_index(struct inode *ip, u32 index, release_metapage(*mp); *mp = NULL; } - if (*mp == 0) { + if (!(*mp)) { *lblock = blkno; *mp = read_index_page(ip, blkno); } - if (*mp == 0) { + if (!(*mp)) { jfs_err("free_index: error reading directory table"); return NULL; } @@ -315,8 +316,8 @@ static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp, lv = &llck->lv[llck->index]; /* - * Linelock slot size is twice the size of directory table - * slot size. 512 entries per page. + * Linelock slot size is twice the size of directory table + * slot size. 512 entries per page. */ lv->offset = ((index - 2) & 511) >> 1; lv->length = 1; @@ -381,10 +382,10 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) * It's time to move the inline table to an external * page and begin to build the xtree */ - if (DQUOT_ALLOC_BLOCK(ip, sbi->nbperpage)) + if (dquot_alloc_block(ip, sbi->nbperpage)) goto clean_up; if (dbAlloc(ip, 0, sbi->nbperpage, &xaddr)) { - DQUOT_FREE_BLOCK(ip, sbi->nbperpage); + dquot_free_block(ip, sbi->nbperpage); goto clean_up; } @@ -408,12 +409,13 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) memcpy(&jfs_ip->i_dirtable, temp_table, sizeof (temp_table)); dbFree(ip, xaddr, sbi->nbperpage); - DQUOT_FREE_BLOCK(ip, sbi->nbperpage); + dquot_free_block(ip, sbi->nbperpage); goto clean_up; } ip->i_size = PSIZE; - if ((mp = get_index_page(ip, 0)) == 0) { + mp = get_index_page(ip, 0); + if (!mp) { jfs_err("add_index: get_metapage failed!"); xtTruncate(tid, ip, 0, COMMIT_PWMAP); memcpy(&jfs_ip->i_dirtable, temp_table, @@ -461,7 +463,7 @@ static u32 add_index(tid_t tid, struct inode *ip, s64 bn, int slot) } else mp = read_index_page(ip, blkno); - if (mp == 0) { + if (!mp) { jfs_err("add_index: get/read_metapage failed!"); goto clean_up; } @@ -499,7 +501,7 @@ static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) dirtab_slot = find_index(ip, index, &mp, &lblock); - if (dirtab_slot == 0) + if (!dirtab_slot) return; dirtab_slot->flag = DIR_INDEX_FREE; @@ -520,13 +522,13 @@ static void free_index(tid_t tid, struct inode *ip, u32 index, u32 next) * Changes an entry in the directory index table */ static void modify_index(tid_t tid, struct inode *ip, u32 index, s64 bn, - int slot, struct metapage ** mp, u64 *lblock) + int slot, struct metapage ** mp, s64 *lblock) { struct dir_table_slot *dirtab_slot; dirtab_slot = find_index(ip, index, mp, lblock); - if (dirtab_slot == 0) + if (!dirtab_slot) return; DTSaddress(dirtab_slot, bn); @@ -552,7 +554,7 @@ static int read_index(struct inode *ip, u32 index, struct dir_table_slot *slot; slot = find_index(ip, index, &mp, &lblock); - if (slot == 0) { + if (!slot) { return -EIO; } @@ -592,10 +594,8 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, struct component_name ciKey; struct super_block *sb = ip->i_sb; - ciKey.name = - (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), - GFP_NOFS); - if (ciKey.name == 0) { + ciKey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_NOFS); + if (!ciKey.name) { rc = -ENOMEM; goto dtSearch_Exit2; } @@ -615,7 +615,7 @@ int dtSearch(struct inode *ip, struct component_name * 
key, ino_t * data, btstack->nsplit = 1; /* - * search down tree from root: + * search down tree from root: * * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of * internal page, child page Pi contains entry with k, Ki <= K < Kj. @@ -659,7 +659,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, } if (cmp == 0) { /* - * search hit + * search hit */ /* search hit - leaf page: * return the entry found @@ -723,7 +723,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, } /* - * search miss + * search miss * * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or (maxindex + 1) index. @@ -773,10 +773,10 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, getChild: /* update max. number of pages to split */ if (BT_STACK_FULL(btstack)) { - /* Something's corrupted, mark filesytem dirty so + /* Something's corrupted, mark filesystem dirty so * chkdsk will fix it. */ - jfs_error(sb, "stack overrun in dtSearch!"); + jfs_error(sb, "stack overrun!\n"); BT_STACK_DUMP(btstack); rc = -EIO; goto out; @@ -834,7 +834,7 @@ int dtInsert(tid_t tid, struct inode *ip, struct lv *lv; /* - * retrieve search result + * retrieve search result * * dtSearch() returns (leaf page pinned, index at which to insert). * n.b. dtSearch() may return index of (maxindex + 1) of @@ -843,7 +843,7 @@ int dtInsert(tid_t tid, struct inode *ip, DT_GETSEARCH(ip, btstack->top, bn, mp, p, index); /* - * insert entry for new key + * insert entry for new key */ if (DO_INDEX(ip)) { if (JFS_IP(ip)->next_index == DIREND) { @@ -860,9 +860,9 @@ int dtInsert(tid_t tid, struct inode *ip, data.leaf.ino = *fsn; /* - * leaf page does not have enough room for new entry: + * leaf page does not have enough room for new entry: * - * extend/split the leaf page; + * extend/split the leaf page; * * dtSplitUp() will insert the entry and unpin the leaf page. */ @@ -877,9 +877,9 @@ int dtInsert(tid_t tid, struct inode *ip, } /* - * leaf page does have enough room for new entry: + * leaf page does have enough room for new entry: * - * insert the new data entry into the leaf page; + * insert the new data entry into the leaf page; */ BT_MARK_DIRTY(mp, ip); /* @@ -925,7 +925,7 @@ int dtInsert(tid_t tid, struct inode *ip, * * return: 0 - success; * errno - failure; - * leaf page unpinned; + * leaf page unpinned; */ static int dtSplitUp(tid_t tid, struct inode *ip, struct dtsplit * split, struct btstack * btstack) @@ -957,23 +957,21 @@ static int dtSplitUp(tid_t tid, smp = split->mp; sp = DT_PAGE(ip, smp); - key.name = - (wchar_t *) kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), - GFP_NOFS); - if (key.name == 0) { + key.name = kmalloc((JFS_NAME_MAX + 2) * sizeof(wchar_t), GFP_NOFS); + if (!key.name) { DT_PUTPAGE(smp); rc = -ENOMEM; goto dtSplitUp_Exit; } /* - * split leaf page + * split leaf page * * The split routines insert the new entry, and * acquire txLock as appropriate. */ /* - * split root leaf page: + * split root leaf page: */ if (sp->header.flag & BT_ROOT) { /* @@ -1005,11 +1003,14 @@ static int dtSplitUp(tid_t tid, DT_PUTPAGE(smp); + if (!DO_INDEX(ip)) + ip->i_size = xlen << sbi->l2bsize; + goto freeKeyName; } /* - * extend first leaf page + * extend first leaf page * * extend the 1st extent if less than buffer page size * (dtExtendPage() reurns leaf page unpinned) @@ -1027,10 +1028,9 @@ static int dtSplitUp(tid_t tid, n = xlen; /* Allocate blocks to quota. 
*/ - if (DQUOT_ALLOC_BLOCK(ip, n)) { - rc = -EDQUOT; + rc = dquot_alloc_block(ip, n); + if (rc) goto extendOut; - } quota_allocation += n; if ((rc = dbReAlloc(sbi->ipbmap, xaddr, (s64) xlen, @@ -1055,7 +1055,9 @@ static int dtSplitUp(tid_t tid, xaddr = addressPXD(pxd) + xlen; dbFree(ip, xaddr, (s64) n); } - } + } else if (!DO_INDEX(ip)) + ip->i_size = lengthPXD(pxd) << sbi->l2bsize; + extendOut: DT_PUTPAGE(smp); @@ -1063,7 +1065,7 @@ static int dtSplitUp(tid_t tid, } /* - * split leaf page <sp> into <sp> and a new right page <rp>. + * split leaf page <sp> into <sp> and a new right page <rp>. * * return <rp> pinned and its extent descriptor <rpxd> */ @@ -1098,6 +1100,9 @@ static int dtSplitUp(tid_t tid, goto splitOut; } + if (!DO_INDEX(ip)) + ip->i_size += PSIZE; + /* * propagate up the router entry for the leaf page just split * @@ -1303,7 +1308,7 @@ static int dtSplitUp(tid_t tid, /* Rollback quota allocation */ if (rc && quota_allocation) - DQUOT_FREE_BLOCK(ip, quota_allocation); + dquot_free_block(ip, quota_allocation); dtSplitUp_Exit: @@ -1364,9 +1369,10 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, return -EIO; /* Allocate blocks to quota. */ - if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) { release_metapage(rmp); - return -EDQUOT; + return rc; } jfs_info("dtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp); @@ -1425,7 +1431,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, rp->header.freecnt = rp->header.maxslot - fsi; /* - * sequential append at tail: append without split + * sequential append at tail: append without split * * If splitting the last page on a level because of appending * a entry to it (skip is maxentry), it's likely that the access is @@ -1459,7 +1465,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, } /* - * non-sequential insert (at possibly middle page) + * non-sequential insert (at possibly middle page) */ /* @@ -1500,7 +1506,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, left = 0; /* - * compute fill factor for split pages + * compute fill factor for split pages * * <nxt> traces the next entry to move to rp * <off> traces the next entry to stay in sp @@ -1543,7 +1549,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split, /* <nxt> poins to the 1st entry to move */ /* - * move entries to right page + * move entries to right page * * dtMoveEntry() initializes rp and reserves entry for insertion * @@ -1669,7 +1675,7 @@ static int dtExtendPage(tid_t tid, return (rc); /* - * extend the extent + * extend the extent */ pxdlist = split->pxdlist; pxd = &pxdlist->pxd[pxdlist->npxd]; @@ -1714,7 +1720,7 @@ static int dtExtendPage(tid_t tid, } /* - * extend the page + * extend the page */ sp->header.self = *pxd; @@ -1731,9 +1737,6 @@ static int dtExtendPage(tid_t tid, /* update buffer extent descriptor of extended page */ xlen = lengthPXD(pxd); xsize = xlen << JFS_SBI(sb)->l2bsize; -#ifdef _STILL_TO_PORT - bmSetXD(smp, xaddr, xsize); -#endif /* _STILL_TO_PORT */ /* * copy old stbl to new stbl at start of extended area @@ -1828,7 +1831,7 @@ static int dtExtendPage(tid_t tid, } /* - * update parent entry on the parent/root page + * update parent entry on the parent/root page */ /* * acquire a transaction lock on the parent/root page @@ -1890,13 +1893,14 @@ static int dtSplitRoot(tid_t tid, struct dt_lock *dtlck; struct tlock *tlck; struct lv *lv; + int rc; /* get split 
root page */ smp = split->mp; sp = &JFS_IP(ip)->i_dtroot; /* - * allocate/initialize a single (right) child page + * allocate/initialize a single (right) child page * * N.B. at first split, a one (or two) block to fit new entry * is allocated; at subsequent split, a full page is allocated; @@ -1914,9 +1918,10 @@ static int dtSplitRoot(tid_t tid, rp = rmp->data; /* Allocate blocks to quota. */ - if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) { release_metapage(rmp); - return -EDQUOT; + return rc; } BT_MARK_DIRTY(rmp, ip); @@ -1935,7 +1940,7 @@ static int dtSplitRoot(tid_t tid, rp->header.prev = 0; /* - * move in-line root page into new right page extent + * move in-line root page into new right page extent */ /* linelock header + copied entries + new stbl (1st slot) in new page */ ASSERT(dtlck->index == 0); @@ -2008,7 +2013,7 @@ static int dtSplitRoot(tid_t tid, dtInsertEntry(rp, split->index, split->key, split->data, &dtlck); /* - * reset parent/root page + * reset parent/root page * * set the 1st entry offset to 0, which force the left-most key * at any level of the tree to be less than any search key. @@ -2094,7 +2099,7 @@ int dtDelete(tid_t tid, dtpage_t *np; /* - * search for the entry to delete: + * search for the entry to delete: * * dtSearch() returns (leaf page pinned, index at which to delete). */ @@ -2245,7 +2250,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, int i; /* - * keep the root leaf page which has become empty + * keep the root leaf page which has become empty */ if (BT_IS_ROOT(fmp)) { /* @@ -2261,7 +2266,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, } /* - * free the non-root leaf page + * free the non-root leaf page */ /* * acquire a transaction lock on the page @@ -2285,13 +2290,13 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, xlen = lengthPXD(&fp->header.self); /* Free quota allocation. */ - DQUOT_FREE_BLOCK(ip, xlen); + dquot_free_block(ip, xlen); /* free/invalidate its buffer page */ discard_metapage(fmp); /* - * propagate page deletion up the directory tree + * propagate page deletion up the directory tree * * If the delete from the parent page makes it empty, * continue all the way up the tree. @@ -2361,7 +2366,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, xlen = lengthPXD(&p->header.self); /* Free quota allocation */ - DQUOT_FREE_BLOCK(ip, xlen); + dquot_free_block(ip, xlen); /* free/invalidate its buffer page */ discard_metapage(mp); @@ -2424,15 +2429,18 @@ static int dtDeleteUp(tid_t tid, struct inode *ip, break; } + if (!DO_INDEX(ip)) + ip->i_size -= PSIZE; + return 0; } #ifdef _NOTYET /* - * NAME: dtRelocate() + * NAME: dtRelocate() * - * FUNCTION: relocate dtpage (internal or leaf) of directory; - * This function is mainly used by defragfs utility. + * FUNCTION: relocate dtpage (internal or leaf) of directory; + * This function is mainly used by defragfs utility. */ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, s64 nxaddr) @@ -2460,8 +2468,8 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, xlen); /* - * 1. get the internal parent dtpage covering - * router entry for the tartget page to be relocated; + * 1. get the internal parent dtpage covering + * router entry for the tartget page to be relocated; */ rc = dtSearchNode(ip, lmxaddr, opxd, &btstack); if (rc) @@ -2472,7 +2480,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, jfs_info("dtRelocate: parent router entry validated."); /* - * 2. 
relocate the target dtpage + * 2. relocate the target dtpage */ /* read in the target page from src extent */ DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc); @@ -2570,9 +2578,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, /* update the buffer extent descriptor of the dtpage */ xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize; -#ifdef _STILL_TO_PORT - bmSetXD(mp, nxaddr, xsize); -#endif /* _STILL_TO_PORT */ + /* unpin the relocated page */ DT_PUTPAGE(mp); jfs_info("dtRelocate: target dtpage relocated."); @@ -2583,7 +2589,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, */ /* - * 3. acquire maplock for the source extent to be freed; + * 3. acquire maplock for the source extent to be freed; */ /* for dtpage relocation, write a LOG_NOREDOPAGE record * for the source dtpage (logredo() will init NoRedoPage @@ -2598,7 +2604,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, pxdlock->index = 1; /* - * 4. update the parent router entry for relocation; + * 4. update the parent router entry for relocation; * * acquire tlck for the parent entry covering the target dtpage; * write LOG_REDOPAGE to apply after image only; @@ -2626,7 +2632,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd, * NAME: dtSearchNode() * * FUNCTION: Search for an dtpage containing a specified address - * This function is mainly used by defragfs utility. + * This function is mainly used by defragfs utility. * * NOTE: Search result on stack, the found page is pinned at exit. * The result page must be an internal dtpage. @@ -2649,7 +2655,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, BT_CLR(btstack); /* reset stack */ /* - * descend tree to the level with specified leftmost page + * descend tree to the level with specified leftmost page * * by convention, root bn = 0. */ @@ -2688,7 +2694,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd, } /* - * search each page at the current levevl + * search each page at the current levevl */ loop: stbl = DT_GETSTBL(p); @@ -2996,9 +3002,9 @@ static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) * return: offset = (pn, index) of start entry * of next jfs_readdir()/dtRead() */ -int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +int jfs_readdir(struct file *file, struct dir_context *ctx) { - struct inode *ip = filp->f_dentry->d_inode; + struct inode *ip = file_inode(file); struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; int rc = 0; loff_t dtpos; /* legacy OS/2 style position */ @@ -3027,19 +3033,27 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int overflow, fix_page, page_fixed = 0; static int unique_pos = 2; /* If we can't fix broken index */ - if (filp->f_pos == DIREND) + if (ctx->pos == DIREND) return 0; if (DO_INDEX(ip)) { /* * persistent index is stored in directory entries. - * Special cases: 0 = . - * 1 = .. - * -1 = End of directory + * Special cases: 0 = . + * 1 = .. + * -1 = End of directory */ do_index = 1; - dir_index = (u32) filp->f_pos; + dir_index = (u32) ctx->pos; + + /* + * NFSv4 reserves cookies 1 and 2 for . and .. so the value + * we return to the vfs is one greater than the one we use + * internally. + */ + if (dir_index) + dir_index--; if (dir_index > 1) { struct dir_table_slot dirtab_slot; @@ -3047,25 +3061,25 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (dtEmpty(ip) || (dir_index >= JFS_IP(ip)->next_index)) { /* Stale position. 
Directory has shrunk */ - filp->f_pos = DIREND; + ctx->pos = DIREND; return 0; } repeat: rc = read_index(ip, dir_index, &dirtab_slot); if (rc) { - filp->f_pos = DIREND; + ctx->pos = DIREND; return rc; } if (dirtab_slot.flag == DIR_INDEX_FREE) { if (loop_count++ > JFS_IP(ip)->next_index) { jfs_err("jfs_readdir detected " "infinite loop!"); - filp->f_pos = DIREND; + ctx->pos = DIREND; return 0; } dir_index = le32_to_cpu(dirtab_slot.addr2); if (dir_index == -1) { - filp->f_pos = DIREND; + ctx->pos = DIREND; return 0; } goto repeat; @@ -3074,13 +3088,13 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) index = dirtab_slot.slot; DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); if (rc) { - filp->f_pos = DIREND; + ctx->pos = DIREND; return 0; } if (p->header.flag & BT_INTERNAL) { jfs_err("jfs_readdir: bad index table"); DT_PUTPAGE(mp); - filp->f_pos = -1; + ctx->pos = DIREND; return 0; } } else { @@ -3088,23 +3102,22 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * self "." */ - filp->f_pos = 0; - if (filldir(dirent, ".", 1, 0, ip->i_ino, - DT_DIR)) + ctx->pos = 1; + if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) return 0; } /* * parent ".." */ - filp->f_pos = 1; - if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) + ctx->pos = 2; + if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) return 0; /* * Find first entry of left-most leaf */ if (dtEmpty(ip)) { - filp->f_pos = DIREND; + ctx->pos = DIREND; return 0; } @@ -3117,28 +3130,25 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 * - * pn = index = 0: First entry "." - * pn = 0; index = 1: Second entry ".." - * pn > 0: Real entries, pn=1 -> leftmost page - * pn = index = -1: No more entries + * pn = 0; index = 1: First entry "." + * pn = 0; index = 2: Second entry ".." + * pn > 0: Real entries, pn=1 -> leftmost page + * pn = index = -1: No more entries */ - dtpos = filp->f_pos; - if (dtpos == 0) { + dtpos = ctx->pos; + if (dtpos < 2) { /* build "." entry */ - - if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, - DT_DIR)) + ctx->pos = 1; + if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) return 0; - dtoffset->index = 1; - filp->f_pos = dtpos; + dtoffset->index = 2; + ctx->pos = dtpos; } if (dtoffset->pn == 0) { - if (dtoffset->index == 1) { + if (dtoffset->index == 2) { /* build ".." entry */ - - if (filldir(dirent, "..", 2, filp->f_pos, - PARENT(ip), DT_DIR)) + if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) return 0; } else { jfs_err("jfs_readdir called with " @@ -3146,18 +3156,18 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } dtoffset->pn = 1; dtoffset->index = 0; - filp->f_pos = dtpos; + ctx->pos = dtpos; } if (dtEmpty(ip)) { - filp->f_pos = DIREND; + ctx->pos = DIREND; return 0; } - if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { + if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) { jfs_err("jfs_readdir: unexpected rc = %d " "from dtReadNext", rc); - filp->f_pos = DIREND; + ctx->pos = DIREND; return 0; } /* get start leaf page and index */ @@ -3165,7 +3175,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* offset beyond directory eof ? 
*/ if (bn < 0) { - filp->f_pos = DIREND; + ctx->pos = DIREND; return 0; } } @@ -3174,7 +3184,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (dirent_buf == 0) { DT_PUTPAGE(mp); jfs_warn("jfs_readdir: __get_free_page failed!"); - filp->f_pos = DIREND; + ctx->pos = DIREND; return -ENOMEM; } @@ -3227,6 +3237,12 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } jfs_dirent->position = unique_pos++; } + /* + * We add 1 to the index because we may + * use a value of 2 internally, and NFSv4 + * doesn't like that. + */ + jfs_dirent->position++; } else { jfs_dirent->position = dtpos; len = min(d_namleft, DTLHDRDATALEN_LEGACY); @@ -3246,8 +3262,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* Sanity Check */ if (d_namleft == 0) { jfs_error(ip->i_sb, - "JFS:Dtree error: ino = " - "%ld, bn=%Ld, index = %d", + "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n", (long)ip->i_ino, (long long)bn, i); @@ -3289,9 +3304,9 @@ skip_one: jfs_dirent = (struct jfs_dirent *) dirent_buf; while (jfs_dirents--) { - filp->f_pos = jfs_dirent->position; - if (filldir(dirent, jfs_dirent->name, - jfs_dirent->name_len, filp->f_pos, + ctx->pos = jfs_dirent->position; + if (!dir_emit(ctx, jfs_dirent->name, + jfs_dirent->name_len, jfs_dirent->ino, DT_UNKNOWN)) goto out; jfs_dirent = next_jfs_dirent(jfs_dirent); @@ -3303,7 +3318,7 @@ skip_one: } if (!overflow && (bn == 0)) { - filp->f_pos = DIREND; + ctx->pos = DIREND; break; } @@ -3340,7 +3355,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack) BT_CLR(btstack); /* reset stack */ /* - * descend leftmost path of the tree + * descend leftmost path of the tree * * by convention, root bn = 0. */ @@ -3367,7 +3382,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack) */ if (BT_STACK_FULL(btstack)) { DT_PUTPAGE(mp); - jfs_error(ip->i_sb, "dtReadFirst: btstack overrun"); + jfs_error(ip->i_sb, "btstack overrun\n"); BT_STACK_DUMP(btstack); return -EIO; } @@ -3756,7 +3771,7 @@ static int ciCompare(struct component_name * key, /* search key */ * across page boundary * * return: non-zero on error - * + * */ static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, int ri, struct component_name * key, int flag) @@ -3766,16 +3781,16 @@ static int ciGetLeafPrefixKey(dtpage_t * lp, int li, dtpage_t * rp, struct component_name lkey; struct component_name rkey; - lkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), + lkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_KERNEL); if (lkey.name == NULL) - return -ENOSPC; + return -ENOMEM; - rkey.name = (wchar_t *) kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), + rkey.name = kmalloc((JFS_NAME_MAX + 1) * sizeof(wchar_t), GFP_KERNEL); if (rkey.name == NULL) { kfree(lkey.name); - return -ENOSPC; + return -ENOMEM; } /* get left and right key */ @@ -4520,7 +4535,7 @@ int dtModify(tid_t tid, struct inode *ip, struct ldtentry *entry; /* - * search for the entry to modify: + * search for the entry to modify: * * dtSearch() returns (leaf page pinned, index at which to modify). 
*/ diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index 13e4fdf0772..fd4169e6e69 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h @@ -1,18 +1,18 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_DTREE @@ -35,7 +35,7 @@ typedef union { /* - * entry segment/slot + * entry segment/slot * * an entry consists of type dependent head/only segment/slot and * additional segments/slots linked vi next field; @@ -74,13 +74,13 @@ struct idtentry { #define DTIHDRDATALEN 11 /* compute number of slots for entry */ -#define NDTINTERNAL(klen) ( ((4 + (klen)) + (15 - 1)) / 15 ) +#define NDTINTERNAL(klen) (DIV_ROUND_UP((4 + (klen)), 15)) /* * leaf node entry head/only segment * - * For legacy filesystems, name contains 13 wchars -- no index field + * For legacy filesystems, name contains 13 wchars -- no index field */ struct ldtentry { __le32 inumber; /* 4: 4-byte aligned */ @@ -133,7 +133,7 @@ struct dir_table_slot { ( ((s64)((dts)->addr1)) << 32 | __le32_to_cpu((dts)->addr2) ) /* compute number of slots for entry */ -#define NDTLEAF_LEGACY(klen) ( ((2 + (klen)) + (15 - 1)) / 15 ) +#define NDTLEAF_LEGACY(klen) (DIV_ROUND_UP((2 + (klen)), 15)) #define NDTLEAF NDTINTERNAL @@ -243,9 +243,6 @@ typedef union { #define JFS_REMOVE 3 #define JFS_RENAME 4 -#define DIRENTSIZ(namlen) \ - ( (sizeof(struct dirent) - 2*(JFS_NAME_MAX+1) + 2*((namlen)+1) + 3) &~ 3 ) - /* * Maximum file offset for directories. */ @@ -268,5 +265,5 @@ extern int dtDelete(tid_t tid, struct inode *ip, struct component_name * key, extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key, ino_t * orig_ino, ino_t new_ino, int flag); -extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir); +extern int jfs_readdir(struct file *file, struct dir_context *ctx); #endif /* !_H_JFS_DTREE */ diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index 4879603daa1..2ae7d59ab10 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -34,8 +34,8 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *); #endif static s64 extRoundDown(s64 nb); -#define DPD(a) (printk("(a): %d\n",(a))) -#define DPC(a) (printk("(a): %c\n",(a))) +#define DPD(a) (printk("(a): %d\n",(a))) +#define DPC(a) (printk("(a): %c\n",(a))) #define DPL1(a) \ { \ if ((a) >> 32) \ @@ -51,19 +51,19 @@ static s64 extRoundDown(s64 nb); printk("(a): %x\n",(a) << 32); \ } -#define DPD1(a) (printk("(a): %d ",(a))) -#define DPX(a) (printk("(a): %08x\n",(a))) -#define DPX1(a) (printk("(a): %08x ",(a))) -#define DPS(a) (printk("%s\n",(a))) -#define DPE(a) (printk("\nENTERING: %s\n",(a))) -#define DPE1(a) (printk("\nENTERING: %s",(a))) -#define DPS1(a) (printk(" %s ",(a))) +#define DPD1(a) (printk("(a): %d ",(a))) +#define DPX(a) (printk("(a): %08x\n",(a))) +#define DPX1(a) (printk("(a): %08x ",(a))) +#define DPS(a) (printk("%s\n",(a))) +#define DPE(a) (printk("\nENTERING: %s\n",(a))) +#define DPE1(a) (printk("\nENTERING: %s",(a))) +#define DPS1(a) (printk(" %s ",(a))) /* * NAME: extAlloc() * - * FUNCTION: allocate an extent for a specified page range within a + * FUNCTION: allocate an extent for a specified page range within a * file. * * PARAMETERS: @@ -74,16 +74,16 @@ static s64 extRoundDown(s64 nb); * extent that is used as an allocation hint if the * xaddr of the xad is non-zero. on successful exit, * the xad describes the newly allocated extent. - * abnr - boolean_t indicating whether the newly allocated extent + * abnr - bool indicating whether the newly allocated extent * should be marked as allocated but not recorded. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ int -extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) +extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr) { struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); s64 nxlen, nxaddr, xoff, hint, xaddr = 0; @@ -94,7 +94,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) txBeginAnon(ip->i_sb); /* Avoid race with jfs_commit_inode() */ - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* validate extent length */ if (xlen > MAXXLEN) @@ -117,7 +117,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) * following the hint extent. */ if (offsetXAD(xp) + nxlen == xoff && - abnr == ((xp->flag & XAD_NOTRECORDED) ? TRUE : FALSE)) + abnr == ((xp->flag & XAD_NOTRECORDED) ? true : false)) xaddr = hint + nxlen; /* adjust the hint to the last block of the extent */ @@ -125,8 +125,8 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) } /* allocate the disk blocks for the extent. initially, extBalloc() - * will try to allocate disk blocks for the requested size (xlen). - * if this fails (xlen contigious free blocks not avaliable), it'll + * will try to allocate disk blocks for the requested size (xlen). 
+ * if this fails (xlen contiguous free blocks not available), it'll * try to allocate a smaller number of blocks (producing a smaller * extent), with this smaller number of blocks consisting of the * requested number of blocks rounded down to the next smaller @@ -136,21 +136,22 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) */ nxlen = xlen; if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) { - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } /* Allocate blocks to quota. */ - if (DQUOT_ALLOC_BLOCK(ip, nxlen)) { + rc = dquot_alloc_block(ip, nxlen); + if (rc) { dbFree(ip, nxaddr, (s64) nxlen); - up(&JFS_IP(ip)->commit_sem); - return -EDQUOT; + mutex_unlock(&JFS_IP(ip)->commit_mutex); + return rc; } /* determine the value of the extent flag */ - xflag = (abnr == TRUE) ? XAD_NOTRECORDED : 0; + xflag = abnr ? XAD_NOTRECORDED : 0; - /* if we can extend the hint extent to cover the current request, + /* if we can extend the hint extent to cover the current request, * extend it. otherwise, insert a new extent to * cover the current request. */ @@ -159,13 +160,13 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) else rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0); - /* if the extend or insert failed, + /* if the extend or insert failed, * free the newly allocated blocks and return the error. */ if (rc) { dbFree(ip, nxaddr, nxlen); - DQUOT_FREE_BLOCK(ip, nxlen); - up(&JFS_IP(ip)->commit_sem); + dquot_free_block(ip, nxlen); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } @@ -177,7 +178,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) mark_inode_dirty(ip); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); /* * COMMIT_SyncList flags an anonymous tlock on page that is on * sync list. @@ -192,9 +193,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) #ifdef _NOTYET /* - * NAME: extRealloc() + * NAME: extRealloc() * - * FUNCTION: extend the allocation of a file extent containing a + * FUNCTION: extend the allocation of a file extent containing a * partial back last page. * * PARAMETERS: @@ -203,15 +204,15 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) * xlen - request size of the resulting extent. * xp - pointer to an xad. on successful exit, the xad * describes the newly allocated extent. - * abnr - boolean_t indicating whether the newly allocated extent + * abnr - bool indicating whether the newly allocated extent * should be marked as allocated but not recorded. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. 
*/ -int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) +int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr) { struct super_block *sb = ip->i_sb; s64 xaddr, xlen, nxaddr, delta, xoff; @@ -222,7 +223,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) /* This blocks if we are low on resources */ txBeginAnon(ip->i_sb); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* validate extent length */ if (nxlen > MAXXLEN) nxlen = MAXXLEN; @@ -235,7 +236,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) xoff = offsetXAD(xp); /* if the extend page is abnr and if the request is for - * the extent to be allocated and recorded, + * the extent to be allocated and recorded, * make the page allocated and recorded. */ if ((xp->flag & XAD_NOTRECORDED) && !abnr) { @@ -256,10 +257,11 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) goto exit; /* Allocat blocks to quota. */ - if (DQUOT_ALLOC_BLOCK(ip, nxlen)) { + rc = dquot_alloc_block(ip, nxlen); + if (rc) { dbFree(ip, nxaddr, (s64) nxlen); - up(&JFS_IP(ip)->commit_sem); - return -EDQUOT; + mutex_unlock(&JFS_IP(ip)->commit_mutex); + return rc; } delta = nxlen - xlen; @@ -297,7 +299,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) /* extend the extent */ if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { dbFree(ip, xaddr + xlen, delta); - DQUOT_FREE_BLOCK(ip, nxlen); + dquot_free_block(ip, nxlen); goto exit; } } else { @@ -308,7 +310,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) */ if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { dbFree(ip, nxaddr, nxlen); - DQUOT_FREE_BLOCK(ip, nxlen); + dquot_free_block(ip, nxlen); goto exit; } } @@ -338,16 +340,16 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) mark_inode_dirty(ip); exit: - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return (rc); } #endif /* _NOTYET */ /* - * NAME: extHint() + * NAME: extHint() * - * FUNCTION: produce an extent allocation hint for a file offset. + * FUNCTION: produce an extent allocation hint for a file offset. * * PARAMETERS: * ip - the inode of the file. @@ -356,17 +358,18 @@ exit: * the hint. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. + * 0 - success + * -EIO - i/o error. */ int extHint(struct inode *ip, s64 offset, xad_t * xp) { struct super_block *sb = ip->i_sb; - struct xadlist xadl; - struct lxdlist lxdl; - lxd_t lxd; + int nbperpage = JFS_SBI(sb)->nbperpage; s64 prev; - int rc, nbperpage = JFS_SBI(sb)->nbperpage; + int rc = 0; + s64 xaddr; + int xlen; + int xflag; /* init the hint as "no hint provided" */ XADaddress(xp, 0); @@ -376,62 +379,47 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) */ prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage; - /* if the offsets in the first page of the file, - * no hint provided. + /* if the offset is in the first page of the file, no hint provided. */ if (prev < 0) - return (0); + goto out; - /* prepare to lookup the previous page's extent info */ - lxdl.maxnlxd = 1; - lxdl.nlxd = 1; - lxdl.lxd = &lxd; - LXDoffset(&lxd, prev) - LXDlength(&lxd, nbperpage); + rc = xtLookup(ip, prev, nbperpage, &xflag, &xaddr, &xlen, 0); - xadl.maxnxad = 1; - xadl.nxad = 0; - xadl.xad = xp; - - /* perform the lookup */ - if ((rc = xtLookupList(ip, &lxdl, &xadl, 0))) - return (rc); - - /* check if not extent exists for the previous page. 
- * this is possible for sparse files. - */ - if (xadl.nxad == 0) { -// assert(ISSPARSE(ip)); - return (0); - } - - /* only preserve the abnr flag within the xad flags - * of the returned hint. - */ - xp->flag &= XAD_NOTRECORDED; - - if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) { - jfs_error(ip->i_sb, "extHint: corrupt xtree"); - return -EIO; - } + if ((rc == 0) && xlen) { + if (xlen != nbperpage) { + jfs_error(ip->i_sb, "corrupt xtree\n"); + rc = -EIO; + } + XADaddress(xp, xaddr); + XADlength(xp, xlen); + XADoffset(xp, prev); + /* + * only preserve the abnr flag within the xad flags + * of the returned hint. + */ + xp->flag = xflag & XAD_NOTRECORDED; + } else + rc = 0; - return (0); +out: + return (rc); } /* - * NAME: extRecord() + * NAME: extRecord() * - * FUNCTION: change a page with a file from not recorded to recorded. + * FUNCTION: change a page with a file from not recorded to recorded. * * PARAMETERS: * ip - inode of the file. * cp - cbuf of the file page. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ int extRecord(struct inode *ip, xad_t * xp) { @@ -439,21 +427,21 @@ int extRecord(struct inode *ip, xad_t * xp) txBeginAnon(ip->i_sb); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); /* update the extent */ rc = xtUpdate(0, ip, xp); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); return rc; } #ifdef _NOTYET /* - * NAME: extFill() + * NAME: extFill() * - * FUNCTION: allocate disk space for a file page that represents + * FUNCTION: allocate disk space for a file page that represents * a file hole. * * PARAMETERS: @@ -461,22 +449,22 @@ int extRecord(struct inode *ip, xad_t * xp) * cp - cbuf of the file page represent the hole. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ int extFill(struct inode *ip, xad_t * xp) { int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; - s64 blkno = offsetXAD(xp) >> ip->i_blksize; + s64 blkno = offsetXAD(xp) >> ip->i_blkbits; -// assert(ISSPARSE(ip)); +// assert(ISSPARSE(ip)); /* initialize the extent allocation hint */ XADaddress(xp, 0); /* allocate an extent to fill the hole */ - if ((rc = extAlloc(ip, nbperpage, blkno, xp, FALSE))) + if ((rc = extAlloc(ip, nbperpage, blkno, xp, false))) return (rc); assert(lengthPXD(xp) == nbperpage); @@ -489,18 +477,18 @@ int extFill(struct inode *ip, xad_t * xp) /* * NAME: extBalloc() * - * FUNCTION: allocate disk blocks to form an extent. + * FUNCTION: allocate disk blocks to form an extent. * * initially, we will try to allocate disk blocks for the - * requested size (nblocks). if this fails (nblocks - * contigious free blocks not avaliable), we'll try to allocate + * requested size (nblocks). if this fails (nblocks + * contiguous free blocks not available), we'll try to allocate * a smaller number of blocks (producing a smaller extent), with * this smaller number of blocks consisting of the requested * number of blocks rounded down to the next smaller power of 2 * number (i.e. 16 -> 8). we'll continue to round down and * retry the allocation until the number of blocks to allocate * is smaller than the number of blocks per page. - * + * * PARAMETERS: * ip - the inode of the file. * hint - disk block number to be used as an allocation hint. 
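
The extBalloc() header comment above spells out the allocation strategy: ask the block map for the full request, and if that many contiguous free blocks are not available, shrink the request to the next smaller power of two and retry, giving up once the request would fall below the blocks-per-page floor. Below is a minimal user-space sketch of that retry loop; try_contiguous_alloc() is a hypothetical stand-in for the real bitmap allocator (dbAlloc() in JFS), and round_down_pow2() only mirrors what the extRoundDown() comment describes, so treat this as an illustration of the strategy, not the kernel code itself.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stub for the bitmap allocator: report whether 'want'
 * contiguous blocks could be handed out, writing the start to *blkno.
 * Here it always fails, as if the map were badly fragmented. */
static bool try_contiguous_alloc(int64_t want, int64_t *blkno)
{
	(void)want;
	(void)blkno;
	return false;
}

/* Largest power of two <= nb (sketch of the round-down idea). */
static int64_t round_down_pow2(int64_t nb)
{
	int64_t p = 1;

	while (p <= nb >> 1)
		p <<= 1;
	return p;
}

/* Retry loop described by the extBalloc() comment: shrink the request
 * by powers of two until it would drop below the per-page block count. */
static int alloc_extent(int64_t nblocks, int64_t nbperpage, int64_t *blkno)
{
	int64_t want = nblocks;

	while (want >= nbperpage) {
		if (try_contiguous_alloc(want, blkno))
			return 0;	/* got 'want' contiguous blocks */
		/* an exact power of two is halved (16 -> 8); anything else
		 * is rounded down to the largest power of two below it */
		if (round_down_pow2(want) == want)
			want >>= 1;
		else
			want = round_down_pow2(want);
	}
	return -1;	/* would be -ENOSPC in the kernel */
}

int main(void)
{
	int64_t blkno = 0;

	/* 64-block request with 8 blocks per page; the stub allocator
	 * always fails, so this walks 64 -> 32 -> 16 -> 8 and gives up. */
	if (alloc_extent(64, 8, &blkno))
		printf("no extent available\n");
	else
		printf("extent starts at block %lld\n", (long long)blkno);
	return 0;
}
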
@@ -509,13 +497,13 @@ int extFill(struct inode *ip, xad_t * xp) * exit, this value is set to the number of blocks actually * allocated. * blkno - pointer to a block address that is filled in on successful - * return with the starting block number of the newly + * return with the starting block number of the newly * allocated block range. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ static int extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) @@ -529,8 +517,8 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) /* get the number of blocks to initially attempt to allocate. * we'll first try the number of blocks requested unless this - * number is greater than the maximum number of contigious free - * blocks in the map. in that case, we'll start off with the + * number is greater than the maximum number of contiguous free + * blocks in the map. in that case, we'll start off with the * maximum free. */ max = (s64) 1 << bmp->db_maxfreebud; @@ -580,25 +568,25 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) /* * NAME: extBrealloc() * - * FUNCTION: attempt to extend an extent's allocation. + * FUNCTION: attempt to extend an extent's allocation. * - * initially, we will try to extend the extent's allocation - * in place. if this fails, we'll try to move the extent - * to a new set of blocks. if moving the extent, we initially + * Initially, we will try to extend the extent's allocation + * in place. If this fails, we'll try to move the extent + * to a new set of blocks. If moving the extent, we initially * will try to allocate disk blocks for the requested size - * (nnew). if this fails (nnew contigious free blocks not - * avaliable), we'll try to allocate a smaller number of + * (newnblks). if this fails (new contiguous free blocks not + * available), we'll try to allocate a smaller number of * blocks (producing a smaller extent), with this smaller * number of blocks consisting of the requested number of * blocks rounded down to the next smaller power of 2 - * number (i.e. 16 -> 8). we'll continue to round down and + * number (i.e. 16 -> 8). We'll continue to round down and * retry the allocation until the number of blocks to allocate * is smaller than the number of blocks per page. - * + * * PARAMETERS: * ip - the inode of the file. - * blkno - starting block number of the extents current allocation. - * nblks - number of blocks within the extents current allocation. + * blkno - starting block number of the extents current allocation. + * nblks - number of blocks within the extents current allocation. * newnblks - pointer to a s64 value. on entry, this value is the * the new desired extent size (number of blocks). on * successful exit, this value is set to the extent's actual @@ -606,9 +594,9 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) * newblkno - the starting block number of the extents new allocation. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOSPC - insufficient disk resources. + * 0 - success + * -EIO - i/o error. + * -ENOSPC - insufficient disk resources. */ static int extBrealloc(struct inode *ip, @@ -625,7 +613,7 @@ extBrealloc(struct inode *ip, return (rc); } - /* in place extension not possible. + /* in place extension not possible. * try to move the extent to a new set of blocks. 
*/ return (extBalloc(ip, blkno, newnblks, newblkno)); @@ -634,16 +622,16 @@ extBrealloc(struct inode *ip, /* - * NAME: extRoundDown() + * NAME: extRoundDown() * - * FUNCTION: round down a specified number of blocks to the next + * FUNCTION: round down a specified number of blocks to the next * smallest power of 2 number. * * PARAMETERS: * nb - the inode of the file. * * RETURN VALUES: - * next smallest power of 2 number. + * next smallest power of 2 number. */ static s64 extRoundDown(s64 nb) { diff --git a/fs/jfs/jfs_extent.h b/fs/jfs/jfs_extent.h index e80fc7ced87..b567e12c52d 100644 --- a/fs/jfs/jfs_extent.h +++ b/fs/jfs/jfs_extent.h @@ -1,18 +1,18 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2001 + * Copyright (C) International Business Machines Corp., 2000-2001 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_EXTENT @@ -22,10 +22,10 @@ #define INOHINT(ip) \ (addressPXD(&(JFS_IP(ip)->ixpxd)) + lengthPXD(&(JFS_IP(ip)->ixpxd)) - 1) -extern int extAlloc(struct inode *, s64, s64, xad_t *, boolean_t); +extern int extAlloc(struct inode *, s64, s64, xad_t *, bool); extern int extFill(struct inode *, xad_t *); extern int extHint(struct inode *, s64, xad_t *); -extern int extRealloc(struct inode *, s64, xad_t *, boolean_t); +extern int extRealloc(struct inode *, s64, xad_t *, bool); extern int extRecord(struct inode *, xad_t *); #endif /* _H_JFS_EXTENT */ diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index 72a5588faec..b67d64671bb 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_FILSYS @@ -21,46 +21,40 @@ /* * jfs_filsys.h * - * file system (implementation-dependent) constants + * file system (implementation-dependent) constants * - * refer to <limits.h> for system wide implementation-dependent constants + * refer to <limits.h> for system wide implementation-dependent constants */ /* * file system option (superblock flag) */ -/* mount time flag to disable journaling to disk */ -#define JFS_NOINTEGRITY 0x00000010 + +/* directory option */ +#define JFS_UNICODE 0x00000001 /* unicode name */ /* mount time flags for error handling */ -#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */ -#define JFS_ERR_CONTINUE 0x00000004 /* continue */ -#define JFS_ERR_PANIC 0x00000008 /* panic */ +#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */ +#define JFS_ERR_CONTINUE 0x00000004 /* continue */ +#define JFS_ERR_PANIC 0x00000008 /* panic */ +/* Quota support */ #define JFS_USRQUOTA 0x00000010 #define JFS_GRPQUOTA 0x00000020 -/* platform option (conditional compilation) */ -#define JFS_AIX 0x80000000 /* AIX support */ -/* POSIX name/directory support */ - -#define JFS_OS2 0x40000000 /* OS/2 support */ -/* case-insensitive name/directory support */ - -#define JFS_DFS 0x20000000 /* DCE DFS LFS support */ - -#define JFS_LINUX 0x10000000 /* Linux support */ -/* case-sensitive name/directory support */ +/* mount time flag to disable journaling to disk */ +#define JFS_NOINTEGRITY 0x00000040 -/* directory option */ -#define JFS_UNICODE 0x00000001 /* unicode name */ +/* mount time flag to enable TRIM to ssd disks */ +#define JFS_DISCARD 0x00000080 /* commit option */ #define JFS_COMMIT 0x00000f00 /* commit option mask */ #define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */ #define JFS_LAZYCOMMIT 0x00000200 /* lazy commit */ -#define JFS_TMPFS 0x00000400 /* temporary file system - +#define JFS_TMPFS 0x00000400 /* temporary file system - * do not log/commit: + * Never implemented */ /* log logical volume option */ @@ -74,16 +68,24 @@ #define JFS_SPARSE 0x00020000 /* sparse regular file */ /* DASD Limits F226941 */ -#define JFS_DASD_ENABLED 0x00040000 /* DASD limits enabled */ -#define JFS_DASD_PRIME 0x00080000 /* Prime DASD usage on boot */ +#define JFS_DASD_ENABLED 0x00040000 /* DASD limits enabled */ +#define JFS_DASD_PRIME 0x00080000 /* Prime DASD usage on boot */ /* big endian flag */ -#define JFS_SWAP_BYTES 0x00100000 /* running on big endian computer */ +#define JFS_SWAP_BYTES 0x00100000 /* running on big endian computer */ /* Directory index */ -#define JFS_DIR_INDEX 0x00200000 /* Persistant index for */ - /* directory entries */ +#define JFS_DIR_INDEX 0x00200000 /* Persistent index for */ +/* platform options */ +#define JFS_LINUX 0x10000000 /* Linux support */ +#define JFS_DFS 0x20000000 /* DCE DFS LFS support */ +/* Never implemented */ + +#define JFS_OS2 0x40000000 /* OS/2 support */ +/* case-insensitive name/directory support */ + +#define JFS_AIX 0x80000000 /* AIX support */ /* * buffer cache configuration @@ -113,10 +115,10 @@ #define IDATASIZE 256 /* inode inline data size */ #define IXATTRSIZE 128 /* inode inline extended attribute size */ -#define XTPAGE_SIZE 4096 -#define log2_PAGESIZE 12 +#define XTPAGE_SIZE 4096 +#define log2_PAGESIZE 12 -#define IAG_SIZE 
4096 +#define IAG_SIZE 4096 #define IAG_EXTENT_SIZE 4096 #define INOSPERIAG 4096 /* number of disk inodes per iag */ #define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */ @@ -196,7 +198,7 @@ * followed by 1st extent of map */ #define AITBL_OFF (AIMAP_OFF + (SIZE_OF_MAP_PAGE << 1)) - /* + /* * 1st extent of aggregate inode table */ #define SUPER2_OFF (AITBL_OFF + INODE_EXTENT_SIZE) @@ -270,13 +272,13 @@ */ #define FM_CLEAN 0x00000000 /* file system is unmounted and clean */ #define FM_MOUNT 0x00000001 /* file system is mounted cleanly */ -#define FM_DIRTY 0x00000002 /* file system was not unmounted and clean - * when mounted or +#define FM_DIRTY 0x00000002 /* file system was not unmounted and clean + * when mounted or * commit failure occurred while being mounted: - * fsck() must be run to repair + * fsck() must be run to repair */ #define FM_LOGREDO 0x00000004 /* log based recovery (logredo()) failed: - * fsck() must be run to repair + * fsck() must be run to repair */ #define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */ diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 28201b194f5..f321986e73d 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -45,6 +45,7 @@ #include <linux/buffer_head.h> #include <linux/pagemap.h> #include <linux/quotaops.h> +#include <linux/slab.h> #include "jfs_incore.h" #include "jfs_inode.h" @@ -57,29 +58,23 @@ #include "jfs_debug.h" /* - * __mark_inode_dirty expects inodes to be hashed. 
Since we don't want - * special inodes in the fileset inode space, we hash them to a dummy head - */ -static HLIST_HEAD(aggregate_hash); - -/* * imap locks */ /* iag free list lock */ -#define IAGFREE_LOCK_INIT(imap) init_MUTEX(&imap->im_freelock) -#define IAGFREE_LOCK(imap) down(&imap->im_freelock) -#define IAGFREE_UNLOCK(imap) up(&imap->im_freelock) +#define IAGFREE_LOCK_INIT(imap) mutex_init(&imap->im_freelock) +#define IAGFREE_LOCK(imap) mutex_lock(&imap->im_freelock) +#define IAGFREE_UNLOCK(imap) mutex_unlock(&imap->im_freelock) /* per ag iag list locks */ -#define AG_LOCK_INIT(imap,index) init_MUTEX(&(imap->im_aglock[index])) -#define AG_LOCK(imap,agno) down(&imap->im_aglock[agno]) -#define AG_UNLOCK(imap,agno) up(&imap->im_aglock[agno]) +#define AG_LOCK_INIT(imap,index) mutex_init(&(imap->im_aglock[index])) +#define AG_LOCK(imap,agno) mutex_lock(&imap->im_aglock[agno]) +#define AG_UNLOCK(imap,agno) mutex_unlock(&imap->im_aglock[agno]) /* * forward references */ -static int diAllocAG(struct inomap *, int, boolean_t, struct inode *); -static int diAllocAny(struct inomap *, int, boolean_t, struct inode *); +static int diAllocAG(struct inomap *, int, bool, struct inode *); +static int diAllocAny(struct inomap *, int, bool, struct inode *); static int diAllocBit(struct inomap *, struct iag *, int); static int diAllocExt(struct inomap *, int, struct inode *); static int diAllocIno(struct inomap *, int, struct inode *); @@ -93,21 +88,21 @@ static int copy_from_dinode(struct dinode *, struct inode *); static void copy_to_dinode(struct dinode *, struct inode *); /* - * NAME: diMount() + * NAME: diMount() * - * FUNCTION: initialize the incore inode map control structures for + * FUNCTION: initialize the incore inode map control structures for * a fileset or aggregate init time. * - * the inode map's control structure (dinomap) is - * brought in from disk and placed in virtual memory. + * the inode map's control structure (dinomap) is + * brought in from disk and placed in virtual memory. * * PARAMETERS: - * ipimap - pointer to inode map inode for the aggregate or fileset. + * ipimap - pointer to inode map inode for the aggregate or fileset. * * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient free virtual memory. - * -EIO - i/o error. + * 0 - success + * -ENOMEM - insufficient free virtual memory. + * -EIO - i/o error. */ int diMount(struct inode *ipimap) { @@ -120,7 +115,7 @@ int diMount(struct inode *ipimap) * allocate/initialize the in-memory inode map control structure */ /* allocate the in-memory inode map control structure. */ - imap = (struct inomap *) kmalloc(sizeof(struct inomap), GFP_KERNEL); + imap = kmalloc(sizeof(struct inomap), GFP_KERNEL); if (imap == NULL) { jfs_err("diMount: kmalloc returned NULL!"); return -ENOMEM; @@ -180,18 +175,18 @@ int diMount(struct inode *ipimap) /* - * NAME: diUnmount() + * NAME: diUnmount() * - * FUNCTION: write to disk the incore inode map control structures for + * FUNCTION: write to disk the incore inode map control structures for * a fileset or aggregate at unmount time. * * PARAMETERS: - * ipimap - pointer to inode map inode for the aggregate or fileset. + * ipimap - pointer to inode map inode for the aggregate or fileset. * * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient free virtual memory. - * -EIO - i/o error. + * 0 - success + * -ENOMEM - insufficient free virtual memory. + * -EIO - i/o error. 
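The lock macros converted above move the imap free-list and per-AG locks from counting semaphores (init_MUTEX/down/up) to struct mutex. A minimal sketch of the resulting usage pattern, assuming the corresponding fields in struct inomap become struct mutex (imap_like is a made-up stand-in, not the real structure):

        #include <linux/mutex.h>

        struct imap_like {
                struct mutex im_freelock;       /* was a semaphore set up with init_MUTEX() */
        };

        static void imap_like_init(struct imap_like *imap)
        {
                mutex_init(&imap->im_freelock);         /* replaces init_MUTEX() */
        }

        static void imap_like_update(struct imap_like *imap)
        {
                mutex_lock(&imap->im_freelock);         /* replaces down(), may sleep */
                /* ... manipulate the free-iag list ... */
                mutex_unlock(&imap->im_freelock);       /* replaces up() */
        }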
*/ int diUnmount(struct inode *ipimap, int mounterror) { @@ -265,8 +260,7 @@ int diSync(struct inode *ipimap) /* * write out dirty pages of imap */ - filemap_fdatawrite(ipimap->i_mapping); - filemap_fdatawait(ipimap->i_mapping); + filemap_write_and_wait(ipimap->i_mapping); diWriteSpecial(ipimap, 0); @@ -275,22 +269,22 @@ int diSync(struct inode *ipimap) /* - * NAME: diRead() + * NAME: diRead() * - * FUNCTION: initialize an incore inode from disk. + * FUNCTION: initialize an incore inode from disk. * * on entry, the specifed incore inode should itself * specify the disk inode number corresponding to the * incore inode (i.e. i_number should be initialized). - * + * * this routine handles incore inode initialization for * both "special" and "regular" inodes. special inodes * are those required early in the mount process and - * require special handling since much of the file system + * require special handling since much of the file system * is not yet initialized. these "special" inodes are * identified by a NULL inode map inode pointer and are * actually initialized by a call to diReadSpecial(). - * + * * for regular inodes, the iag describing the disk inode * is read from disk to determine the inode extent address * for the disk inode. with the inode extent address in @@ -299,13 +293,13 @@ int diSync(struct inode *ipimap) * incore inode. * * PARAMETERS: - * ip - pointer to incore inode to be initialized from disk. + * ip - pointer to incore inode to be initialized from disk. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. - * -ENOMEM - insufficient memory - * + * 0 - success + * -EIO - i/o error. + * -ENOMEM - insufficient memory + * */ int diRead(struct inode *ip) { @@ -319,7 +313,7 @@ int diRead(struct inode *ip) struct inomap *imap; int block_offset; int inodes_left; - uint pageno; + unsigned long pageno; int rel_inode; jfs_info("diRead: ino = %ld", ip->i_ino); @@ -332,7 +326,7 @@ int diRead(struct inode *ip) /* read the iag */ imap = JFS_IP(ipimap)->i_imap; - IREAD_LOCK(ipimap); + IREAD_LOCK(ipimap, RDWRLOCK_IMAP); rc = diIAGRead(imap, iagno, &mp); IREAD_UNLOCK(ipimap); if (rc) { @@ -382,17 +376,17 @@ int diRead(struct inode *ip) /* read the page of disk inode */ mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); - if (mp == 0) { + if (!mp) { jfs_err("diRead: read_metapage failed"); return -EIO; } - /* locate the the disk inode requested */ + /* locate the disk inode requested */ dp = (struct dinode *) mp->data; dp += rel_inode; if (ip->i_ino != le32_to_cpu(dp->di_number)) { - jfs_error(ip->i_sb, "diRead: i_ino != di_number"); + jfs_error(ip->i_sb, "i_ino != di_number\n"); rc = -EIO; } else if (le32_to_cpu(dp->di_nlink) == 0) rc = -ESTALE; @@ -403,7 +397,7 @@ int diRead(struct inode *ip) release_metapage(mp); /* set the ag for the inode */ - JFS_IP(ip)->agno = BLKTOAG(agstart, sbi); + JFS_IP(ip)->agstart = agstart; JFS_IP(ip)->active_ag = -1; return (rc); @@ -411,26 +405,26 @@ int diRead(struct inode *ip) /* - * NAME: diReadSpecial() + * NAME: diReadSpecial() * - * FUNCTION: initialize a 'special' inode from disk. + * FUNCTION: initialize a 'special' inode from disk. * * this routines handles aggregate level inodes. The * inode cache cannot differentiate between the * aggregate inodes and the filesystem inodes, so we * handle these here. We don't actually use the aggregate - * inode map, since these inodes are at a fixed location + * inode map, since these inodes are at a fixed location * and in some cases the aggregate inode map isn't initialized * yet. 
* * PARAMETERS: - * sb - filesystem superblock + * sb - filesystem superblock * inum - aggregate inode number * secondary - 1 if secondary aggregate inode table * * RETURN VALUES: - * new inode - success - * NULL - i/o error. + * new inode - success + * NULL - i/o error. */ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) { @@ -463,7 +457,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) /* read the page of fixed disk inode (AIT) in raw mode */ mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1); if (mp == NULL) { - ip->i_nlink = 1; /* Don't want iput() deleting it */ + set_nlink(ip, 1); /* Don't want iput() deleting it */ iput(ip); return (NULL); } @@ -475,7 +469,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) /* copy on-disk inode to in-memory inode */ if ((copy_from_dinode(dp, ip)) != 0) { /* handle bad return by returning NULL for ip */ - ip->i_nlink = 1; /* Don't want iput() deleting it */ + set_nlink(ip, 1); /* Don't want iput() deleting it */ iput(ip); /* release the page */ release_metapage(mp); @@ -497,18 +491,24 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) /* release the page */ release_metapage(mp); - hlist_add_head(&ip->i_hash, &aggregate_hash); + /* + * __mark_inode_dirty expects inodes to be hashed. Since we don't + * want special inodes in the fileset inode space, we make them + * appear hashed, but do not put on any lists. hlist_del() + * will work fine and require no locking. + */ + hlist_add_fake(&ip->i_hash); return (ip); } /* - * NAME: diWriteSpecial() + * NAME: diWriteSpecial() * - * FUNCTION: Write the special inode to disk + * FUNCTION: Write the special inode to disk * * PARAMETERS: - * ip - special inode + * ip - special inode * secondary - 1 if secondary aggregate inode table * * RETURN VALUES: none @@ -555,9 +555,9 @@ void diWriteSpecial(struct inode *ip, int secondary) } /* - * NAME: diFreeSpecial() + * NAME: diFreeSpecial() * - * FUNCTION: Free allocated space for special inode + * FUNCTION: Free allocated space for special inode */ void diFreeSpecial(struct inode *ip) { @@ -565,8 +565,7 @@ void diFreeSpecial(struct inode *ip) jfs_err("diFreeSpecial called with NULL ip!"); return; } - filemap_fdatawrite(ip->i_mapping); - filemap_fdatawait(ip->i_mapping); + filemap_write_and_wait(ip->i_mapping); truncate_inode_pages(ip->i_mapping, 0); iput(ip); } @@ -574,9 +573,9 @@ void diFreeSpecial(struct inode *ip) /* - * NAME: diWrite() + * NAME: diWrite() * - * FUNCTION: write the on-disk inode portion of the in-memory inode + * FUNCTION: write the on-disk inode portion of the in-memory inode * to its corresponding on-disk inode. * * on entry, the specifed incore inode should itself @@ -588,14 +587,14 @@ void diFreeSpecial(struct inode *ip) * page of the extent that contains the disk inode is * read and the disk inode portion of the incore inode * is copied to the disk inode. - * + * * PARAMETERS: * tid - transacation id - * ip - pointer to incore inode to be written to the inode extent. + * ip - pointer to incore inode to be written to the inode extent. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. + * 0 - success + * -EIO - i/o error. 
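The diReadSpecial() change earlier in this hunk drops the private aggregate_hash list and instead marks special inodes with hlist_add_fake(), so __mark_inode_dirty() sees them as hashed without putting them on any shared list. A self-contained sketch of what that helper amounts to (the struct and function names here are stand-ins, not the <linux/list.h> definitions):

        struct hlist_node_sketch {
                struct hlist_node_sketch *next, **pprev;
        };

        static inline void hlist_add_fake_sketch(struct hlist_node_sketch *n)
        {
                /* point pprev at the node's own next field: the node now
                 * reports as "hashed", but no shared list head is involved,
                 * so a later hlist_del() needs no locking. */
                n->pprev = &n->next;
        }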
*/ int diWrite(tid_t tid, struct inode *ip) { @@ -608,7 +607,7 @@ int diWrite(tid_t tid, struct inode *ip) int block_offset; int inodes_left; struct metapage *mp; - uint pageno; + unsigned long pageno; int rel_inode; int dioffset; struct inode *ipimap; @@ -626,7 +625,7 @@ int diWrite(tid_t tid, struct inode *ip) if (!addressPXD(&(jfs_ip->ixpxd)) || (lengthPXD(&(jfs_ip->ixpxd)) != JFS_IP(ipimap)->i_imap->im_nbperiext)) { - jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); + jfs_error(ip->i_sb, "ixpxd invalid\n"); return -EIO; } @@ -656,7 +655,7 @@ int diWrite(tid_t tid, struct inode *ip) /* read the page of disk inode */ retry: mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1); - if (mp == 0) + if (!mp) return -EIO; /* get the pointer to the disk inode */ @@ -678,11 +677,11 @@ int diWrite(tid_t tid, struct inode *ip) * copy btree root from in-memory inode to on-disk inode * * (tlock is taken from inline B+-tree root in in-memory - * inode when the B+-tree root is updated, which is pointed + * inode when the B+-tree root is updated, which is pointed * by jfs_ip->blid as well as being on tx tlock list) * - * further processing of btree root is based on the copy - * in in-memory inode, where txLog() will log from, and, + * further processing of btree root is based on the copy + * in in-memory inode, where txLog() will log from, and, * for xtree root, txUpdateMap() will update map and reset * XAD_NEW bit; */ @@ -732,7 +731,7 @@ int diWrite(tid_t tid, struct inode *ip) ilinelock = (struct linelock *) & tlck->lock; /* - * regular file: 16 byte (XAD slot) granularity + * regular file: 16 byte (XAD slot) granularity */ if (type & tlckXTREE) { xtpage_t *p, *xp; @@ -757,7 +756,7 @@ int diWrite(tid_t tid, struct inode *ip) xad->flag &= ~(XAD_NEW | XAD_EXTENDED); } /* - * directory: 32 byte (directory entry slot) granularity + * directory: 32 byte (directory entry slot) granularity */ else if (type & tlckDTREE) { dtpage_t *p, *xp; @@ -802,9 +801,8 @@ int diWrite(tid_t tid, struct inode *ip) } /* - * lock/copy inode base: 128 byte slot granularity + * lock/copy inode base: 128 byte slot granularity */ -// baseDinode: lv = & dilinelock->lv[dilinelock->index]; lv->offset = dioffset >> L2INODESLOTSIZE; copy_to_dinode(dp, ip); @@ -815,18 +813,7 @@ int diWrite(tid_t tid, struct inode *ip) lv->length = 1; dilinelock->index++; -#ifdef _JFS_FASTDASD - /* - * We aren't logging changes to the DASD used in directory inodes, - * but we need to write them to disk. If we don't unmount cleanly, - * mount will recalculate the DASD used. - */ - if (S_ISDIR(ip->i_mode) - && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED)) - memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd)); -#endif /* _JFS_FASTDASD */ - - /* release the buffer holding the updated on-disk inode. + /* release the buffer holding the updated on-disk inode. * the buffer will be later written by commit processing. */ write_metapage(mp); @@ -836,15 +823,15 @@ int diWrite(tid_t tid, struct inode *ip) /* - * NAME: diFree(ip) + * NAME: diFree(ip) * - * FUNCTION: free a specified inode from the inode working map + * FUNCTION: free a specified inode from the inode working map * for a fileset or aggregate. * * if the inode to be freed represents the first (only) * free inode within the iag, the iag will be placed on * the ag free inode list. - * + * * freeing the inode will cause the inode extent to be * freed if the inode is the only allocated inode within * the extent. 
in this case all the disk resource backing @@ -867,11 +854,11 @@ int diWrite(tid_t tid, struct inode *ip) * any updates and are held until all updates are complete. * * PARAMETERS: - * ip - inode to be freed. + * ip - inode to be freed. * * RETURN VALUES: - * 0 - success - * -EIO - i/o error. + * 0 - success + * -EIO - i/o error. */ int diFree(struct inode *ip) { @@ -900,20 +887,20 @@ int diFree(struct inode *ip) */ iagno = INOTOIAG(inum); - /* make sure that the iag is contained within + /* make sure that the iag is contained within * the map. */ if (iagno >= imap->im_nextiag) { - dump_mem("imap", imap, 32); - jfs_error(ip->i_sb, - "diFree: inum = %d, iagno = %d, nextiag = %d", + print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, + imap, 32, 0); + jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n", (uint) inum, iagno, imap->im_nextiag); return -EIO; } /* get the allocation group for this ino. */ - agno = JFS_IP(ip)->agno; + agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb)); /* Lock the AG specific inode map information */ @@ -922,7 +909,7 @@ int diFree(struct inode *ip) /* Obtain read lock in imap inode. Don't release it until we have * read all of the IAG's that we are going to. */ - IREAD_LOCK(ipimap); + IREAD_LOCK(ipimap, RDWRLOCK_IMAP); /* read the iag. */ @@ -942,15 +929,14 @@ int diFree(struct inode *ip) mask = HIGHORDER >> bitno; if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { - jfs_error(ip->i_sb, - "diFree: wmap shows inode already free"); + jfs_error(ip->i_sb, "wmap shows inode already free\n"); } if (!addressPXD(&iagp->inoext[extno])) { release_metapage(mp); IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); - jfs_error(ip->i_sb, "diFree: invalid inoext"); + jfs_error(ip->i_sb, "invalid inoext\n"); return -EIO; } @@ -962,12 +948,12 @@ int diFree(struct inode *ip) release_metapage(mp); IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); - jfs_error(ip->i_sb, "diFree: numfree > numinos"); + jfs_error(ip->i_sb, "numfree > numinos\n"); return -EIO; } /* - * inode extent still has some inodes or below low water mark: - * keep the inode extent; + * inode extent still has some inodes or below low water mark: + * keep the inode extent; */ if (bitmap || imap->im_agctl[agno].numfree < 96 || @@ -1015,7 +1001,7 @@ int diFree(struct inode *ip) /* update the free inode summary map for the extent if * freeing the inode means the extent will now have free - * inodes (i.e., the inode being freed is the first free + * inodes (i.e., the inode being freed is the first free * inode of extent), */ if (iagp->wmap[extno] == cpu_to_le32(ONES)) { @@ -1032,8 +1018,7 @@ int diFree(struct inode *ip) /* update the free inode counts at the iag, ag and * map level. 
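Several hunks in this file (here in diFree(), and again in diAllocBit() and diNewExt() below) replace the open-coded little-endian read-modify-write with le32_add_cpu(). A sketch of the helper, which is essentially its definition in the kernel's byteorder headers:

        static inline void le32_add_cpu_sketch(__le32 *var, u32 val)
        {
                *var = cpu_to_le32(le32_to_cpu(*var) + val);
        }

        /*
         * So the replacement is a straight equivalence:
         *
         *      iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1);
         * becomes
         *      le32_add_cpu(&iagp->nfreeinos, 1);
         *
         * and a negative delta such as -(INOSPEREXT - 1) still works,
         * because the u32 addition wraps modulo 2^32.
         */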
*/ - iagp->nfreeinos = - cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + 1); + le32_add_cpu(&iagp->nfreeinos, 1); imap->im_agctl[agno].numfree += 1; atomic_inc(&imap->im_numfree); @@ -1049,12 +1034,12 @@ int diFree(struct inode *ip) /* - * inode extent has become free and above low water mark: - * free the inode extent; + * inode extent has become free and above low water mark: + * free the inode extent; */ /* - * prepare to update iag list(s) (careful update step 1) + * prepare to update iag list(s) (careful update step 1) */ amp = bmp = cmp = dmp = NULL; fwd = back = -1; @@ -1082,7 +1067,7 @@ int diFree(struct inode *ip) */ if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) { /* in preparation for removing the iag from the - * ag extent free list, read the iags preceeding + * ag extent free list, read the iags preceding * and following the iag on the ag extent free * list. */ @@ -1108,7 +1093,7 @@ int diFree(struct inode *ip) int inofreefwd = le32_to_cpu(iagp->inofreefwd); /* in preparation for removing the iag from the - * ag inode free list, read the iags preceeding + * ag inode free list, read the iags preceding * and following the iag on the ag inode free * list. before reading these iags, we must make * sure that we already don't have them in hand @@ -1154,7 +1139,7 @@ int diFree(struct inode *ip) invalidate_pxd_metapages(ip, freepxd); /* - * update iag list(s) (careful update step 2) + * update iag list(s) (careful update step 2) */ /* add the iag to the ag extent free list if this is the * first free extent for the iag. @@ -1206,13 +1191,13 @@ int diFree(struct inode *ip) iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1); } - /* update the inode extent address and working map + /* update the inode extent address and working map * to reflect the free extent. - * the permanent map should have been updated already + * the permanent map should have been updated already * for the inode being freed. */ if (iagp->pmap[extno] != 0) { - jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); + jfs_error(ip->i_sb, "the pmap does not show inode free\n"); } iagp->wmap[extno] = 0; PXDlength(&iagp->inoext[extno], 0); @@ -1220,7 +1205,7 @@ int diFree(struct inode *ip) /* update the free extent and free inode summary maps * to reflect the freed extent. - * the inode summary map is marked to indicate no inodes + * the inode summary map is marked to indicate no inodes * available for the freed extent. */ sword = extno >> L2EXTSPERSUM; @@ -1232,9 +1217,8 @@ int diFree(struct inode *ip) /* update the number of free inodes and number of free extents * for the iag. */ - iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - - (INOSPEREXT - 1)); - iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) + 1); + le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1)); + le32_add_cpu(&iagp->nfreeexts, 1); /* update the number of free inodes and backed inodes * at the ag and inode map level. @@ -1257,17 +1241,17 @@ int diFree(struct inode *ip) * start transaction to update block allocation map * for the inode extent freed; * - * N.B. AG_LOCK is released and iag will be released below, and + * N.B. 
AG_LOCK is released and iag will be released below, and * other thread may allocate inode from/reusing the ixad freed - * BUT with new/different backing inode extent from the extent - * to be freed by the transaction; + * BUT with new/different backing inode extent from the extent + * to be freed by the transaction; */ tid = txBegin(ipimap->i_sb, COMMIT_FORCE); - down(&JFS_IP(ipimap)->commit_sem); + mutex_lock(&JFS_IP(ipimap)->commit_mutex); - /* acquire tlock of the iag page of the freed ixad + /* acquire tlock of the iag page of the freed ixad * to force the page NOHOMEOK (even though no data is - * logged from the iag page) until NOREDOPAGE|FREEXTENT log + * logged from the iag page) until NOREDOPAGE|FREEXTENT log * for the free of the extent is committed; * write FREEXTENT|NOREDOPAGE log record * N.B. linelock is overlaid as freed extent descriptor; @@ -1286,8 +1270,8 @@ int diFree(struct inode *ip) * logredo needs the IAG number and IAG extent index in order * to ensure that the IMap is consistent. The least disruptive * way to pass these values through to the transaction manager - * is in the iplist array. - * + * is in the iplist array. + * * It's not pretty, but it works. */ iplist[1] = (struct inode *) (size_t)iagno; @@ -1296,7 +1280,7 @@ int diFree(struct inode *ip) rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* unlock the AG inode map information */ AG_UNLOCK(imap, agno); @@ -1329,33 +1313,32 @@ int diFree(struct inode *ip) static inline void diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) { - struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); struct jfs_inode_info *jfs_ip = JFS_IP(ip); ip->i_ino = (iagno << L2INOSPERIAG) + ino; jfs_ip->ixpxd = iagp->inoext[extno]; - jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); + jfs_ip->agstart = le64_to_cpu(iagp->agstart); jfs_ip->active_ag = -1; } /* - * NAME: diAlloc(pip,dir,ip) + * NAME: diAlloc(pip,dir,ip) * - * FUNCTION: allocate a disk inode from the inode working map + * FUNCTION: allocate a disk inode from the inode working map * for a fileset or aggregate. * * PARAMETERS: - * pip - pointer to incore inode for the parent inode. - * dir - TRUE if the new disk inode is for a directory. - * ip - pointer to a new inode + * pip - pointer to incore inode for the parent inode. + * dir - 'true' if the new disk inode is for a directory. + * ip - pointer to a new inode * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ -int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) +int diAlloc(struct inode *pip, bool dir, struct inode *ip) { int rc, ino, iagno, addext, extno, bitno, sword; int nwords, rem, i, agno; @@ -1374,10 +1357,10 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) JFS_IP(ip)->ipimap = ipimap; JFS_IP(ip)->fileset = FILESYSTEM_I; - /* for a directory, the allocation policy is to start + /* for a directory, the allocation policy is to start * at the ag level using the preferred ag. 
*/ - if (dir == TRUE) { + if (dir) { agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap); AG_LOCK(imap, agno); goto tryag; @@ -1393,7 +1376,7 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) */ /* get the ag number of this iag */ - agno = JFS_IP(pip)->agno; + agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb)); if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) { /* @@ -1409,7 +1392,7 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) inum = pip->i_ino + 1; ino = inum & (INOSPERIAG - 1); - /* back off the the hint if it is outside of the iag */ + /* back off the hint if it is outside of the iag */ if (ino == 0) inum = pip->i_ino; @@ -1417,7 +1400,7 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) AG_LOCK(imap, agno); /* Get read lock on imap inode */ - IREAD_LOCK(ipimap); + IREAD_LOCK(ipimap, RDWRLOCK_IMAP); /* get the iag number and read the iag */ iagno = INOTOIAG(inum); @@ -1435,9 +1418,9 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts); /* - * try to allocate from the IAG + * try to allocate from the IAG */ - /* check if the inode may be allocated from the iag + /* check if the inode may be allocated from the iag * (i.e. the inode has free inodes or new extent can be added). */ if (iagp->nfreeinos || addext) { @@ -1492,7 +1475,7 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) * hint or, if appropriate (i.e. addext is true), allocate * an extent of free inodes at or following the extent * containing the hint. - * + * * the free inode and free extent summary maps are used * here, so determine the starting summary map position * and the number of words we'll have to examine. again, @@ -1508,7 +1491,7 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) /* mask any prior bits for the starting words of the * summary map. */ - mask = ONES << (EXTSPERSUM - bitno); + mask = (bitno == 0) ? 0 : (ONES << (EXTSPERSUM - bitno)); inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask; extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask; @@ -1533,9 +1516,8 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) release_metapage(mp); AG_UNLOCK(imap, agno); jfs_error(ip->i_sb, - "diAlloc: can't find free bit " - "in wmap"); - return EIO; + "can't find free bit in wmap\n"); + return -EIO; } /* determine the inode number within the @@ -1635,15 +1617,15 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) /* - * NAME: diAllocAG(imap,agno,dir,ip) + * NAME: diAllocAG(imap,agno,dir,ip) * - * FUNCTION: allocate a disk inode from the allocation group. + * FUNCTION: allocate a disk inode from the allocation group. * * this routine first determines if a new extent of free * inodes should be added for the allocation group, with * the current request satisfied from this extent. if this * is the case, an attempt will be made to do just that. if - * this attempt fails or it has been determined that a new + * this attempt fails or it has been determined that a new * extent should not be added, an attempt is made to satisfy * the request by allocating an existing (backed) free inode * from the allocation group. @@ -1651,31 +1633,31 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) * PRE CONDITION: Already have the AG lock for this AG. * * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - allocation group to allocate from. - * dir - TRUE if the new disk inode is for a directory. 
- * ip - pointer to the new inode to be filled in on successful return + * imap - pointer to inode map control structure. + * agno - allocation group to allocate from. + * dir - 'true' if the new disk inode is for a directory. + * ip - pointer to the new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ static int -diAllocAG(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) +diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) { int rc, addext, numfree, numinos; - /* get the number of free and the number of backed disk + /* get the number of free and the number of backed disk * inodes currently within the ag. */ numfree = imap->im_agctl[agno].numfree; numinos = imap->im_agctl[agno].numinos; if (numfree > numinos) { - jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); + jfs_error(ip->i_sb, "numfree > numinos\n"); return -EIO; } @@ -1684,7 +1666,7 @@ diAllocAG(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) * if there are a small number of free inodes or number of free * inodes is a small percentage of the number of backed inodes. */ - if (dir == TRUE) + if (dir) addext = (numfree < 64 || (numfree < 256 && ((numfree * 100) / numinos) <= 20)); @@ -1695,7 +1677,7 @@ diAllocAG(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) * try to allocate a new extent of free inodes. */ if (addext) { - /* if free space is not avaliable for this new extent, try + /* if free space is not available for this new extent, try * below to allocate a free and existing (already backed) * inode from the ag. */ @@ -1711,9 +1693,9 @@ diAllocAG(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) /* - * NAME: diAllocAny(imap,agno,dir,iap) + * NAME: diAllocAny(imap,agno,dir,iap) * - * FUNCTION: allocate a disk inode from any other allocation group. + * FUNCTION: allocate a disk inode from any other allocation group. * * this routine is called when an allocation attempt within * the primary allocation group has failed. if attempts to @@ -1721,26 +1703,26 @@ diAllocAG(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) * specified primary group. * * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - primary allocation group (to avoid). - * dir - TRUE if the new disk inode is for a directory. - * ip - pointer to a new inode to be filled in on successful return + * imap - pointer to inode map control structure. + * agno - primary allocation group (to avoid). + * dir - 'true' if the new disk inode is for a directory. + * ip - pointer to a new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ static int -diAllocAny(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) +diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip) { int ag, rc; int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag; - /* try to allocate from the ags following agno up to + /* try to allocate from the ags following agno up to * the maximum ag number. 
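The diAllocAny() comment above describes scanning every allocation group other than the primary one when that group is full: first the groups after it, then the groups before it, stopping at the first result that is not "out of space". A stand-alone sketch of that scan order (try_ag is a hypothetical callback, not a JFS function):

        #include <errno.h>

        static int alloc_any_but(int agno, int maxag,
                                 int (*try_ag)(int ag, void *ctx), void *ctx)
        {
                int ag, rc;

                for (ag = agno + 1; ag <= maxag; ag++) {
                        rc = try_ag(ag, ctx);
                        if (rc != -ENOSPC)
                                return rc;      /* success or a hard error */
                }
                for (ag = 0; ag < agno; ag++) {
                        rc = try_ag(ag, ctx);
                        if (rc != -ENOSPC)
                                return rc;
                }
                return -ENOSPC;                 /* every other group is full too */
        }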
*/ for (ag = agno + 1; ag <= maxag; ag++) { @@ -1774,29 +1756,29 @@ diAllocAny(struct inomap * imap, int agno, boolean_t dir, struct inode *ip) /* - * NAME: diAllocIno(imap,agno,ip) + * NAME: diAllocIno(imap,agno,ip) * - * FUNCTION: allocate a disk inode from the allocation group's free + * FUNCTION: allocate a disk inode from the allocation group's free * inode list, returning an error if this free list is * empty (i.e. no iags on the list). * * allocation occurs from the first iag on the list using * the iag's free inode summary map to find the leftmost - * free inode in the iag. - * + * free inode in the iag. + * * PRE CONDITION: Already have AG lock for this AG. - * + * * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - allocation group. - * ip - pointer to new inode to be filled in on successful return + * imap - pointer to inode map control structure. + * agno - allocation group. + * ip - pointer to new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) { @@ -1810,7 +1792,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) return -ENOSPC; /* obtain read lock on imap inode */ - IREAD_LOCK(imap->im_ipimap); + IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); /* read the iag at the head of the list. */ @@ -1826,8 +1808,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) if (!iagp->nfreeinos) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); - jfs_error(ip->i_sb, - "diAllocIno: nfreeinos = 0, but iag on freelist"); + jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n"); return -EIO; } @@ -1839,7 +1820,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); jfs_error(ip->i_sb, - "diAllocIno: free inode not found in summary map"); + "free inode not found in summary map\n"); return -EIO; } @@ -1854,7 +1835,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) if (rem >= EXTSPERSUM) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); - jfs_error(ip->i_sb, "diAllocIno: no free extent found"); + jfs_error(ip->i_sb, "no free extent found\n"); return -EIO; } extno = (sword << L2EXTSPERSUM) + rem; @@ -1865,11 +1846,11 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) if (rem >= INOSPEREXT) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); - jfs_error(ip->i_sb, "diAllocIno: free inode not found"); + jfs_error(ip->i_sb, "free inode not found\n"); return -EIO; } - /* compute the inode number within the iag. + /* compute the inode number within the iag. */ ino = (extno << L2INOSPEREXT) + rem; @@ -1892,19 +1873,19 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) /* - * NAME: diAllocExt(imap,agno,ip) + * NAME: diAllocExt(imap,agno,ip) + * + * FUNCTION: add a new extent of free inodes to an iag, allocating + * an inode from this extent to satisfy the current allocation + * request. * - * FUNCTION: add a new extent of free inodes to an iag, allocating - * an inode from this extent to satisfy the current allocation - * request. - * * this routine first tries to find an existing iag with free * extents through the ag free extent list. 
if list is not * empty, the head of the list will be selected as the home * of the new extent of free inodes. otherwise (the list is * empty), a new iag will be allocated for the ag to contain * the extent. - * + * * once an iag has been selected, the free extent summary map * is used to locate a free extent within the iag and diNewExt() * is called to initialize the extent, with initialization @@ -1912,16 +1893,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) * for the purpose of satisfying this request. * * PARAMETERS: - * imap - pointer to inode map control structure. - * agno - allocation group number. - * ip - pointer to new inode to be filled in on successful return + * imap - pointer to inode map control structure. + * agno - allocation group number. + * ip - pointer to new inode to be filled in on successful return * with the disk inode number allocated, its extent address * and the start of the ag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) { @@ -1948,10 +1929,10 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) } else { /* read the iag. */ - IREAD_LOCK(imap->im_ipimap); + IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); if ((rc = diIAGRead(imap, iagno, &mp))) { IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, "diAllocExt: error reading iag"); + jfs_error(ip->i_sb, "error reading iag\n"); return rc; } iagp = (struct iag *) mp->data; @@ -1963,8 +1944,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) if (sword >= SMAPSZ) { release_metapage(mp); IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, - "diAllocExt: free ext summary map not found"); + jfs_error(ip->i_sb, "free ext summary map not found\n"); return -EIO; } if (~iagp->extsmap[sword]) @@ -1977,7 +1957,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) if (rem >= EXTSPERSUM) { release_metapage(mp); IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, "diAllocExt: free extent not found"); + jfs_error(ip->i_sb, "free extent not found\n"); return -EIO; } extno = (sword << L2EXTSPERSUM) + rem; @@ -2012,9 +1992,9 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) /* - * NAME: diAllocBit(imap,iagp,ino) + * NAME: diAllocBit(imap,iagp,ino) * - * FUNCTION: allocate a backed inode from an iag. + * FUNCTION: allocate a backed inode from an iag. * * this routine performs the mechanics of allocating a * specified inode from a backed extent. @@ -2027,19 +2007,19 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) * in the face of updates to multiple buffers. under this * approach, all required buffers are obtained before making * any updates and are held all are updates are complete. - * + * * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on * this AG. Must have read lock on imap inode. * * PARAMETERS: - * imap - pointer to inode map control structure. - * iagp - pointer to iag. - * ino - inode number to be allocated within the iag. + * imap - pointer to inode map control structure. + * iagp - pointer to iag. + * ino - inode number to be allocated within the iag. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. 
*/ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) { @@ -2050,7 +2030,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) /* check if this is the last free inode within the iag. * if so, it will have to be removed from the ag free - * inode list, so get the iags preceeding and following + * inode list, so get the iags preceding and following * it on the list. */ if (iagp->nfreeinos == cpu_to_le32(1)) { @@ -2096,8 +2076,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) if (bmp) release_metapage(bmp); - jfs_error(imap->im_ipimap->i_sb, - "diAllocBit: iag inconsistent"); + jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n"); return -EIO; } @@ -2137,7 +2116,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) /* update the free inode count at the iag, ag, inode * map levels. */ - iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) - 1); + le32_add_cpu(&iagp->nfreeinos, -1); imap->im_agctl[agno].numfree -= 1; atomic_dec(&imap->im_numfree); @@ -2146,11 +2125,11 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) /* - * NAME: diNewExt(imap,iagp,extno) + * NAME: diNewExt(imap,iagp,extno) * - * FUNCTION: initialize a new extent of inodes for an iag, allocating - * the first inode of the extent for use for the current - * allocation request. + * FUNCTION: initialize a new extent of inodes for an iag, allocating + * the first inode of the extent for use for the current + * allocation request. * * disk resources are allocated for the new extent of inodes * and the inodes themselves are initialized to reflect their @@ -2174,19 +2153,19 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino) * buffers. under this approach, all required buffers are * obtained before making any updates and are held until all * updates are complete. - * + * * PRE CONDITION: Already have buffer lock on iagp. Already have AG lock on * this AG. Must have read lock on imap inode. * * PARAMETERS: - * imap - pointer to inode map control structure. - * iagp - pointer to iag. - * extno - extent number. + * imap - pointer to inode map control structure. + * iagp - pointer to iag. + * extno - extent number. * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. */ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) { @@ -2204,7 +2183,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* better have free extents. */ if (!iagp->nfreeexts) { - jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); + jfs_error(imap->im_ipimap->i_sb, "no free extents\n"); return -EIO; } @@ -2222,7 +2201,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* check if this is the last free extent within the * iag. if so, the iag must be removed from the ag - * free extent list, so get the iags preceeding and + * free extent list, so get the iags preceding and * following the iag on this list. 
*/ if (iagp->nfreeexts == cpu_to_le32(1)) { @@ -2276,7 +2255,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) } if (ciagp == NULL) { jfs_error(imap->im_ipimap->i_sb, - "diNewExt: ciagp == NULL"); + "ciagp == NULL\n"); rc = -EIO; goto error_out; } @@ -2391,9 +2370,8 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* update the free inode and free extent counts for the * iag. */ - iagp->nfreeinos = cpu_to_le32(le32_to_cpu(iagp->nfreeinos) + - (INOSPEREXT - 1)); - iagp->nfreeexts = cpu_to_le32(le32_to_cpu(iagp->nfreeexts) - 1); + le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1)); + le32_add_cpu(&iagp->nfreeexts, -1); /* update the free and backed inode counts for the ag. */ @@ -2432,38 +2410,38 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* - * NAME: diNewIAG(imap,iagnop,agno) + * NAME: diNewIAG(imap,iagnop,agno) + * + * FUNCTION: allocate a new iag for an allocation group. * - * FUNCTION: allocate a new iag for an allocation group. - * - * first tries to allocate the iag from the inode map - * iagfree list: - * if the list has free iags, the head of the list is removed + * first tries to allocate the iag from the inode map + * iagfree list: + * if the list has free iags, the head of the list is removed * and returned to satisfy the request. * if the inode map's iag free list is empty, the inode map * is extended to hold a new iag. this new iag is initialized * and returned to satisfy the request. * * PARAMETERS: - * imap - pointer to inode map control structure. - * iagnop - pointer to an iag number set with the number of the + * imap - pointer to inode map control structure. + * iagnop - pointer to an iag number set with the number of the * newly allocated iag upon successful return. - * agno - allocation group number. + * agno - allocation group number. * bpp - Buffer pointer to be filled in with new IAG's buffer * * RETURN VALUES: - * 0 - success. - * -ENOSPC - insufficient disk resources. - * -EIO - i/o error. + * 0 - success. + * -ENOSPC - insufficient disk resources. + * -EIO - i/o error. * - * serialization: + * serialization: * AG lock held on entry/exit; * write lock on the map is held inside; * read lock on the map is held on successful completion; * - * note: new iag transaction: + * note: new iag transaction: * . synchronously write iag; - * . write log of xtree and inode of imap; + * . write log of xtree and inode of imap; * . commit; * . synchronous write of xtree (right to left, bottom to top); * . at start of logredo(): init in-memory imap with one additional iag page; @@ -2483,9 +2461,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) s64 xaddr = 0; s64 blkno; tid_t tid; -#ifdef _STILL_TO_PORT - xad_t xad; -#endif /* _STILL_TO_PORT */ struct inode *iplist[1]; /* pick up pointers to the inode map and mount inodes */ @@ -2496,7 +2471,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) /* acquire the free iag lock */ IAGFREE_LOCK(imap); - /* if there are any iags on the inode map free iag list, + /* if there are any iags on the inode map free iag list, * allocate the iag from the head of the list. 
*/ if (imap->im_freeiag >= 0) { @@ -2511,18 +2486,18 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) */ /* acquire inode map lock */ - IWRITE_LOCK(ipimap); + IWRITE_LOCK(ipimap, RDWRLOCK_IMAP); if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) { IWRITE_UNLOCK(ipimap); IAGFREE_UNLOCK(imap); jfs_error(imap->im_ipimap->i_sb, - "diNewIAG: ipimap->i_size is wrong"); + "ipimap->i_size is wrong\n"); return -EIO; } - /* get the next avaliable iag number */ + /* get the next available iag number */ iagno = imap->im_nextiag; /* make sure that we have not exceeded the maximum inode @@ -2556,13 +2531,13 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) * addressing structure pointing to the new iag page; */ tid = txBegin(sb, COMMIT_FORCE); - down(&JFS_IP(ipimap)->commit_sem); + mutex_lock(&JFS_IP(ipimap)->commit_mutex); /* update the inode map addressing structure to point to it */ if ((rc = xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) { txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* Free the blocks allocated for the iag since it was * not successfully added to the inode map */ @@ -2590,6 +2565,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) txAbort(tid, 0); txEnd(tid); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* release the inode map lock */ IWRITE_UNLOCK(ipimap); @@ -2620,19 +2596,19 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) flush_metapage(mp); /* - * txCommit(COMMIT_FORCE) will synchronously write address - * index pages and inode after commit in careful update order + * txCommit(COMMIT_FORCE) will synchronously write address + * index pages and inode after commit in careful update order * of address index pages (right to left, bottom up); */ iplist[0] = ipimap; rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE); txEnd(tid); - up(&JFS_IP(ipimap)->commit_sem); + mutex_unlock(&JFS_IP(ipimap)->commit_mutex); duplicateIXtree(sb, blkno, xlen, &xaddr); - /* update the next avaliable iag number */ + /* update the next available iag number */ imap->im_nextiag += 1; /* Add the iag to the iag free list so we don't lose the iag @@ -2650,7 +2626,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) } /* obtain read lock on map */ - IREAD_LOCK(ipimap); + IREAD_LOCK(ipimap, RDWRLOCK_IMAP); /* read the iag */ if ((rc = diIAGRead(imap, iagno, &mp))) { @@ -2676,15 +2652,15 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) } /* - * NAME: diIAGRead() + * NAME: diIAGRead() * - * FUNCTION: get the buffer for the specified iag within a fileset + * FUNCTION: get the buffer for the specified iag within a fileset * or aggregate inode map. - * + * * PARAMETERS: - * imap - pointer to inode map control structure. - * iagno - iag number. - * bpp - point to buffer pointer to be filled in on successful + * imap - pointer to inode map control structure. + * iagno - iag number. + * bpp - point to buffer pointer to be filled in on successful * exit. * * SERIALIZATION: @@ -2693,8 +2669,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) * the read lock is unnecessary.) * * RETURN VALUES: - * 0 - success. - * -EIO - i/o error. + * 0 - success. + * -EIO - i/o error. 
*/ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) { @@ -2714,17 +2690,17 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp) } /* - * NAME: diFindFree() + * NAME: diFindFree() * - * FUNCTION: find the first free bit in a word starting at + * FUNCTION: find the first free bit in a word starting at * the specified bit position. * * PARAMETERS: - * word - word to be examined. - * start - starting bit position. + * word - word to be examined. + * start - starting bit position. * * RETURN VALUES: - * bit position of first free bit in the word or 32 if + * bit position of first free bit in the word or 32 if * no free bits were found. */ static int diFindFree(u32 word, int start) @@ -2742,24 +2718,24 @@ static int diFindFree(u32 word, int start) /* * NAME: diUpdatePMap() - * - * FUNCTION: Update the persistent map in an IAG for the allocation or + * + * FUNCTION: Update the persistent map in an IAG for the allocation or * freeing of the specified inode. - * + * * PRE CONDITIONS: Working map has already been updated for allocate. * * PARAMETERS: * ipimap - Incore inode map inode * inum - Number of inode to mark in permanent map - * is_free - If TRUE indicates inode should be marked freed, otherwise + * is_free - If 'true' indicates inode should be marked freed, otherwise * indicates inode should be marked allocated. * - * RETURN VALUES: + * RETURN VALUES: * 0 for success */ int diUpdatePMap(struct inode *ipimap, - unsigned long inum, boolean_t is_free, struct tblock * tblk) + unsigned long inum, bool is_free, struct tblock * tblk) { int rc; struct iag *iagp; @@ -2776,12 +2752,11 @@ diUpdatePMap(struct inode *ipimap, iagno = INOTOIAG(inum); /* make sure that the iag is contained within the map */ if (iagno >= imap->im_nextiag) { - jfs_error(ipimap->i_sb, - "diUpdatePMap: the iag is outside the map"); + jfs_error(ipimap->i_sb, "the iag is outside the map\n"); return -EIO; } /* read the iag */ - IREAD_LOCK(ipimap); + IREAD_LOCK(ipimap, RDWRLOCK_IMAP); rc = diIAGRead(imap, iagno, &mp); IREAD_UNLOCK(ipimap); if (rc) @@ -2795,24 +2770,24 @@ diUpdatePMap(struct inode *ipimap, extno = ino >> L2INOSPEREXT; bitno = ino & (INOSPEREXT - 1); mask = HIGHORDER >> bitno; - /* + /* * mark the inode free in persistent map: */ - if (is_free == TRUE) { + if (is_free) { /* The inode should have been allocated both in working * map and in persistent map; * the inode will be freed from working map at the release * of last reference release; */ if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { - jfs_error(ipimap->i_sb, - "diUpdatePMap: inode %ld not marked as " - "allocated in wmap!", inum); + jfs_error(ipimap->i_sb, + "inode %ld not marked as allocated in wmap!\n", + inum); } if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { jfs_error(ipimap->i_sb, - "diUpdatePMap: inode %ld not marked as " - "allocated in pmap!", inum); + "inode %ld not marked as allocated in pmap!\n", + inum); } /* update the bitmap for the extent of the freed inode */ iagp->pmap[extno] &= cpu_to_le32(~mask); @@ -2827,15 +2802,13 @@ diUpdatePMap(struct inode *ipimap, if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { release_metapage(mp); jfs_error(ipimap->i_sb, - "diUpdatePMap: the inode is not allocated in " - "the working map"); + "the inode is not allocated in the working map\n"); return -EIO; } if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { release_metapage(mp); jfs_error(ipimap->i_sb, - "diUpdatePMap: the inode is not free in the " - "persistent map"); + "the inode is not free in the 
persistent map\n"); return -EIO; } /* update the bitmap for the extent of the allocated inode */ @@ -2846,11 +2819,11 @@ diUpdatePMap(struct inode *ipimap, */ lsn = tblk->lsn; log = JFS_SBI(tblk->sb)->log; + LOGSYNC_LOCK(log, flags); if (mp->lsn != 0) { /* inherit older/smaller lsn */ logdiff(difft, lsn, log); logdiff(diffp, mp->lsn, log); - LOGSYNC_LOCK(log, flags); if (difft < diffp) { mp->lsn = lsn; /* move mp after tblock in logsync list */ @@ -2862,17 +2835,15 @@ diUpdatePMap(struct inode *ipimap, logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log, flags); } else { mp->log = log; mp->lsn = lsn; /* insert mp after tblock in logsync list */ - LOGSYNC_LOCK(log, flags); log->count++; list_add(&mp->synclist, &tblk->synclist); mp->clsn = tblk->clsn; - LOGSYNC_UNLOCK(log, flags); } + LOGSYNC_UNLOCK(log, flags); write_metapage(mp); return (0); } @@ -2881,8 +2852,8 @@ diUpdatePMap(struct inode *ipimap, * diExtendFS() * * function: update imap for extendfs(); - * - * note: AG size has been increased s.t. each k old contiguous AGs are + * + * note: AG size has been increased s.t. each k old contiguous AGs are * coalesced into a new AG; */ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) @@ -2901,7 +2872,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) atomic_read(&imap->im_numfree)); /* - * reconstruct imap + * reconstruct imap * * coalesce contiguous k (newAGSize/oldAGSize) AGs; * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; @@ -2917,7 +2888,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) } /* - * process each iag page of the map. + * process each iag page of the map. * * rebuild AG Free Inode List, AG Free Inode Extent List; */ @@ -2929,21 +2900,19 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) iagp = (struct iag *) bp->data; if (le32_to_cpu(iagp->iagnum) != i) { release_metapage(bp); - jfs_error(ipimap->i_sb, - "diExtendFs: unexpected value of iagnum"); + jfs_error(ipimap->i_sb, "unexpected value of iagnum\n"); return -EIO; } /* leave free iag in the free iag list */ - if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { - release_metapage(bp); + if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) { + release_metapage(bp); continue; } - /* agstart that computes to the same ag is treated as same; */ agstart = le64_to_cpu(iagp->agstart); - /* iagp->agstart = agstart & ~(mp->db_agsize - 1); */ n = agstart >> mp->db_agl2size; + iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size); /* compute backed inodes */ numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts)) @@ -3007,8 +2976,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) if (xnuminos != atomic_read(&imap->im_numinos) || xnumfree != atomic_read(&imap->im_numfree)) { - jfs_error(ipimap->i_sb, - "diExtendFs: numinos or numfree incorrect"); + jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n"); return -EIO; } @@ -3067,25 +3035,52 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno, } /* - * NAME: copy_from_dinode() + * NAME: copy_from_dinode() * - * FUNCTION: Copies inode info from disk inode to in-memory inode + * FUNCTION: Copies inode info from disk inode to in-memory inode * * RETURN VALUES: - * 0 - success - * -ENOMEM - insufficient memory + * 0 - success + * -ENOMEM - insufficient memory */ static int copy_from_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); jfs_ip->fileset = le32_to_cpu(dip->di_fileset); 
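In the diExtendFS() hunk above, the stored agstart is now rounded down to the boundary of the (power-of-two sized) allocation group it falls in, so every IAG that computes to the same AG records the same agstart. A small userspace sketch of that shift-down/shift-up rounding:

#include <stdint.h>
#include <stdio.h>

/* round a block address down to the start of its AG; db_agl2size is
 * log2 of the AG size in blocks, as in the bmap control structure */
static int64_t ag_align_down(int64_t agstart, int db_agl2size)
{
	int64_t n = agstart >> db_agl2size;	/* AG index */

	return n << db_agl2size;		/* first block of that AG */
}

int main(void)
{
	/* e.g. AG size 2^13 blocks: any address inside AG 3 maps to 3 * 8192 */
	printf("%lld\n", (long long)ag_align_down(3 * 8192 + 500, 13)); /* 24576 */
	return 0;
}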
jfs_ip->mode2 = le32_to_cpu(dip->di_mode); + jfs_set_inode_flags(ip); ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff; - ip->i_nlink = le32_to_cpu(dip->di_nlink); - ip->i_uid = le32_to_cpu(dip->di_uid); - ip->i_gid = le32_to_cpu(dip->di_gid); + if (sbi->umask != -1) { + ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask); + /* For directories, add x permission if r is allowed by umask */ + if (S_ISDIR(ip->i_mode)) { + if (ip->i_mode & 0400) + ip->i_mode |= 0100; + if (ip->i_mode & 0040) + ip->i_mode |= 0010; + if (ip->i_mode & 0004) + ip->i_mode |= 0001; + } + } + set_nlink(ip, le32_to_cpu(dip->di_nlink)); + + jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid)); + if (!uid_valid(sbi->uid)) + ip->i_uid = jfs_ip->saved_uid; + else { + ip->i_uid = sbi->uid; + } + + jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid)); + if (!gid_valid(sbi->gid)) + ip->i_gid = jfs_ip->saved_gid; + else { + ip->i_gid = sbi->gid; + } + ip->i_size = le64_to_cpu(dip->di_size); ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec); ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec); @@ -3093,7 +3088,6 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec); ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec); ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec); - ip->i_blksize = ip->i_sb->s_blocksize; ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks)); ip->i_generation = le32_to_cpu(dip->di_gen); @@ -3129,28 +3123,43 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip) } /* - * NAME: copy_to_dinode() + * NAME: copy_to_dinode() * - * FUNCTION: Copies inode info from in-memory inode to disk inode + * FUNCTION: Copies inode info from in-memory inode to disk inode */ static void copy_to_dinode(struct dinode * dip, struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); + struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); dip->di_fileset = cpu_to_le32(jfs_ip->fileset); - dip->di_inostamp = cpu_to_le32(JFS_SBI(ip->i_sb)->inostamp); + dip->di_inostamp = cpu_to_le32(sbi->inostamp); dip->di_number = cpu_to_le32(ip->i_ino); dip->di_gen = cpu_to_le32(ip->i_generation); dip->di_size = cpu_to_le64(ip->i_size); dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks)); dip->di_nlink = cpu_to_le32(ip->i_nlink); - dip->di_uid = cpu_to_le32(ip->i_uid); - dip->di_gid = cpu_to_le32(ip->i_gid); + if (!uid_valid(sbi->uid)) + dip->di_uid = cpu_to_le32(i_uid_read(ip)); + else + dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns, + jfs_ip->saved_uid)); + if (!gid_valid(sbi->gid)) + dip->di_gid = cpu_to_le32(i_gid_read(ip)); + else + dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns, + jfs_ip->saved_gid)); + jfs_get_inode_flags(jfs_ip); /* * mode2 is only needed for storing the higher order bits. 
* Trust i_mode for the lower order ones */ - dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | ip->i_mode); + if (sbi->umask == -1) + dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) | + ip->i_mode); + else /* Leave the original permissions alone */ + dip->di_mode = cpu_to_le32(jfs_ip->mode2); + dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec); dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec); dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec); diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h index 6b59adec036..610a0e9d894 100644 --- a/fs/jfs/jfs_imap.h +++ b/fs/jfs/jfs_imap.h @@ -1,18 +1,18 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_IMAP @@ -24,17 +24,17 @@ * jfs_imap.h: disk inode manager */ -#define EXTSPERIAG 128 /* number of disk inode extent per iag */ -#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ -#define SMAPSZ 4 /* number of words per summary map */ +#define EXTSPERIAG 128 /* number of disk inode extent per iag */ +#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */ +#define SMAPSZ 4 /* number of words per summary map */ #define EXTSPERSUM 32 /* number of extents per summary map entry */ #define L2EXTSPERSUM 5 /* l2 number of extents per summary map */ #define PGSPERIEXT 4 /* number of 4K pages per dinode extent */ -#define MAXIAGS ((1<<20)-1) /* maximum number of iags */ -#define MAXAG 128 /* maximum number of allocation groups */ +#define MAXIAGS ((1<<20)-1) /* maximum number of iags */ +#define MAXAG 128 /* maximum number of allocation groups */ -#define AMAPSIZE 512 /* bytes in the IAG allocation maps */ -#define SMAPSIZE 16 /* bytes in the IAG summary maps */ +#define AMAPSIZE 512 /* bytes in the IAG allocation maps */ +#define SMAPSIZE 16 /* bytes in the IAG summary maps */ /* convert inode number to iag number */ #define INOTOIAG(ino) ((ino) >> L2INOSPERIAG) @@ -45,13 +45,13 @@ /* get the starting block number of the 4K page of an inode extent * that contains ino. */ -#define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \ +#define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \ ((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg))) /* * inode allocation map: - * - * inode allocation map consists of + * + * inode allocation map consists of * . the inode map control page and * . inode allocation group pages (per 4096 inodes) * which are addressed by standard JFS xtree. 
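The umask handling in copy_from_dinode()/copy_to_dinode() above only changes what the kernel reports, not what is stored: when the filesystem is mounted with a umask= option, the in-memory permission bits are synthesized from that umask (directories additionally get an execute bit wherever the matching read bit survives), while copy_to_dinode() leaves the on-disk mode2 permissions untouched. A small userspace sketch of the read-side rule:

#include <stdio.h>
#include <sys/stat.h>

/* synthesize the reported mode from a mount-time umask, as in
 * copy_from_dinode() when sbi->umask != -1 */
static unsigned int jfs_umask_mode(unsigned int mode, unsigned int umask)
{
	mode = (mode & ~0777) | (0777 & ~umask);
	if (S_ISDIR(mode)) {
		/* add x permission wherever r is allowed by the umask */
		if (mode & 0400)
			mode |= 0100;
		if (mode & 0040)
			mode |= 0010;
		if (mode & 0004)
			mode |= 0001;
	}
	return mode;
}

int main(void)
{
	/* umask=0133: regular files read back as 0644, directories as 0755 */
	printf("file %o\n", jfs_umask_mode(S_IFREG | 0600, 0133)); /* 100644 */
	printf("dir  %o\n", jfs_umask_mode(S_IFDIR | 0700, 0133)); /*  40755 */
	return 0;
}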
@@ -60,31 +60,31 @@ * inode allocation group page (per 4096 inodes of an AG) */ struct iag { - __le64 agstart; /* 8: starting block of ag */ - __le32 iagnum; /* 4: inode allocation group number */ - __le32 inofreefwd; /* 4: ag inode free list forward */ - __le32 inofreeback; /* 4: ag inode free list back */ - __le32 extfreefwd; /* 4: ag inode extent free list forward */ - __le32 extfreeback; /* 4: ag inode extent free list back */ - __le32 iagfree; /* 4: iag free list */ + __le64 agstart; /* 8: starting block of ag */ + __le32 iagnum; /* 4: inode allocation group number */ + __le32 inofreefwd; /* 4: ag inode free list forward */ + __le32 inofreeback; /* 4: ag inode free list back */ + __le32 extfreefwd; /* 4: ag inode extent free list forward */ + __le32 extfreeback; /* 4: ag inode extent free list back */ + __le32 iagfree; /* 4: iag free list */ /* summary map: 1 bit per inode extent */ __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes; - * note: this indicates free and backed - * inodes, if the extent is not backed the - * value will be 1. if the extent is - * backed but all inodes are being used the - * value will be 1. if the extent is - * backed but at least one of the inodes is - * free the value will be 0. + * note: this indicates free and backed + * inodes, if the extent is not backed the + * value will be 1. if the extent is + * backed but all inodes are being used the + * value will be 1. if the extent is + * backed but at least one of the inodes is + * free the value will be 0. */ __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */ - __le32 nfreeinos; /* 4: number of free inodes */ - __le32 nfreeexts; /* 4: number of free extents */ + __le32 nfreeinos; /* 4: number of free inodes */ + __le32 nfreeexts; /* 4: number of free extents */ /* (72) */ u8 pad[1976]; /* 1976: pad to 2048 bytes */ /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */ - __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */ + __le32 wmap[EXTSPERIAG]; /* 512: working allocation map */ __le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */ pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */ }; /* (4096) */ @@ -93,44 +93,44 @@ struct iag { * per AG control information (in inode map control page) */ struct iagctl_disk { - __le32 inofree; /* 4: free inode list anchor */ - __le32 extfree; /* 4: free extent list anchor */ - __le32 numinos; /* 4: number of backed inodes */ - __le32 numfree; /* 4: number of free inodes */ + __le32 inofree; /* 4: free inode list anchor */ + __le32 extfree; /* 4: free extent list anchor */ + __le32 numinos; /* 4: number of backed inodes */ + __le32 numfree; /* 4: number of free inodes */ }; /* (16) */ struct iagctl { - int inofree; /* free inode list anchor */ - int extfree; /* free extent list anchor */ - int numinos; /* number of backed inodes */ - int numfree; /* number of free inodes */ + int inofree; /* free inode list anchor */ + int extfree; /* free extent list anchor */ + int numinos; /* number of backed inodes */ + int numfree; /* number of free inodes */ }; /* * per fileset/aggregate inode map control page */ struct dinomap_disk { - __le32 in_freeiag; /* 4: free iag list anchor */ - __le32 in_nextiag; /* 4: next free iag number */ - __le32 in_numinos; /* 4: num of backed inodes */ + __le32 in_freeiag; /* 4: free iag list anchor */ + __le32 in_nextiag; /* 4: next free iag number */ + __le32 in_numinos; /* 4: num of backed inodes */ __le32 in_numfree; /* 4: num of free backed inodes */ __le32 in_nbperiext; /* 
4: num of blocks per inode extent */ - __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ - __le32 in_diskblock; /* 4: for standalone test driver */ - __le32 in_maxag; /* 4: for standalone test driver */ - u8 pad[2016]; /* 2016: pad to 2048 */ + __le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */ + __le32 in_diskblock; /* 4: for standalone test driver */ + __le32 in_maxag; /* 4: for standalone test driver */ + u8 pad[2016]; /* 2016: pad to 2048 */ struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */ }; /* (4096) */ struct dinomap { - int in_freeiag; /* free iag list anchor */ - int in_nextiag; /* next free iag number */ - int in_numinos; /* num of backed inodes */ - int in_numfree; /* num of free backed inodes */ + int in_freeiag; /* free iag list anchor */ + int in_nextiag; /* next free iag number */ + int in_numinos; /* num of backed inodes */ + int in_numfree; /* num of free backed inodes */ int in_nbperiext; /* num of blocks per inode extent */ - int in_l2nbperiext; /* l2 of in_nbperiext */ - int in_diskblock; /* for standalone test driver */ - int in_maxag; /* for standalone test driver */ + int in_l2nbperiext; /* l2 of in_nbperiext */ + int in_diskblock; /* for standalone test driver */ + int in_maxag; /* for standalone test driver */ struct iagctl in_agctl[MAXAG]; /* AG control information */ }; @@ -139,9 +139,9 @@ struct dinomap { */ struct inomap { struct dinomap im_imap; /* 4096: inode allocation control */ - struct inode *im_ipimap; /* 4: ptr to inode for imap */ - struct semaphore im_freelock; /* 4: iag free list lock */ - struct semaphore im_aglock[MAXAG]; /* 512: per AG locks */ + struct inode *im_ipimap; /* 4: ptr to inode for imap */ + struct mutex im_freelock; /* 4: iag free list lock */ + struct mutex im_aglock[MAXAG]; /* 512: per AG locks */ u32 *im_DBGdimap; atomic_t im_numinos; /* num of backed inodes */ atomic_t im_numfree; /* num of free backed inodes */ @@ -159,11 +159,11 @@ struct inomap { #define im_maxag im_imap.in_maxag extern int diFree(struct inode *); -extern int diAlloc(struct inode *, boolean_t, struct inode *); +extern int diAlloc(struct inode *, bool, struct inode *); extern int diSync(struct inode *); /* external references */ extern int diUpdatePMap(struct inode *ipimap, unsigned long inum, - boolean_t is_free, struct tblock * tblk); + bool is_free, struct tblock * tblk); extern int diExtendFS(struct inode *ipimap, struct inode *ipbmap); extern int diMount(struct inode *); extern int diUnmount(struct inode *, int); diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index c0fd7b3eadc..cf47f09e8ac 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -4,21 +4,22 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ + */ #ifndef _H_JFS_INCORE #define _H_JFS_INCORE +#include <linux/mutex.h> #include <linux/rwsem.h> #include <linux/slab.h> #include <linux/bitops.h> @@ -37,7 +38,9 @@ struct jfs_inode_info { int fileset; /* fileset number (always 16)*/ uint mode2; /* jfs-specific mode */ - pxd_t ixpxd; /* inode extent descriptor */ + kuid_t saved_uid; /* saved for uid mount option */ + kgid_t saved_gid; /* saved for gid mount option */ + pxd_t ixpxd; /* inode extent descriptor */ dxd_t acl; /* dxd describing acl */ dxd_t ea; /* dxd describing ea */ time_t otime; /* time created */ @@ -46,9 +49,10 @@ struct jfs_inode_info { short btorder; /* access order */ short btindex; /* btpage entry index*/ struct inode *ipimap; /* inode map */ - long cflag; /* commit flags */ + unsigned long cflag; /* commit flags */ + u64 agstart; /* agstart of the containing IAG */ u16 bxflag; /* xflag of pseudo buffer? */ - unchar agno; /* ag number */ + unchar pad; signed char active_ag; /* ag currently allocating from */ lid_t blid; /* lid of pseudo buffer? */ lid_t atlhead; /* anonymous tlock list head */ @@ -58,23 +62,19 @@ struct jfs_inode_info { /* * rdwrlock serializes xtree between reads & writes and synchronizes * changes to special inodes. It's use would be redundant on - * directories since the i_sem taken in the VFS is sufficient. + * directories since the i_mutex taken in the VFS is sufficient. */ struct rw_semaphore rdwrlock; /* - * commit_sem serializes transaction processing on an inode. + * commit_mutex serializes transaction processing on an inode. * It must be taken after beginning a transaction (txBegin), since * dirty inodes may be committed while a new transaction on the * inode is blocked in txBegin or TxBeginAnon */ - struct semaphore commit_sem; - /* xattr_sem allows us to access the xattrs without taking i_sem */ + struct mutex commit_mutex; + /* xattr_sem allows us to access the xattrs without taking i_mutex */ struct rw_semaphore xattr_sem; lid_t xtlid; /* lid of xtree lock on directory */ -#ifdef CONFIG_JFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif union { struct { xtpage_t _xtroot; /* 288: xtree root */ @@ -104,11 +104,11 @@ struct jfs_inode_info { #define i_inline u.link._inline #define i_inline_ea u.link._inline_ea -#define JFS_ACL_NOT_CACHED ((void *)-1) - -#define IREAD_LOCK(ip) down_read(&JFS_IP(ip)->rdwrlock) +#define IREAD_LOCK(ip, subclass) \ + down_read_nested(&JFS_IP(ip)->rdwrlock, subclass) #define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) -#define IWRITE_LOCK(ip) down_write(&JFS_IP(ip)->rdwrlock) +#define IWRITE_LOCK(ip, subclass) \ + down_write_nested(&JFS_IP(ip)->rdwrlock, subclass) #define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock) /* @@ -124,6 +124,29 @@ enum cflags { COMMIT_Synclist, /* metadata pages on group commit synclist */ }; +/* + * commit_mutex nesting subclasses: + */ +enum commit_mutex_class +{ + COMMIT_MUTEX_PARENT, + COMMIT_MUTEX_CHILD, + COMMIT_MUTEX_SECOND_PARENT, /* Renaming */ + COMMIT_MUTEX_VICTIM /* Inode being unlinked due to rename */ +}; + +/* + * rdwrlock subclasses: + * The dmap inode may be locked while a normal inode or the imap inode are + * locked. 
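A hedged sketch of how these lock classes are consumed. The IREAD_LOCK() line mirrors the imap usage already visible in the jfs_imap.c hunks above; the commit_mutex calls are modeled on the rename path in jfs_namei.c (not part of these hunks) and illustrate the annotation rather than quoting it:

/* rdwrlock: the imap (or dmap) inode may be taken while a regular inode
 * is already locked, so callers name the nesting level for lockdep */
IREAD_LOCK(ipimap, RDWRLOCK_IMAP);	/* expands to down_read_nested(..., RDWRLOCK_IMAP) */
/* ... read the IAG ... */
IREAD_UNLOCK(ipimap);

/* commit_mutex: multi-inode operations take the mutexes in one fixed
 * order and tag each acquisition with its class */
mutex_lock_nested(&JFS_IP(old_dir)->commit_mutex, COMMIT_MUTEX_PARENT);
mutex_lock_nested(&JFS_IP(old_ip)->commit_mutex, COMMIT_MUTEX_CHILD);
if (new_dir != old_dir)
	mutex_lock_nested(&JFS_IP(new_dir)->commit_mutex,
			  COMMIT_MUTEX_SECOND_PARENT);
if (new_ip)
	mutex_lock_nested(&JFS_IP(new_ip)->commit_mutex,
			  COMMIT_MUTEX_VICTIM);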
+ */ +enum rdwrlock_class +{ + RDWRLOCK_NORMAL, + RDWRLOCK_IMAP, + RDWRLOCK_DMAP +}; + #define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag)) #define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag)) #define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag)) @@ -162,13 +185,17 @@ struct jfs_sb_info { uint gengen; /* inode generation generator*/ uint inostamp; /* shows inode belongs to fileset*/ - /* Formerly in ipbmap */ + /* Formerly in ipbmap */ struct bmap *bmap; /* incore bmap descriptor */ struct nls_table *nls_tab; /* current codepage */ struct inode *direct_inode; /* metadata inode */ uint state; /* mount/recovery state */ unsigned long flag; /* mount time flags */ uint p_state; /* state prior to going no integrity */ + kuid_t uid; /* uid to override on-disk uid */ + kgid_t gid; /* gid to override on-disk gid */ + uint umask; /* umask to override on-disk umask */ + uint minblks_trim; /* minimum blocks, for online trim */ }; /* jfs_sb_info commit_state */ diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 2af5efbfd06..6b0f816201a 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -25,6 +25,44 @@ #include "jfs_dinode.h" #include "jfs_debug.h" + +void jfs_set_inode_flags(struct inode *inode) +{ + unsigned int flags = JFS_IP(inode)->mode2; + unsigned int new_fl = 0; + + if (flags & JFS_IMMUTABLE_FL) + new_fl |= S_IMMUTABLE; + if (flags & JFS_APPEND_FL) + new_fl |= S_APPEND; + if (flags & JFS_NOATIME_FL) + new_fl |= S_NOATIME; + if (flags & JFS_DIRSYNC_FL) + new_fl |= S_DIRSYNC; + if (flags & JFS_SYNC_FL) + new_fl |= S_SYNC; + inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND | S_NOATIME | + S_DIRSYNC | S_SYNC); +} + +void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) +{ + unsigned int flags = jfs_ip->vfs_inode.i_flags; + + jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL | + JFS_DIRSYNC_FL | JFS_SYNC_FL); + if (flags & S_IMMUTABLE) + jfs_ip->mode2 |= JFS_IMMUTABLE_FL; + if (flags & S_APPEND) + jfs_ip->mode2 |= JFS_APPEND_FL; + if (flags & S_NOATIME) + jfs_ip->mode2 |= JFS_NOATIME_FL; + if (flags & S_DIRSYNC) + jfs_ip->mode2 |= JFS_DIRSYNC_FL; + if (flags & S_SYNC) + jfs_ip->mode2 |= JFS_SYNC_FL; +} + /* * NAME: ialloc() * @@ -41,7 +79,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode) inode = new_inode(sb); if (!inode) { jfs_warn("ialloc: new_inode returned NULL!"); - return inode; + rc = -ENOMEM; + goto fail; } jfs_inode = JFS_IP(inode); @@ -49,36 +88,46 @@ struct inode *ialloc(struct inode *parent, umode_t mode) rc = diAlloc(parent, S_ISDIR(mode), inode); if (rc) { jfs_warn("ialloc: diAlloc returned %d!", rc); - make_bad_inode(inode); - iput(inode); - 
return NULL; + if (rc == -EIO) + make_bad_inode(inode); + goto fail_put; + } + + if (insert_inode_locked(inode) < 0) { + rc = -EINVAL; + goto fail_put; } - inode->i_uid = current->fsuid; - if (parent->i_mode & S_ISGID) { - inode->i_gid = parent->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else - inode->i_gid = current->fsgid; + inode_init_owner(inode, parent, mode); + /* + * New inodes need to save sane values on disk when + * uid & gid mount options are used + */ + jfs_inode->saved_uid = inode->i_uid; + jfs_inode->saved_gid = inode->i_gid; /* * Allocate inode to quota. */ - if (DQUOT_ALLOC_INODE(inode)) { - DQUOT_DROP(inode); - inode->i_flags |= S_NOQUOTA; - inode->i_nlink = 0; - iput(inode); - return NULL; + dquot_initialize(inode); + rc = dquot_alloc_inode(inode); + if (rc) + goto fail_drop; + + /* inherit flags from parent */ + jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT; + + if (S_ISDIR(mode)) { + jfs_inode->mode2 |= IDIRECTORY; + jfs_inode->mode2 &= ~JFS_DIRSYNC_FL; } + else { + jfs_inode->mode2 |= INLINEEA | ISPARSE; + if (S_ISLNK(mode)) + jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL); + } + jfs_inode->mode2 |= inode->i_mode; - inode->i_mode = mode; - if (S_ISDIR(mode)) - jfs_inode->mode2 = IDIRECTORY | mode; - else - jfs_inode->mode2 = INLINEEA | ISPARSE | mode; - inode->i_blksize = sb->s_blocksize; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; jfs_inode->otime = inode->i_ctime.tv_sec; @@ -98,8 +147,19 @@ struct inode *ialloc(struct inode *parent, umode_t mode) jfs_inode->atlhead = 0; jfs_inode->atltail = 0; jfs_inode->xtlid = 0; + jfs_set_inode_flags(inode); jfs_info("ialloc returns inode = 0x%p\n", inode); return inode; + +fail_drop: + dquot_drop(inode); + inode->i_flags |= S_NOQUOTA; + clear_nlink(inode); + unlock_new_inode(inode); +fail_put: + iput(inode); +fail: + return ERR_PTR(rc); } diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index b54bac576cb..9271cfe4a14 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -3,38 +3,51 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
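With the rework above, ialloc() reports failure as an ERR_PTR() instead of NULL, and the quota/nlink cleanup is centralized in the fail_drop/fail_put labels. A short, illustrative caller (modeled on how jfs_create()-style code would consume the new convention; the surrounding variable names are assumptions):

	struct inode *ip;

	ip = ialloc(dip, mode);
	if (IS_ERR(ip)) {
		rc = PTR_ERR(ip);	/* -ENOMEM, -ENOSPC, -EIO, ... */
		goto out;
	}
	/* ip is locked (insert_inode_locked) and charged to quota here */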
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_INODE #define _H_JFS_INODE +struct fid; + extern struct inode *ialloc(struct inode *, umode_t); -extern int jfs_fsync(struct file *, struct dentry *, int); -extern void jfs_read_inode(struct inode *); +extern int jfs_fsync(struct file *, loff_t, loff_t, int); +extern long jfs_ioctl(struct file *, unsigned int, unsigned long); +extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); +extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); -extern int jfs_write_inode(struct inode*, int); -extern void jfs_delete_inode(struct inode *); -extern void jfs_dirty_inode(struct inode *); +extern int jfs_write_inode(struct inode *, struct writeback_control *); +extern void jfs_evict_inode(struct inode *); +extern void jfs_dirty_inode(struct inode *, int); extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); +extern void jfs_get_inode_flags(struct jfs_inode_info *); +extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); +extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type); +extern void jfs_set_inode_flags(struct inode *); +extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int); +extern int jfs_setattr(struct dentry *, struct iattr *); -extern struct address_space_operations jfs_aops; -extern struct inode_operations jfs_dir_inode_operations; -extern struct file_operations jfs_dir_operations; -extern struct inode_operations jfs_file_inode_operations; -extern struct file_operations jfs_file_operations; -extern struct inode_operations jfs_symlink_inode_operations; -extern struct dentry_operations jfs_ci_dentry_operations; +extern const struct address_space_operations jfs_aops; +extern const struct inode_operations jfs_dir_inode_operations; +extern const struct file_operations jfs_dir_operations; +extern const struct inode_operations jfs_file_inode_operations; +extern const struct file_operations jfs_file_operations; +extern const struct inode_operations jfs_symlink_inode_operations; +extern const struct inode_operations jfs_fast_symlink_inode_operations; +extern const struct dentry_operations jfs_ci_dentry_operations; #endif /* _H_JFS_INODE */ diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h index 10ad1d08668..ecf04882265 100644 --- a/fs/jfs/jfs_lock.h +++ b/fs/jfs/jfs_lock.h @@ -1,25 +1,26 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2001 - * Portions Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) International Business Machines Corp., 2000-2001 + * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
- * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_LOCK #define _H_JFS_LOCK #include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/sched.h> /* @@ -41,10 +42,10 @@ do { \ if (cond) \ break; \ unlock_cmd; \ - schedule(); \ + io_schedule(); \ lock_cmd; \ } \ - current->state = TASK_RUNNING; \ + __set_current_state(TASK_RUNNING); \ remove_wait_queue(&wq, &__wait); \ } while (0) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index d27bac6acaa..0acddf60af5 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -4,16 +4,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -62,12 +62,16 @@ #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/interrupt.h> -#include <linux/smp_lock.h> #include <linux/completion.h> +#include <linux/kthread.h> #include <linux/buffer_head.h> /* for sync_blockdev() */ #include <linux/bio.h> -#include <linux/suspend.h> +#include <linux/freezer.h> +#include <linux/export.h> #include <linux/delay.h> +#include <linux/mutex.h> +#include <linux/seq_file.h> +#include <linux/slab.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" @@ -81,15 +85,14 @@ */ static struct lbuf *log_redrive_list; static DEFINE_SPINLOCK(log_redrive_lock); -DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait); /* * log read/write serialization (per log) */ -#define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock) -#define LOG_LOCK(log) down(&((log)->loglock)) -#define LOG_UNLOCK(log) up(&((log)->loglock)) +#define LOG_LOCK_INIT(log) mutex_init(&(log)->loglock) +#define LOG_LOCK(log) mutex_lock(&((log)->loglock)) +#define LOG_UNLOCK(log) mutex_unlock(&((log)->loglock)) /* @@ -164,8 +167,8 @@ do { \ * Global list of active external journals */ static LIST_HEAD(jfs_external_logs); -static struct jfs_log *dummy_log = NULL; -static DECLARE_MUTEX(jfs_log_sem); +static struct jfs_log *dummy_log; +static DEFINE_MUTEX(jfs_log_mutex); /* * forward references @@ -208,6 +211,17 @@ static struct lmStat { } lmStat; #endif +static void write_special_inodes(struct jfs_log *log, + int (*writer)(struct address_space *)) +{ + struct jfs_sb_info *sbi; + + list_for_each_entry(sbi, &log->sb_list, log_list) { + writer(sbi->ipbmap->i_mapping); + writer(sbi->ipimap->i_mapping); + 
writer(sbi->direct_inode->i_mapping); + } +} /* * NAME: lmLog() @@ -244,7 +258,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, goto writeRecord; /* - * initialize/update page/transaction recovery lsn + * initialize/update page/transaction recovery lsn */ lsn = log->lsn; @@ -263,7 +277,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * initialize/update lsn of tblock of the page + * initialize/update lsn of tblock of the page * * transaction inherits oldest lsn of pages associated * with allocation/deallocation of resources (their @@ -307,7 +321,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, LOGSYNC_UNLOCK(log, flags); /* - * write the log record + * write the log record */ writeRecord: lsn = lmWriteRecord(log, tblk, lrd, tlck); @@ -336,7 +350,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * PARAMETER: cd - commit descriptor * * RETURN: end-of-log address - * + * * serialization: LOG_LOCK() held on entry/exit */ static int @@ -372,7 +386,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, goto moveLrd; /* - * move log record data + * move log record data */ /* retrieve source meta-data page to log */ if (tlck->flag & tlckPAGELOCK) { @@ -465,7 +479,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * move log record descriptor + * move log record descriptor */ moveLrd: lrd->length = cpu_to_le16(len); @@ -553,7 +567,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * PARAMETER: log * * RETURN: 0 - * + * * serialization: LOG_LOCK() held on entry/exit */ static int lmNextPage(struct jfs_log * log) @@ -574,7 +588,7 @@ static int lmNextPage(struct jfs_log * log) LOGGC_LOCK(log); /* - * write or queue the full page at the tail of write queue + * write or queue the full page at the tail of write queue */ /* get the tail tblk on commit queue */ if (list_empty(&log->cqueue)) @@ -625,7 +639,7 @@ static int lmNextPage(struct jfs_log * log) LOGGC_UNLOCK(log); /* - * allocate/initialize next page + * allocate/initialize next page */ /* if log wraps, the first data page of log is 2 * (0 never used, 1 is superblock). @@ -655,7 +669,7 @@ static int lmNextPage(struct jfs_log * log) * page number - redrive pageout of the page at the head of * pageout queue until full page has been written. * - * RETURN: + * RETURN: * * NOTE: * LOGGC_LOCK serializes log group commit queue, and @@ -919,10 +933,10 @@ static void lmPostGC(struct lbuf * bp) * this code is called again. 
* * PARAMETERS: log - log structure - * hard_sync - 1 to force all metadata to be written + * hard_sync - 1 to force all metadata to be written * * RETURN: 0 - * + * * serialization: LOG_LOCK() held on entry/exit */ static int lmLogSync(struct jfs_log * log, int hard_sync) @@ -935,25 +949,16 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) struct lrd lrd; int lsn; struct logsyncblk *lp; - struct jfs_sb_info *sbi; unsigned long flags; /* push dirty metapages out to disk */ if (hard_sync) - list_for_each_entry(sbi, &log->sb_list, log_list) { - filemap_fdatawrite(sbi->ipbmap->i_mapping); - filemap_fdatawrite(sbi->ipimap->i_mapping); - filemap_fdatawrite(sbi->direct_inode->i_mapping); - } + write_special_inodes(log, filemap_fdatawrite); else - list_for_each_entry(sbi, &log->sb_list, log_list) { - filemap_flush(sbi->ipbmap->i_mapping); - filemap_flush(sbi->ipimap->i_mapping); - filemap_flush(sbi->direct_inode->i_mapping); - } + write_special_inodes(log, filemap_flush); /* - * forward syncpt + * forward syncpt */ /* if last sync is same as last syncpt, * invoke sync point forward processing to update sync. @@ -989,7 +994,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) lsn = log->lsn; /* - * setup next syncpt trigger (SWAG) + * setup next syncpt trigger (SWAG) */ logsize = log->logsize; @@ -1000,21 +1005,19 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) if (more < 2 * LOGPSIZE) { jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); /* - * log wrapping + * log wrapping * * option 1 - panic ? No.! * option 2 - shutdown file systems - * associated with log ? + * associated with log ? * option 3 - extend log ? - */ - /* * option 4 - second chance * * mark log wrapped, and continue. * when all active transactions are completed, - * mark log vaild for recovery. + * mark log valid for recovery. * if crashed during invalid state, log state - * implies invald log, forcing fsck(). + * implies invalid log, forcing fsck(). */ /* mark log state log wrap in log superblock */ /* log->state = LOGWRAP; */ @@ -1051,22 +1054,23 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) * FUNCTION: write log SYNCPT record for specified log * * PARAMETERS: log - log structure - * hard_sync - set to 1 to force metadata to be written + * hard_sync - set to 1 to force metadata to be written */ void jfs_syncpt(struct jfs_log *log, int hard_sync) { LOG_LOCK(log); - lmLogSync(log, hard_sync); + if (!test_bit(log_QUIESCE, &log->flag)) + lmLogSync(log, hard_sync); LOG_UNLOCK(log); } /* * NAME: lmLogOpen() * - * FUNCTION: open the log on first open; + * FUNCTION: open the log on first open; * insert filesystem in the active list of the log. 
* * PARAMETER: ipmnt - file system mount inode - * iplog - log inode (out) + * iplog - log inode (out) * * RETURN: * @@ -1081,62 +1085,58 @@ int lmLogOpen(struct super_block *sb) if (sbi->flag & JFS_NOINTEGRITY) return open_dummy_log(sb); - + if (sbi->mntflag & JFS_INLINELOG) return open_inline_log(sb); - down(&jfs_log_sem); + mutex_lock(&jfs_log_mutex); list_for_each_entry(log, &jfs_external_logs, journal_list) { if (log->bdev->bd_dev == sbi->logdev) { if (memcmp(log->uuid, sbi->loguuid, sizeof(log->uuid))) { jfs_warn("wrong uuid on JFS journal\n"); - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return -EINVAL; } /* * add file system to log active file system list */ if ((rc = lmLogFileSystem(log, sbi, 1))) { - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return rc; } goto journal_found; } } - if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) { - up(&jfs_log_sem); + if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) { + mutex_unlock(&jfs_log_mutex); return -ENOMEM; } - memset(log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&log->sb_list); init_waitqueue_head(&log->syncwait); /* - * external log as separate logical volume + * external log as separate logical volume * * file systems to log may have n-to-1 relationship; */ - bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); + bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + log); if (IS_ERR(bdev)) { - rc = -PTR_ERR(bdev); + rc = PTR_ERR(bdev); goto free; } - if ((rc = bd_claim(bdev, log))) { - goto close; - } - log->bdev = bdev; memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); - + /* * initialize log: */ if ((rc = lmLogInit(log))) - goto unclaim; + goto close; list_add(&log->journal_list, &jfs_external_logs); @@ -1152,24 +1152,21 @@ journal_found: sbi->log = log; LOG_UNLOCK(log); - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return 0; /* - * unwind on error + * unwind on error */ shutdown: /* unwind lbmLogInit() */ list_del(&log->journal_list); lbmLogShutdown(log); - unclaim: - bd_release(bdev); - close: /* close external log device */ - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); free: /* free log descriptor */ - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); kfree(log); jfs_warn("lmLogOpen: exit(%d)", rc); @@ -1181,9 +1178,8 @@ static int open_inline_log(struct super_block *sb) struct jfs_log *log; int rc; - if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) + if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) return -ENOMEM; - memset(log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&log->sb_list); init_waitqueue_head(&log->syncwait); @@ -1214,14 +1210,13 @@ static int open_dummy_log(struct super_block *sb) { int rc; - down(&jfs_log_sem); + mutex_lock(&jfs_log_mutex); if (!dummy_log) { - dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL); + dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL); if (!dummy_log) { - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return -ENOMEM; } - memset(dummy_log, 0, sizeof(struct jfs_log)); INIT_LIST_HEAD(&dummy_log->sb_list); init_waitqueue_head(&dummy_log->syncwait); dummy_log->no_integrity = 1; @@ -1232,7 +1227,7 @@ static int open_dummy_log(struct super_block *sb) if (rc) { kfree(dummy_log); dummy_log = NULL; - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); return rc; } } @@ -1241,7 +1236,7 @@ static int open_dummy_log(struct super_block *sb) list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list); JFS_SBI(sb)->log = dummy_log; LOG_UNLOCK(dummy_log); - up(&jfs_log_sem); + 
mutex_unlock(&jfs_log_mutex); return 0; } @@ -1255,13 +1250,13 @@ static int open_dummy_log(struct super_block *sb) * initialize the log from log superblock. * set the log state in the superblock to LOGMOUNT and * write SYNCPT log record. - * + * * PARAMETER: log - log structure * * RETURN: 0 - if ok * -EINVAL - bad log magic number or superblock dirty * error returned from logwait() - * + * * serialization: single first open thread */ int lmLogInit(struct jfs_log * log) @@ -1299,7 +1294,7 @@ int lmLogInit(struct jfs_log * log) if (!test_bit(log_INLINELOG, &log->flag)) log->l2bsize = L2LOGPSIZE; - + /* check for disabled journaling to disk */ if (log->no_integrity) { /* @@ -1430,7 +1425,7 @@ int lmLogInit(struct jfs_log * log) return 0; /* - * unwind on error + * unwind on error */ errout30: /* release log page */ log->wqueue = NULL; @@ -1469,7 +1464,7 @@ int lmLogClose(struct super_block *sb) jfs_info("lmLogClose: log:0x%p", log); - down(&jfs_log_sem); + mutex_lock(&jfs_log_mutex); LOG_LOCK(log); list_del(&sbi->log_list); LOG_UNLOCK(log); @@ -1483,7 +1478,7 @@ int lmLogClose(struct super_block *sb) if (test_bit(log_INLINELOG, &log->flag)) { /* - * in-line log in host file system + * in-line log in host file system */ rc = lmLogShutdown(log); kfree(log); @@ -1507,19 +1502,18 @@ int lmLogClose(struct super_block *sb) goto out; /* - * external log as separate logical volume + * external log as separate logical volume */ list_del(&log->journal_list); bdev = log->bdev; rc = lmLogShutdown(log); - bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); kfree(log); out: - up(&jfs_log_sem); + mutex_unlock(&jfs_log_mutex); jfs_info("lmLogClose: exit(%d)", rc); return rc; } @@ -1539,7 +1533,6 @@ void jfs_flush_journal(struct jfs_log *log, int wait) { int i; struct tblock *target = NULL; - struct jfs_sb_info *sbi; /* jfs_write_inode may call us during read-only mount */ if (!log) @@ -1592,7 +1585,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) set_current_state(TASK_UNINTERRUPTIBLE); LOGGC_UNLOCK(log); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); LOGGC_LOCK(log); remove_wait_queue(&target->gcwait, &__wait); } @@ -1601,11 +1594,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) if (wait < 2) return; - list_for_each_entry(sbi, &log->sb_list, log_list) { - filemap_fdatawrite(sbi->ipbmap->i_mapping); - filemap_fdatawrite(sbi->ipimap->i_mapping); - filemap_fdatawrite(sbi->direct_inode->i_mapping); - } + write_special_inodes(log, filemap_fdatawrite); /* * If there was recent activity, we may need to wait @@ -1614,6 +1603,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { for (i = 0; i < 200; i++) { /* Too much? 
*/ msleep(250); + write_special_inodes(log, filemap_fdatawrite); if (list_empty(&log->cqueue) && list_empty(&log->synclist)) break; @@ -1625,20 +1615,26 @@ void jfs_flush_journal(struct jfs_log *log, int wait) if (!list_empty(&log->synclist)) { struct logsyncblk *lp; + printk(KERN_ERR "jfs_flush_journal: synclist not empty\n"); list_for_each_entry(lp, &log->synclist, synclist) { if (lp->xflag & COMMIT_PAGE) { struct metapage *mp = (struct metapage *)lp; - dump_mem("orphan metapage", lp, - sizeof(struct metapage)); - dump_mem("page", mp->page, sizeof(struct page)); - } - else - dump_mem("orphan tblock", lp, - sizeof(struct tblock)); + print_hex_dump(KERN_ERR, "metapage: ", + DUMP_PREFIX_ADDRESS, 16, 4, + mp, sizeof(struct metapage), 0); + print_hex_dump(KERN_ERR, "page: ", + DUMP_PREFIX_ADDRESS, 16, + sizeof(long), mp->page, + sizeof(struct page), 0); + } else + print_hex_dump(KERN_ERR, "tblock:", + DUMP_PREFIX_ADDRESS, 16, 4, + lp, sizeof(struct tblock), 0); } } +#else + WARN_ON(!list_empty(&log->synclist)); #endif - //assert(list_empty(&log->synclist)); clear_bit(log_FLUSH, &log->flag); } @@ -1653,7 +1649,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait) * PARAMETER: log - log inode * * RETURN: 0 - success - * + * * serialization: single last close thread */ int lmLogShutdown(struct jfs_log * log) @@ -1679,7 +1675,7 @@ int lmLogShutdown(struct jfs_log * log) lrd.type = cpu_to_le16(LOG_SYNCPT); lrd.length = 0; lrd.log.syncpt.sync = 0; - + lsn = lmWriteRecord(log, NULL, &lrd, NULL); bp = log->bp; lp = (struct logpage *) bp->l_ldata; @@ -1705,7 +1701,7 @@ int lmLogShutdown(struct jfs_log * log) jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", lsn, log->page, log->eor); - out: + out: /* * shutdown per log i/o */ @@ -1726,7 +1722,7 @@ int lmLogShutdown(struct jfs_log * log) * * PARAMETE: log - pointer to logs inode. * fsdev - kdev_t of filesystem. - * serial - pointer to returned log serial number + * serial - pointer to returned log serial number * activate - insert/remove device from active list. * * RETURN: 0 - success @@ -1771,7 +1767,7 @@ static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, lbmFree(bpsuper); return -EIO; } - + } /* @@ -1963,10 +1959,10 @@ static void lbmfree(struct lbuf * bp) /* * NAME: lbmRedrive * - * FUNCTION: add a log buffer to the the log redrive list + * FUNCTION: add a log buffer to the log redrive list * * PARAMETER: - * bp - log buffer + * bp - log buffer * * NOTES: * Takes log_redrive_lock. 
@@ -1980,7 +1976,7 @@ static inline void lbmRedrive(struct lbuf *bp) log_redrive_list = bp; spin_unlock_irqrestore(&log_redrive_lock, flags); - wake_up(&jfs_IO_thread_wait); + wake_up_process(jfsIOthread); } @@ -2002,19 +1998,24 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio = bio_alloc(GFP_NOFS, 1); - bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); + bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9); bio->bi_bdev = log->bdev; bio->bi_io_vec[0].bv_page = bp->l_page; bio->bi_io_vec[0].bv_len = LOGPSIZE; bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; - bio->bi_idx = 0; - bio->bi_size = LOGPSIZE; + bio->bi_iter.bi_size = LOGPSIZE; bio->bi_end_io = lbmIODone; bio->bi_private = bp; - submit_bio(READ_SYNC, bio); + /*check if journaling to disk has been disabled*/ + if (log->no_integrity) { + bio->bi_iter.bi_size = 0; + lbmIODone(bio, 0); + } else { + submit_bio(READ_SYNC, bio); + } wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); @@ -2057,7 +2058,7 @@ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, bp->l_flag = flag; /* - * insert bp at tail of write queue associated with log + * insert bp at tail of write queue associated with log * * (request is either for bp already/currently at head of queue * or new bp to be inserted at tail) @@ -2120,7 +2121,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); /* - * initiate pageout of the page + * initiate pageout of the page */ lbmStartIO(bp); } @@ -2131,7 +2132,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) * * FUNCTION: Interface to DD strategy routine * - * RETURN: none + * RETURN: none * * serialization: LCACHE_LOCK() is NOT held during log i/o; */ @@ -2143,23 +2144,22 @@ static void lbmStartIO(struct lbuf * bp) jfs_info("lbmStartIO\n"); bio = bio_alloc(GFP_NOFS, 1); - bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); + bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9); bio->bi_bdev = log->bdev; bio->bi_io_vec[0].bv_page = bp->l_page; bio->bi_io_vec[0].bv_len = LOGPSIZE; bio->bi_io_vec[0].bv_offset = bp->l_offset; bio->bi_vcnt = 1; - bio->bi_idx = 0; - bio->bi_size = LOGPSIZE; + bio->bi_iter.bi_size = LOGPSIZE; bio->bi_end_io = lbmIODone; bio->bi_private = bp; /* check if journaling to disk has been disabled */ if (log->no_integrity) { - bio->bi_size = 0; - lbmIODone(bio, 0, 0); + bio->bi_iter.bi_size = 0; + lbmIODone(bio, 0); } else { submit_bio(WRITE_SYNC, bio); INCREMENT(lmStat.submitted); @@ -2197,16 +2197,13 @@ static int lbmIOWait(struct lbuf * bp, int flag) * * executed at INTIODONE level */ -static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) +static void lbmIODone(struct bio *bio, int error) { struct lbuf *bp = bio->bi_private; struct lbuf *nextbp, *tail; struct jfs_log *log; unsigned long flags; - if (bio->bi_size) - return 1; - /* * get back jfs buffer bound to the i/o buffer */ @@ -2225,7 +2222,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) bio_put(bio); /* - * pagein completion + * pagein completion */ if (bp->l_flag & lbmREAD) { bp->l_flag &= ~lbmREAD; @@ -2235,11 +2232,11 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) /* wakeup I/O initiator */ LCACHE_WAKEUP(&bp->l_ioevent); - return 0; + return; } /* - * pageout completion + * pageout completion * * the bp at the head of write queue has completed pageout. 
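The lbmRead()/lbmStartIO() changes above follow the block layer's relocation of the iteration state into bio->bi_iter and the two-argument completion prototype. A condensed sketch of the single-page submission as it looks after the patch, with the old field names noted in comments (error handling and the no_integrity short-circuit omitted):

	bio = bio_alloc(GFP_NOFS, 1);
	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);	/* was bi_sector */
	bio->bi_bdev = log->bdev;
	bio->bi_io_vec[0].bv_page = bp->l_page;
	bio->bi_io_vec[0].bv_len = LOGPSIZE;
	bio->bi_io_vec[0].bv_offset = bp->l_offset;
	bio->bi_vcnt = 1;
	bio->bi_iter.bi_size = LOGPSIZE;	/* was bi_size; bi_idx is gone */
	bio->bi_end_io = lbmIODone;		/* now void lbmIODone(struct bio *, int) */
	bio->bi_private = bp;
	submit_bio(WRITE_SYNC, bio);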
* @@ -2260,7 +2257,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) if (bp->l_flag & lbmDIRECT) { LCACHE_WAKEUP(&bp->l_ioevent); LCACHE_UNLOCK(flags); - return 0; + return; } tail = log->wqueue; @@ -2305,7 +2302,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) } /* - * synchronous pageout: + * synchronous pageout: * * buffer has not necessarily been removed from write queue * (e.g., synchronous write of partial-page with COMMIT): @@ -2319,7 +2316,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) } /* - * Group Commit pageout: + * Group Commit pageout: */ else if (bp->l_flag & lbmGC) { LCACHE_UNLOCK(flags); @@ -2327,7 +2324,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) } /* - * asynchronous pageout: + * asynchronous pageout: * * buffer must have been removed from write queue: * insert buffer at head of freelist where it can be recycled @@ -2339,44 +2336,35 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) LCACHE_UNLOCK(flags); /* unlock+enable */ } - - return 0; } int jfsIOWait(void *arg) { struct lbuf *bp; - daemonize("jfsIO"); - - complete(&jfsIOwait); - do { - DECLARE_WAITQUEUE(wq, current); - spin_lock_irq(&log_redrive_lock); - while ((bp = log_redrive_list) != 0) { + while ((bp = log_redrive_list)) { log_redrive_list = bp->l_redrive_next; bp->l_redrive_next = NULL; spin_unlock_irq(&log_redrive_lock); lbmStartIO(bp); spin_lock_irq(&log_redrive_lock); } + if (freezing(current)) { spin_unlock_irq(&log_redrive_lock); - refrigerator(); + try_to_freeze(); } else { - add_wait_queue(&jfs_IO_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&log_redrive_lock); schedule(); - current->state = TASK_RUNNING; - remove_wait_queue(&jfs_IO_thread_wait, &wq); + __set_current_state(TASK_RUNNING); } - } while (!jfs_stop_threads); + } while (!kthread_should_stop()); jfs_info("jfsIOWait being killed!"); - complete_and_exit(&jfsIOwait, 0); + return 0; } /* @@ -2385,7 +2373,7 @@ int jfsIOWait(void *arg) * FUNCTION: format file system log * * PARAMETERS: - * log - volume log + * log - volume log * logAddress - start address of log space in FS block * logSize - length of log space in FS block; * @@ -2417,16 +2405,16 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) npages = logSize >> sbi->l2nbperpage; /* - * log space: + * log space: * * page 0 - reserved; * page 1 - log superblock; * page 2 - log data page: A SYNC log record is written - * into this page at logform time; + * into this page at logform time; * pages 3-N - log data page: set to empty log data pages; */ /* - * init log superblock: log page 1 + * init log superblock: log page 1 */ logsuper = (struct logsuper *) bp->l_ldata; @@ -2446,7 +2434,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) goto exit; /* - * init pages 2 to npages-1 as log data pages: + * init pages 2 to npages-1 as log data pages: * * log page sequence number (lpsn) initialization: * @@ -2489,7 +2477,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) goto exit; /* - * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) + * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) */ for (lspn = 0; lspn < npages - 3; lspn++) { lp->h.page = lp->t.page = cpu_to_le32(lspn); @@ -2505,7 +2493,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) rc = 0; exit: /* - * finalize log + * finalize log */ /* release the buffer */ lbmFree(bp); 
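jfsIOWait() above now runs as a plain kthread: lbmRedrive() queues a buffer under log_redrive_lock and wakes the task directly with wake_up_process(), and the loop exits when kthread_should_stop() returns true. A hedged sketch of the surrounding thread lifecycle; the start/stop helpers here are illustrative names, and the real calls are assumed to live in jfs's module init/exit code, which is outside these hunks:

#include <linux/err.h>
#include <linux/kthread.h>

static struct task_struct *jfsIOthread;

static int jfs_start_io_thread(void)		/* illustrative name */
{
	jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO");
	return IS_ERR(jfsIOthread) ? PTR_ERR(jfsIOthread) : 0;
}

static void jfs_stop_io_thread(void)		/* illustrative name */
{
	kthread_stop(jfsIOthread);	/* kthread_should_stop() becomes true */
}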
@@ -2514,13 +2502,9 @@ exit: } #ifdef CONFIG_JFS_STATISTICS -int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_lmstats_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; - - len += sprintf(buffer, + seq_printf(m, "JFS Logmgr stats\n" "================\n" "commits = %d\n" @@ -2533,19 +2517,19 @@ int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, lmStat.pagedone, lmStat.full_page, lmStat.partial_page); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_lmstats_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_lmstats_proc_show, NULL); } + +const struct file_operations jfs_lmstats_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_lmstats_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_JFS_STATISTICS */ diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h index e4978b5b65e..e38c2159885 100644 --- a/fs/jfs/jfs_logmgr.h +++ b/fs/jfs/jfs_logmgr.h @@ -4,16 +4,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_LOGMGR @@ -35,19 +35,19 @@ /* * log logical volume * - * a log is used to make the commit operation on journalled + * a log is used to make the commit operation on journalled * files within the same logical volume group atomic. * a log is implemented with a logical volume. - * there is one log per logical volume group. + * there is one log per logical volume group. * * block 0 of the log logical volume is not used (ipl etc). * block 1 contains a log "superblock" and is used by logFormat(), - * lmLogInit(), lmLogShutdown(), and logRedo() to record status - * of the log but is not otherwise used during normal processing. + * lmLogInit(), lmLogShutdown(), and logRedo() to record status + * of the log but is not otherwise used during normal processing. * blocks 2 - (N-1) are used to contain log records. * - * when a volume group is varied-on-line, logRedo() must have - * been executed before the file systems (logical volumes) in + * when a volume group is varied-on-line, logRedo() must have + * been executed before the file systems (logical volumes) in * the volume group can be mounted. */ /* @@ -97,26 +97,26 @@ struct logsuper { * log logical page * * (this comment should be rewritten !) - * the header and trailer structures (h,t) will normally have + * the header and trailer structures (h,t) will normally have * the same page and eor value. 
- * An exception to this occurs when a complete page write is not + * An exception to this occurs when a complete page write is not * accomplished on a power failure. Since the hardware may "split write" - * sectors in the page, any out of order sequence may occur during powerfail + * sectors in the page, any out of order sequence may occur during powerfail * and needs to be recognized during log replay. The xor value is * an "exclusive or" of all log words in the page up to eor. This * 32 bit eor is stored with the top 16 bits in the header and the * bottom 16 bits in the trailer. logredo can easily recognize pages - * that were not completed by reconstructing this eor and checking + * that were not completed by reconstructing this eor and checking * the log page. * - * Previous versions of the operating system did not allow split - * writes and detected partially written records in logredo by - * ordering the updates to the header, trailer, and the move of data - * into the logdata area. The order: (1) data is moved (2) header - * is updated (3) trailer is updated. In logredo, when the header - * differed from the trailer, the header and trailer were reconciled - * as follows: if h.page != t.page they were set to the smaller of - * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) + * Previous versions of the operating system did not allow split + * writes and detected partially written records in logredo by + * ordering the updates to the header, trailer, and the move of data + * into the logdata area. The order: (1) data is moved (2) header + * is updated (3) trailer is updated. In logredo, when the header + * differed from the trailer, the header and trailer were reconciled + * as follows: if h.page != t.page they were set to the smaller of + * the two and h.eor and t.eor set to 8 (i.e. empty page). if (only) * h.eor != t.eor they were set to the smaller of their two values. */ struct logpage { @@ -144,23 +144,23 @@ struct logpage { * * (this comment should be rewritten !) * jfs uses only "after" log records (only a single writer is allowed - * in a page, pages are written to temporary paging space if + * in a page, pages are written to temporary paging space if * if they must be written to disk before commit, and i/o is * scheduled for modified pages to their home location after - * the log records containing the after values and the commit + * the log records containing the after values and the commit * record is written to the log on disk, undo discards the copy * in main-memory.) * - * a log record consists of a data area of variable length followed by + * a log record consists of a data area of variable length followed by * a descriptor of fixed size LOGRDSIZE bytes. - * the data area is rounded up to an integral number of 4-bytes and + * the data area is rounded up to an integral number of 4-bytes and * must be no longer than LOGPSIZE. - * the descriptor is of size of multiple of 4-bytes and aligned on a - * 4-byte boundary. + * the descriptor is of size of multiple of 4-bytes and aligned on a + * 4-byte boundary. * records are packed one after the other in the data area of log pages. - * (sometimes a DUMMY record is inserted so that at least one record ends + * (sometimes a DUMMY record is inserted so that at least one record ends * on every page or the longest record is placed on at most two pages). 
- * the field eor in page header/trailer points to the byte following + * the field eor in page header/trailer points to the byte following * the last record on a page. */ @@ -215,13 +215,13 @@ struct lrd { union { /* - * COMMIT: commit + * COMMIT: commit * * transaction commit: no type-dependent information; */ /* - * REDOPAGE: after-image + * REDOPAGE: after-image * * apply after-image; * @@ -236,7 +236,7 @@ struct lrd { } redopage; /* (20) */ /* - * NOREDOPAGE: the page is freed + * NOREDOPAGE: the page is freed * * do not apply after-image records which precede this record * in the log with the same page block number to this page. @@ -252,7 +252,7 @@ struct lrd { } noredopage; /* (20) */ /* - * UPDATEMAP: update block allocation map + * UPDATEMAP: update block allocation map * * either in-line PXD, * or out-of-line XADLIST; @@ -268,13 +268,13 @@ struct lrd { } updatemap; /* (20) */ /* - * NOREDOINOEXT: the inode extent is freed + * NOREDOINOEXT: the inode extent is freed + * + * do not apply after-image records which precede this + * record in the log with the any of the 4 page block + * numbers in this inode extent. * - * do not apply after-image records which precede this - * record in the log with the any of the 4 page block - * numbers in this inode extent. - * - * NOTE: The fileset and pxd fields MUST remain in + * NOTE: The fileset and pxd fields MUST remain in * the same fields in the REDOPAGE record format. * */ @@ -286,22 +286,22 @@ struct lrd { } noredoinoext; /* (20) */ /* - * SYNCPT: log sync point + * SYNCPT: log sync point * - * replay log upto syncpt address specified; + * replay log up to syncpt address specified; */ struct { __le32 sync; /* 4: syncpt address (0 = here) */ } syncpt; /* - * MOUNT: file system mount + * MOUNT: file system mount * * file system mount: no type-dependent information; */ /* - * ? FREEXTENT: free specified extent(s) + * ? FREEXTENT: free specified extent(s) * * free specified extent(s) from block allocation map * N.B.: nextents should be length of data/sizeof(xad_t) @@ -314,17 +314,15 @@ struct lrd { } freextent; /* - * ? NOREDOFILE: this file is freed + * ? NOREDOFILE: this file is freed * * do not apply records which precede this record in the log * with the same inode number. * - * NOREDILE must be the first to be written at commit + * NOREDOFILE must be the first to be written at commit * (last to be read in logredo()) - it prevents * replay of preceding updates of all preceding generations - * of the inumber esp. the on-disk inode itself, - * but does NOT prevent - * replay of the + * of the inumber esp. the on-disk inode itself. */ struct { __le32 fileset; /* 4: fileset number */ @@ -332,7 +330,7 @@ struct lrd { } noredofile; /* - * ? NEWPAGE: + * ? NEWPAGE: * * metadata type dependent */ @@ -344,7 +342,7 @@ struct lrd { } newpage; /* - * ? DUMMY: filler + * ? 
DUMMY: filler * * no type-dependent information */ @@ -378,7 +376,7 @@ struct jfs_log { int size; /* 4: log size in log page (in page) */ int l2bsize; /* 4: log2 of bsize */ - long flag; /* 4: flag */ + unsigned long flag; /* 4: flag */ struct lbuf *lbuf_free; /* 4: free lbufs */ wait_queue_head_t free_wait; /* 4: */ @@ -389,7 +387,7 @@ struct jfs_log { int eor; /* 4: eor of last record in eol page */ struct lbuf *bp; /* 4: current log page buffer */ - struct semaphore loglock; /* 4: log write serialization lock */ + struct mutex loglock; /* 4: log write serialization lock */ /* syncpt */ int nextsync; /* 4: bytes to write before next syncpt */ @@ -464,7 +462,7 @@ struct lbuf { s64 l_blkno; /* 8: log page block number */ caddr_t l_ldata; /* 4: data page */ struct page *l_page; /* The page itself */ - uint l_offset; /* Offset of l_ldata within the page */ + uint l_offset; /* Offset of l_ldata within the page */ wait_queue_head_t l_ioevent; /* 4: i/o done event */ }; diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 8a53981f9f2..49ba7ff1bbb 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -4,25 +4,28 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/bio.h> +#include <linux/slab.h> #include <linux/init.h> #include <linux/buffer_head.h> #include <linux/mempool.h> +#include <linux/seq_file.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" @@ -39,11 +42,11 @@ static struct { #endif #define metapage_locked(mp) test_bit(META_locked, &(mp)->flag) -#define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag) +#define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag) static inline void unlock_metapage(struct metapage *mp) { - clear_bit(META_locked, &mp->flag); + clear_bit_unlock(META_locked, &mp->flag); wake_up(&mp->wait); } @@ -56,7 +59,7 @@ static inline void __lock_metapage(struct metapage *mp) set_current_state(TASK_UNINTERRUPTIBLE); if (metapage_locked(mp)) { unlock_page(mp->page); - schedule(); + io_schedule(); lock_page(mp->page); } } while (trylock_metapage(mp)); @@ -74,7 +77,7 @@ static inline void lock_metapage(struct metapage *mp) } #define METAPOOL_MIN_PAGES 32 -static kmem_cache_t *metapage_cache; +static struct kmem_cache *metapage_cache; static mempool_t *metapage_mempool; #define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE) @@ -88,7 +91,7 @@ struct meta_anchor { }; #define mp_anchor(page) ((struct meta_anchor *)page_private(page)) -static inline struct metapage *page_to_mp(struct page *page, uint offset) +static inline struct metapage *page_to_mp(struct page *page, int offset) { if (!PagePrivate(page)) return NULL; @@ -104,10 
+107,9 @@ static inline int insert_metapage(struct page *page, struct metapage *mp) if (PagePrivate(page)) a = mp_anchor(page); else { - a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS); + a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS); if (!a) return -ENOMEM; - memset(a, 0, sizeof(struct meta_anchor)); set_page_private(page, (unsigned long)a); SetPagePrivate(page); kmap(page); @@ -154,7 +156,7 @@ static inline void dec_io(struct page *page, void (*handler) (struct page *)) } #else -static inline struct metapage *page_to_mp(struct page *page, uint offset) +static inline struct metapage *page_to_mp(struct page *page, int offset) { return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL; } @@ -181,21 +183,18 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) #endif -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void *foo) { struct metapage *mp = (struct metapage *)foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - mp->lid = 0; - mp->lsn = 0; - mp->flag = 0; - mp->data = NULL; - mp->clsn = 0; - mp->log = NULL; - set_bit(META_free, &mp->flag); - init_waitqueue_head(&mp->wait); - } + mp->lid = 0; + mp->lsn = 0; + mp->flag = 0; + mp->data = NULL; + mp->clsn = 0; + mp->log = NULL; + set_bit(META_free, &mp->flag); + init_waitqueue_head(&mp->wait); } static inline struct metapage *alloc_metapage(gfp_t gfp_mask) @@ -217,12 +216,12 @@ int __init metapage_init(void) * Allocate the metapage structures */ metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage), - 0, 0, init_once, NULL); + 0, 0, init_once); if (metapage_cache == NULL) return -ENOMEM; - metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mempool_alloc_slab, - mempool_free_slab, metapage_cache); + metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES, + metapage_cache); if (metapage_mempool == NULL) { kmem_cache_destroy(metapage_cache); @@ -253,12 +252,12 @@ static inline void drop_metapage(struct page *page, struct metapage *mp) */ static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock, - unsigned int *len) + int *len) { int rc = 0; int xflag; s64 xaddr; - sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >> + sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >> inode->i_blkbits; if (lblock >= file_blocks) @@ -284,14 +283,10 @@ static void last_read_complete(struct page *page) unlock_page(page); } -static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done, - int err) +static void metapage_read_end_io(struct bio *bio, int err) { struct page *page = bio->bi_private; - if (bio->bi_size) - return 1; - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { printk(KERN_ERR "metapage_read_end_io: I/O error\n"); SetPageError(page); @@ -299,8 +294,6 @@ static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done, dec_io(page, last_read_complete); bio_put(bio); - - return 0; } static void remove_from_logsync(struct metapage *mp) @@ -345,47 +338,45 @@ static void last_write_complete(struct page *page) end_page_writeback(page); } -static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done, - int err) +static void metapage_write_end_io(struct bio *bio, int err) { struct page *page = bio->bi_private; BUG_ON(!PagePrivate(page)); - if (bio->bi_size) - return 1; - if (! 
test_bit(BIO_UPTODATE, &bio->bi_flags)) { printk(KERN_ERR "metapage_write_end_io: I/O error\n"); SetPageError(page); } dec_io(page, last_write_complete); bio_put(bio); - return 0; } static int metapage_writepage(struct page *page, struct writeback_control *wbc) { struct bio *bio = NULL; - unsigned int block_offset; /* block offset of mp within page */ + int block_offset; /* block offset of mp within page */ struct inode *inode = page->mapping->host; - unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; - unsigned int len; - unsigned int xlen; + int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage; + int len; + int xlen; struct metapage *mp; int redirty = 0; sector_t lblock; + int nr_underway = 0; sector_t pblock; sector_t next_block = 0; sector_t page_start; unsigned long bio_bytes = 0; unsigned long bio_offset = 0; - unsigned int offset; + int offset; + int bad_blocks = 0; page_start = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); + set_page_writeback(page); for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { mp = page_to_mp(page, offset); @@ -405,6 +396,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) } clear_bit(META_dirty, &mp->flag); + set_bit(META_io, &mp->flag); block_offset = offset >> inode->i_blkbits; lblock = page_start + block_offset; if (bio) { @@ -413,7 +405,6 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) len = min(xlen, blocks_per_mp); xlen -= len; bio_bytes += len << inode->i_blkbits; - set_bit(META_io, &mp->flag); continue; } /* Not contiguous */ @@ -425,28 +416,29 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) * count from hitting zero before we're through */ inc_io(page); - if (!bio->bi_size) + if (!bio->bi_iter.bi_size) goto dump_bio; submit_bio(WRITE, bio); + nr_underway++; bio = NULL; - } else { - set_page_writeback(page); + } else inc_io(page); - } xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits; pblock = metapage_get_blocks(inode, lblock, &xlen); if (!pblock) { - /* Need better error handling */ printk(KERN_ERR "JFS: metapage_get_blocks failed\n"); - dec_io(page, last_write_complete); + /* + * We already called inc_io(), but can't cancel it + * with dec_io() until we're done with the page + */ + bad_blocks++; continue; } - set_bit(META_io, &mp->flag); - len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage); + len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage); bio = bio_alloc(GFP_NOFS, 1); bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_sector = pblock << (inode->i_blkbits - 9); + bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9); bio->bi_end_io = metapage_write_end_io; bio->bi_private = page; @@ -460,28 +452,38 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc) if (bio) { if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes) goto add_failed; - if (!bio->bi_size) + if (!bio->bi_iter.bi_size) goto dump_bio; - + submit_bio(WRITE, bio); + nr_underway++; } if (redirty) redirty_page_for_writepage(wbc, page); unlock_page(page); + if (bad_blocks) + goto err_out; + + if (nr_underway == 0) + end_page_writeback(page); + return 0; add_failed: /* We should never reach here, since we're only adding one vec */ printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n"); goto skip; dump_bio: - dump_mem("bio", bio, sizeof(*bio)); + print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16, + 4, bio, sizeof(*bio), 
0); skip: bio_put(bio); unlock_page(page); dec_io(page, last_write_complete); - +err_out: + while (bad_blocks--) + dec_io(page, last_write_complete); return -EIO; } @@ -489,13 +491,13 @@ static int metapage_readpage(struct file *fp, struct page *page) { struct inode *inode = page->mapping->host; struct bio *bio = NULL; - unsigned int block_offset; - unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; + int block_offset; + int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits; sector_t page_start; /* address of page in fs blocks */ sector_t pblock; - unsigned int xlen; + int xlen; unsigned int len; - unsigned int offset; + int offset; BUG_ON(!PageLocked(page)); page_start = (sector_t)page->index << @@ -515,7 +517,8 @@ static int metapage_readpage(struct file *fp, struct page *page) bio = bio_alloc(GFP_NOFS, 1); bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_sector = pblock << (inode->i_blkbits - 9); + bio->bi_iter.bi_sector = + pblock << (inode->i_blkbits - 9); bio->bi_end_io = metapage_read_end_io; bio->bi_private = page; len = xlen << inode->i_blkbits; @@ -543,8 +546,8 @@ add_failed: static int metapage_releasepage(struct page *page, gfp_t gfp_mask) { struct metapage *mp; - int busy = 0; - unsigned int offset; + int ret = 1; + int offset; for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) { mp = page_to_mp(page, offset); @@ -553,46 +556,35 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) continue; jfs_info("metapage_releasepage: mp = 0x%p", mp); - if (mp->count || mp->nohomeok) { + if (mp->count || mp->nohomeok || + test_bit(META_dirty, &mp->flag)) { jfs_info("count = %ld, nohomeok = %d", mp->count, mp->nohomeok); - busy = 1; + ret = 0; continue; } - wait_on_page_writeback(page); - //WARN_ON(test_bit(META_dirty, &mp->flag)); - if (test_bit(META_dirty, &mp->flag)) { - dump_mem("dirty mp in metapage_releasepage", mp, - sizeof(struct metapage)); - dump_mem("page", page, sizeof(struct page)); - dump_stack(); - } if (mp->lsn) remove_from_logsync(mp); remove_metapage(page, mp); INCREMENT(mpStat.pagefree); free_metapage(mp); } - if (busy) - return -1; - - return 0; + return ret; } -static int metapage_invalidatepage(struct page *page, unsigned long offset) +static void metapage_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) { - BUG_ON(offset); + BUG_ON(offset || length < PAGE_CACHE_SIZE); - if (PageWriteback(page)) - return 0; + BUG_ON(PageWriteback(page)); - return metapage_releasepage(page, 0); + metapage_releasepage(page, 0); } -struct address_space_operations jfs_metapage_aops = { +const struct address_space_operations jfs_metapage_aops = { .readpage = metapage_readpage, .writepage = metapage_writepage, - .sync_page = block_sync_page, .releasepage = metapage_releasepage, .invalidatepage = metapage_invalidatepage, .set_page_dirty = __set_page_dirty_nobuffers, @@ -644,10 +636,9 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, } SetPageUptodate(page); } else { - page = read_cache_page(mapping, page_index, - (filler_t *)mapping->a_ops->readpage, NULL); + page = read_mapping_page(mapping, page_index, NULL); if (IS_ERR(page) || !PageUptodate(page)) { - jfs_err("read_cache_page failed!"); + jfs_err("read_mapping_page failed!"); return NULL; } lock_page(page); @@ -657,21 +648,20 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, if (mp) { if (mp->logical_size != size) { jfs_error(inode->i_sb, - "__get_metapage: mp->logical_size != size"); + "get_mp->logical_size != 
size\n"); jfs_err("logical_size = %d, size = %d", mp->logical_size, size); dump_stack(); - goto unlock; + goto unlock; } mp->count++; lock_metapage(mp); if (test_bit(META_discard, &mp->flag)) { if (!new) { jfs_error(inode->i_sb, - "__get_metapage: using a " - "discarded metapage"); + "using a discarded metapage\n"); discard_metapage(mp); - goto unlock; + goto unlock; } clear_bit(META_discard, &mp->flag); } @@ -777,22 +767,9 @@ void release_metapage(struct metapage * mp) } else if (mp->lsn) /* discard_metapage doesn't remove it */ remove_from_logsync(mp); -#if MPS_PER_PAGE == 1 - /* - * If we know this is the only thing in the page, we can throw - * the page out of the page cache. If pages are larger, we - * don't want to do this. - */ - - /* Retest mp->count since we may have released page lock */ - if (test_bit(META_discard, &mp->flag) && !mp->count) { - clear_page_dirty(page); - ClearPageUptodate(page); - } -#else /* Try to keep metapages from using up too much memory */ drop_metapage(page, mp); -#endif + unlock_page(page); page_cache_release(page); } @@ -838,13 +815,9 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len) } #ifdef CONFIG_JFS_STATISTICS -int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_mpstat_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; - - len += sprintf(buffer, + seq_printf(m, "JFS Metapage statistics\n" "=======================\n" "page allocations = %d\n" @@ -853,19 +826,19 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, mpStat.pagealloc, mpStat.pagefree, mpStat.lockwait); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_mpstat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_mpstat_proc_show, NULL); } + +const struct file_operations jfs_mpstat_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_mpstat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h index f0b7d3282b0..a78beda85f6 100644 --- a/fs/jfs/jfs_metapage.h +++ b/fs/jfs/jfs_metapage.h @@ -4,16 +4,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_METAPAGE @@ -33,7 +33,7 @@ struct metapage { unsigned long flag; /* See Below */ unsigned long count; /* Reference count */ void *data; /* Data pointer */ - sector_t index; /* block address of page */ + sector_t index; /* block address of page */ wait_queue_head_t wait; /* implementation */ @@ -65,17 +65,17 @@ extern struct metapage *__get_metapage(struct inode *inode, int absolute, unsigned long new); #define read_metapage(inode, lblock, size, absolute)\ - __get_metapage(inode, lblock, size, absolute, FALSE) + __get_metapage(inode, lblock, size, absolute, false) #define get_metapage(inode, lblock, size, absolute)\ - __get_metapage(inode, lblock, size, absolute, TRUE) + __get_metapage(inode, lblock, size, absolute, true) extern void release_metapage(struct metapage *); extern void grab_metapage(struct metapage *); extern void force_metapage(struct metapage *); /* - * hold_metapage and put_metapage are used in conjuction. The page lock + * hold_metapage and put_metapage are used in conjunction. The page lock * is not dropped between the two, so no other threads can get or release * the metapage */ @@ -139,7 +139,7 @@ static inline void metapage_homeok(struct metapage *mp) put_metapage(mp); } -extern struct address_space_operations jfs_metapage_aops; +extern const struct address_space_operations jfs_metapage_aops; /* * This routines invalidate all pages for an extent. diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 032d111bc33..9895595fd2f 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -21,18 +21,18 @@ * * note: file system in transition to aggregate/fileset: * - * file system mount is interpreted as the mount of aggregate, - * if not already mounted, and mount of the single/only fileset in + * file system mount is interpreted as the mount of aggregate, + * if not already mounted, and mount of the single/only fileset in * the aggregate; * * a file system/aggregate is represented by an internal inode * (aka mount inode) initialized with aggregate superblock; - * each vfs represents a fileset, and points to its "fileset inode + * each vfs represents a fileset, and points to its "fileset inode * allocation map inode" (aka fileset inode): - * (an aggregate itself is structured recursively as a filset: - * an internal vfs is constructed and points to its "fileset inode - * allocation map inode" (aka aggregate inode) where each inode - * represents a fileset inode) so that inode number is mapped to + * (an aggregate itself is structured recursively as a filset: + * an internal vfs is constructed and points to its "fileset inode + * allocation map inode" (aka aggregate inode) where each inode + * represents a fileset inode) so that inode number is mapped to * on-disk inode in uniform way at both aggregate and fileset level; * * each vnode/inode of a fileset is linked to its vfs (to facilitate @@ -41,7 +41,7 @@ * per aggregate information, e.g., block size, etc.) as well as * its file set inode. * - * aggregate + * aggregate * ipmnt * mntvfs -> fileset ipimap+ -> aggregate ipbmap -> aggregate ipaimap; * fileset vfs -> vp(1) <-> ... 
<-> vp(n) <->vproot; @@ -80,7 +80,7 @@ static int logMOUNT(struct super_block *sb); */ int jfs_mount(struct super_block *sb) { - int rc = 0; /* Return code */ + int rc = 0; /* Return code */ struct jfs_sb_info *sbi = JFS_SBI(sb); struct inode *ipaimap = NULL; struct inode *ipaimap2 = NULL; @@ -88,7 +88,7 @@ int jfs_mount(struct super_block *sb) struct inode *ipbmap = NULL; /* - * read/validate superblock + * read/validate superblock * (initialize mount inode from the superblock) */ if ((rc = chkSuper(sb))) { @@ -97,7 +97,7 @@ int jfs_mount(struct super_block *sb) ipaimap = diReadSpecial(sb, AGGREGATE_I, 0); if (ipaimap == NULL) { - jfs_err("jfs_mount: Faild to read AGGREGATE_I"); + jfs_err("jfs_mount: Failed to read AGGREGATE_I"); rc = -EIO; goto errout20; } @@ -147,8 +147,8 @@ int jfs_mount(struct super_block *sb) */ if ((sbi->mntflag & JFS_BAD_SAIT) == 0) { ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1); - if (ipaimap2 == 0) { - jfs_err("jfs_mount: Faild to read AGGREGATE_I"); + if (!ipaimap2) { + jfs_err("jfs_mount: Failed to read AGGREGATE_I"); rc = -EIO; goto errout35; } @@ -169,7 +169,7 @@ int jfs_mount(struct super_block *sb) sbi->ipaimap2 = NULL; /* - * mount (the only/single) fileset + * mount (the only/single) fileset */ /* * open fileset inode allocation map (aka fileset inode) @@ -195,7 +195,7 @@ int jfs_mount(struct super_block *sb) goto out; /* - * unwind on error + * unwind on error */ errout41: /* close fileset inode allocation map inode */ diFreeSpecial(ipimap); @@ -238,7 +238,7 @@ int jfs_mount(struct super_block *sb) */ int jfs_mount_rw(struct super_block *sb, int remount) { - struct jfs_sb_info *sbi = JFS_SBI(sb); + struct jfs_sb_info *sbi = JFS_SBI(sb); int rc; /* @@ -291,7 +291,7 @@ int jfs_mount_rw(struct super_block *sb, int remount) /* * chkSuper() * - * validate the superblock of the file system to be mounted and + * validate the superblock of the file system to be mounted and * get the file system parameters. * * returns @@ -426,7 +426,7 @@ int updateSuper(struct super_block *sb, uint state) jfs_err("updateSuper: bad state"); } else if (sbi->state == FM_DIRTY) return 0; - + if ((rc = readSuper(sb, &bh))) return rc; @@ -486,9 +486,9 @@ int readSuper(struct super_block *sb, struct buffer_head **bpp) * for this file system past this point in log. * it is harmless if mount fails. * - * note: MOUNT record is at aggregate level, not at fileset level, + * note: MOUNT record is at aggregate level, not at fileset level, * since log records of previous mounts of a fileset - * (e.g., AFTER record of extent allocation) have to be processed + * (e.g., AFTER record of extent allocation) have to be processed * to update block allocation map at aggregate level. */ static int logMOUNT(struct super_block *sb) diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index fcf781bf31c..04847b8d307 100644 --- a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_SUPERBLOCK @@ -21,14 +21,14 @@ /* * make the magic number something a human could read */ -#define JFS_MAGIC "JFS1" /* Magic word */ +#define JFS_MAGIC "JFS1" /* Magic word */ #define JFS_VERSION 2 /* Version number: Version 2 */ #define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */ -/* - * aggregate superblock +/* + * aggregate superblock * * The name superblock is too close to super_block, so the name has been * changed to jfs_superblock. The utilities are still using the old name. @@ -40,7 +40,7 @@ struct jfs_superblock { __le64 s_size; /* 8: aggregate size in hardware/LVM blocks; * VFS: number of blocks */ - __le32 s_bsize; /* 4: aggregate block size in bytes; + __le32 s_bsize; /* 4: aggregate block size in bytes; * VFS: fragment size */ __le16 s_l2bsize; /* 2: log2 of s_bsize */ @@ -54,7 +54,7 @@ struct jfs_superblock { __le32 s_flag; /* 4: aggregate attributes: * see jfs_filsys.h */ - __le32 s_state; /* 4: mount/unmount/recovery state: + __le32 s_state; /* 4: mount/unmount/recovery state: * see jfs_filsys.h */ __le32 s_compress; /* 4: > 0 if data compression */ @@ -75,11 +75,11 @@ struct jfs_superblock { struct timestruc_t s_time; /* 8: time last updated */ __le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for - * the fsck service log. + * the fsck service log. * N.B. These blocks are divided among the * versions kept. This is not a per * version size. - * N.B. These blocks are included in the + * N.B. These blocks are included in the * length field of s_fsckpxd. */ s8 s_fscklog; /* 1: which fsck service log is most recent @@ -87,7 +87,7 @@ struct jfs_superblock { * 1 => the first one * 2 => the 2nd one */ - char s_fpack[11]; /* 11: file system volume name + char s_fpack[11]; /* 11: file system volume name * N.B. This must be 11 bytes to * conform with the OS/2 BootSector * requirements @@ -108,17 +108,15 @@ struct jfs_superblock { extern int readSuper(struct super_block *, struct buffer_head **); extern int updateSuper(struct super_block *, uint); +__printf(2, 3) extern void jfs_error(struct super_block *, const char *, ...); extern int jfs_mount(struct super_block *); extern int jfs_mount_rw(struct super_block *, int); extern int jfs_umount(struct super_block *); extern int jfs_umount_rw(struct super_block *); - -extern int jfs_stop_threads; -extern struct completion jfsIOwait; -extern wait_queue_head_t jfs_IO_thread_wait; -extern wait_queue_head_t jfs_commit_thread_wait; -extern wait_queue_head_t jfs_sync_thread_wait; extern int jfs_extendfs(struct super_block *, s64, int); +extern struct task_struct *jfsIOthread; +extern struct task_struct *jfsSyncThread; + #endif /*_H_JFS_SUPERBLOCK */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index b660c93c92d..564c4f279ac 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -4,21 +4,21 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
- * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* - * jfs_txnmgr.c: transaction manager + * jfs_txnmgr.c: transaction manager * * notes: * transaction starts with txBegin() and ends with txCommit() @@ -44,11 +44,12 @@ #include <linux/fs.h> #include <linux/vmalloc.h> -#include <linux/smp_lock.h> #include <linux/completion.h> -#include <linux/suspend.h> +#include <linux/freezer.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/kthread.h> +#include <linux/seq_file.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" @@ -60,7 +61,7 @@ #include "jfs_debug.h" /* - * transaction management structures + * transaction management structures */ static struct { int freetid; /* index of a free tid structure */ @@ -103,26 +104,25 @@ module_param(nTxLock, int, 0); MODULE_PARM_DESC(nTxLock, "Number of transaction locks (max:65536)"); -struct tblock *TxBlock; /* transaction block table */ -static int TxLockLWM; /* Low water mark for number of txLocks used */ -static int TxLockHWM; /* High water mark for number of txLocks used */ -static int TxLockVHWM; /* Very High water mark */ -struct tlock *TxLock; /* transaction lock table */ +struct tblock *TxBlock; /* transaction block table */ +static int TxLockLWM; /* Low water mark for number of txLocks used */ +static int TxLockHWM; /* High water mark for number of txLocks used */ +static int TxLockVHWM; /* Very High water mark */ +struct tlock *TxLock; /* transaction lock table */ /* - * transaction management lock + * transaction management lock */ static DEFINE_SPINLOCK(jfsTxnLock); -#define TXN_LOCK() spin_lock(&jfsTxnLock) -#define TXN_UNLOCK() spin_unlock(&jfsTxnLock) +#define TXN_LOCK() spin_lock(&jfsTxnLock) +#define TXN_UNLOCK() spin_unlock(&jfsTxnLock) #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock); #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) -DECLARE_WAIT_QUEUE_HEAD(jfs_sync_thread_wait); -DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); +static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); static int jfs_commit_thread_waking; /* @@ -135,8 +135,8 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) add_wait_queue(event, &wait); set_current_state(TASK_UNINTERRUPTIBLE); TXN_UNLOCK(); - schedule(); - current->state = TASK_RUNNING; + io_schedule(); + __set_current_state(TASK_RUNNING); remove_wait_queue(event, &wait); } @@ -149,7 +149,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) #define TXN_WAKEUP(event) wake_up_all(event) /* - * statistics + * statistics */ static struct { tid_t maxtid; /* 4: biggest tid ever used */ @@ -182,8 +182,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, static void LogSyncRelease(struct metapage * mp); /* - * transaction block/lock management - * --------------------------------- + * transaction block/lock management + * --------------------------------- */ /* @@ -207,7 +207,7 @@ static lid_t txLockAlloc(void) if 
((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { jfs_info("txLockAlloc tlocks low"); jfs_tlocks_low = 1; - wake_up(&jfs_sync_thread_wait); + wake_up_process(jfsSyncThread); } return lid; @@ -228,9 +228,9 @@ static void txLockFree(lid_t lid) } /* - * NAME: txInit() + * NAME: txInit() * - * FUNCTION: initialize transaction management structures + * FUNCTION: initialize transaction management structures * * RETURN: * @@ -282,7 +282,7 @@ int txInit(void) TxLockVHWM = (nTxLock * 8) / 10; size = sizeof(struct tblock) * nTxBlock; - TxBlock = (struct tblock *) vmalloc(size); + TxBlock = vmalloc(size); if (TxBlock == NULL) return -ENOMEM; @@ -307,7 +307,7 @@ int txInit(void) * tlock id = 0 is reserved. */ size = sizeof(struct tlock) * nTxLock; - TxLock = (struct tlock *) vmalloc(size); + TxLock = vmalloc(size); if (TxLock == NULL) { vfree(TxBlock); return -ENOMEM; @@ -334,9 +334,9 @@ int txInit(void) } /* - * NAME: txExit() + * NAME: txExit() * - * FUNCTION: clean up when module is unloaded + * FUNCTION: clean up when module is unloaded */ void txExit(void) { @@ -347,12 +347,12 @@ void txExit(void) } /* - * NAME: txBegin() + * NAME: txBegin() * - * FUNCTION: start a transaction. + * FUNCTION: start a transaction. * - * PARAMETER: sb - superblock - * flag - force for nested tx; + * PARAMETER: sb - superblock + * flag - force for nested tx; * * RETURN: tid - transaction id * @@ -448,13 +448,13 @@ tid_t txBegin(struct super_block *sb, int flag) } /* - * NAME: txBeginAnon() + * NAME: txBeginAnon() * - * FUNCTION: start an anonymous transaction. + * FUNCTION: start an anonymous transaction. * Blocks if logsync or available tlocks are low to prevent * anonymous tlocks from depleting supply. * - * PARAMETER: sb - superblock + * PARAMETER: sb - superblock * * RETURN: none */ @@ -490,11 +490,11 @@ void txBeginAnon(struct super_block *sb) } /* - * txEnd() + * txEnd() * * function: free specified transaction block. 
* - * logsync barrier processing: + * logsync barrier processing: * * serialization: */ @@ -578,13 +578,13 @@ wakeup: } /* - * txLock() + * txLock() * * function: acquire a transaction lock on the specified <mp> * * parameter: * - * return: transaction lock id + * return: transaction lock id * * serialization: */ @@ -636,7 +636,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, * the inode of the page and available to all anonymous * transactions until txCommit() time at which point * they are transferred to the transaction tlock list of - * the commiting transaction of the inode) + * the committing transaction of the inode) */ if (xtid == 0) { tlck->tid = tid; @@ -830,19 +830,23 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, /* Only locks on ipimap or ipaimap should reach here */ /* assert(jfs_ip->fileset == AGGREGATE_I); */ if (jfs_ip->fileset != AGGREGATE_I) { - jfs_err("txLock: trying to lock locked page!"); - dump_mem("ip", ip, sizeof(struct inode)); - dump_mem("mp", mp, sizeof(struct metapage)); - dump_mem("Locker's tblk", tid_to_tblock(tid), - sizeof(struct tblock)); - dump_mem("Tlock", tlck, sizeof(struct tlock)); + printk(KERN_ERR "txLock: trying to lock locked page!"); + print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4, + ip, sizeof(*ip), 0); + print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4, + mp, sizeof(*mp), 0); + print_hex_dump(KERN_ERR, "Locker's tblock: ", + DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), + sizeof(struct tblock), 0); + print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4, + tlck, sizeof(*tlck), 0); BUG(); } INCREMENT(stattx.waitlock); /* statistics */ TXN_UNLOCK(); release_metapage(mp); TXN_LOCK(); - xtid = tlck->tid; /* reaquire after dropping TXN_LOCK */ + xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", tid, xtid, lid); @@ -858,17 +862,17 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, } /* - * NAME: txRelease() + * NAME: txRelease() * - * FUNCTION: Release buffers associated with transaction locks, but don't + * FUNCTION: Release buffers associated with transaction locks, but don't * mark homeok yet. The allows other transactions to modify * buffers, but won't let them go to disk until commit record * actually gets written. * * PARAMETER: - * tblk - + * tblk - * - * RETURN: Errors from subroutines. + * RETURN: Errors from subroutines. */ static void txRelease(struct tblock * tblk) { @@ -897,10 +901,10 @@ static void txRelease(struct tblock * tblk) } /* - * NAME: txUnlock() + * NAME: txUnlock() * - * FUNCTION: Initiates pageout of pages modified by tid in journalled - * objects and frees their lockwords. + * FUNCTION: Initiates pageout of pages modified by tid in journalled + * objects and frees their lockwords. 
*/ static void txUnlock(struct tblock * tblk) { @@ -984,10 +988,10 @@ static void txUnlock(struct tblock * tblk) } /* - * txMaplock() + * txMaplock() * * function: allocate a transaction lock for freed page/entry; - * for freed page, maplock is used as xtlock/dtlock type; + * for freed page, maplock is used as xtlock/dtlock type; */ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) { @@ -1058,7 +1062,7 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) } /* - * txLinelock() + * txLinelock() * * function: allocate a transaction lock for log vector list */ @@ -1093,39 +1097,39 @@ struct linelock *txLinelock(struct linelock * tlock) } /* - * transaction commit management - * ----------------------------- + * transaction commit management + * ----------------------------- */ /* - * NAME: txCommit() - * - * FUNCTION: commit the changes to the objects specified in - * clist. For journalled segments only the - * changes of the caller are committed, ie by tid. - * for non-journalled segments the data are flushed to - * disk and then the change to the disk inode and indirect - * blocks committed (so blocks newly allocated to the - * segment will be made a part of the segment atomically). - * - * all of the segments specified in clist must be in - * one file system. no more than 6 segments are needed - * to handle all unix svcs. - * - * if the i_nlink field (i.e. disk inode link count) - * is zero, and the type of inode is a regular file or - * directory, or symbolic link , the inode is truncated - * to zero length. the truncation is committed but the - * VM resources are unaffected until it is closed (see - * iput and iclose). + * NAME: txCommit() + * + * FUNCTION: commit the changes to the objects specified in + * clist. For journalled segments only the + * changes of the caller are committed, ie by tid. + * for non-journalled segments the data are flushed to + * disk and then the change to the disk inode and indirect + * blocks committed (so blocks newly allocated to the + * segment will be made a part of the segment atomically). + * + * all of the segments specified in clist must be in + * one file system. no more than 6 segments are needed + * to handle all unix svcs. + * + * if the i_nlink field (i.e. disk inode link count) + * is zero, and the type of inode is a regular file or + * directory, or symbolic link , the inode is truncated + * to zero length. the truncation is committed but the + * VM resources are unaffected until it is closed (see + * iput and iclose). * * PARAMETER: * * RETURN: * * serialization: - * on entry the inode lock on each segment is assumed - * to be held. + * on entry the inode lock on each segment is assumed + * to be held. 
* * i/o error: */ @@ -1139,7 +1143,6 @@ int txCommit(tid_t tid, /* transaction identifier */ struct jfs_log *log; struct tblock *tblk; struct lrd *lrd; - int lsn; struct inode *ip; struct jfs_inode_info *jfs_ip; int k, n; @@ -1176,7 +1179,7 @@ int txCommit(tid_t tid, /* transaction identifier */ if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) tblk->xflag |= COMMIT_LAZY; /* - * prepare non-journaled objects for commit + * prepare non-journaled objects for commit * * flush data pages of non-journaled file * to prevent the file getting non-initialized disk blocks @@ -1187,7 +1190,7 @@ int txCommit(tid_t tid, /* transaction identifier */ cd.nip = nip; /* - * acquire transaction lock on (on-disk) inodes + * acquire transaction lock on (on-disk) inodes * * update on-disk inode from in-memory inode * acquiring transaction locks for AFTER records @@ -1231,10 +1234,8 @@ int txCommit(tid_t tid, /* transaction identifier */ * when we don't need to worry about it at all. * * if ((!S_ISDIR(ip->i_mode)) - * && (tblk->flag & COMMIT_DELETE) == 0) { - * filemap_fdatawrite(ip->i_mapping); - * filemap_fdatawait(ip->i_mapping); - * } + * && (tblk->flag & COMMIT_DELETE) == 0) + * filemap_write_and_wait(ip->i_mapping); */ /* @@ -1265,7 +1266,7 @@ int txCommit(tid_t tid, /* transaction identifier */ } /* - * write log records from transaction locks + * write log records from transaction locks * * txUpdateMap() resets XAD_NEW in XAD. */ @@ -1277,7 +1278,7 @@ int txCommit(tid_t tid, /* transaction identifier */ * lazy commit thread finishes processing */ if (tblk->xflag & COMMIT_DELETE) { - atomic_inc(&tblk->u.ip->i_count); + ihold(tblk->u.ip); /* * Avoid a rare deadlock * @@ -1288,7 +1289,14 @@ int txCommit(tid_t tid, /* transaction identifier */ * commit the transaction synchronously, so the last iput * will be done by the calling thread (or later) */ - if (tblk->u.ip->i_state & I_LOCK) + /* + * I believe this code is no longer needed. Splitting I_LOCK + * into two bits, I_NEW and I_SYNC should prevent this + * deadlock as well. But since I don't have a JFS testload + * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. + * Joern + */ + if (tblk->u.ip->i_state & I_SYNC) tblk->xflag &= ~COMMIT_LAZY; } @@ -1297,16 +1305,16 @@ int txCommit(tid_t tid, /* transaction identifier */ !test_cflag(COMMIT_Nolink, tblk->u.ip))); /* - * write COMMIT log record + * write COMMIT log record */ lrd->type = cpu_to_le16(LOG_COMMIT); lrd->length = 0; - lsn = lmLog(log, tblk, lrd, NULL); + lmLog(log, tblk, lrd, NULL); lmGroupCommit(log, tblk); /* - * - transaction is now committed - + * - transaction is now committed - */ /* @@ -1317,11 +1325,11 @@ int txCommit(tid_t tid, /* transaction identifier */ txForce(tblk); /* - * update allocation map. + * update allocation map. * * update inode allocation map and inode: * free pager lock on memory object of inode if any. - * update block allocation map. + * update block allocation map. * * txUpdateMap() resets XAD_NEW in XAD. 
*/ @@ -1329,7 +1337,7 @@ int txCommit(tid_t tid, /* transaction identifier */ txUpdateMap(tblk); /* - * free transaction locks and pageout/free pages + * free transaction locks and pageout/free pages */ txRelease(tblk); @@ -1338,7 +1346,7 @@ int txCommit(tid_t tid, /* transaction identifier */ /* - * reset in-memory object state + * reset in-memory object state */ for (k = 0; k < cd.nip; k++) { ip = cd.iplist[k]; @@ -1361,11 +1369,11 @@ int txCommit(tid_t tid, /* transaction identifier */ } /* - * NAME: txLog() + * NAME: txLog() * - * FUNCTION: Writes AFTER log records for all lines modified - * by tid for segments specified by inodes in comdata. - * Code assumes only WRITELOCKS are recorded in lockwords. + * FUNCTION: Writes AFTER log records for all lines modified + * by tid for segments specified by inodes in comdata. + * Code assumes only WRITELOCKS are recorded in lockwords. * * PARAMETERS: * @@ -1424,12 +1432,12 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd) } /* - * diLog() + * diLog() * - * function: log inode tlock and format maplock to update bmap; + * function: log inode tlock and format maplock to update bmap; */ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck, struct commit * cd) + struct tlock * tlck, struct commit * cd) { int rc = 0; struct metapage *mp; @@ -1445,7 +1453,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, pxd = &lrd->log.redopage.pxd; /* - * inode after image + * inode after image */ if (tlck->type & tlckENTRY) { /* log after-image for logredo(): */ @@ -1459,7 +1467,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, tlck->flag |= tlckWRITEPAGE; } else if (tlck->type & tlckFREE) { /* - * free inode extent + * free inode extent * * (pages of the freed inode extent have been invalidated and * a maplock for free of the extent has been formatted at @@ -1501,7 +1509,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, jfs_err("diLog: UFO type tlck:0x%p", tlck); #ifdef _JFS_WIP /* - * alloc/free external EA extent + * alloc/free external EA extent * * a maplock for txUpdateMap() to update bPWMAP for alloc/free * of the extent has been formatted at txLock() time; @@ -1537,9 +1545,9 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * dataLog() + * dataLog() * - * function: log data tlock + * function: log data tlock */ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) @@ -1583,9 +1591,9 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * dtLog() + * dtLog() * - * function: log dtree tlock and format maplock to update bmap; + * function: log dtree tlock and format maplock to update bmap; */ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) @@ -1606,10 +1614,10 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); /* - * page extension via relocation: entry insertion; - * page extension in-place: entry insertion; - * new right page from page split, reinitialized in-line - * root from root page split: entry insertion; + * page extension via relocation: entry insertion; + * page extension in-place: entry insertion; + * new right page from page split, reinitialized in-line + * root from root page split: entry insertion; */ if 
(tlck->type & (tlckNEW | tlckEXTEND)) { /* log after-image of the new page for logredo(): @@ -1644,8 +1652,8 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * entry insertion/deletion, - * sibling page link update (old right page before split); + * entry insertion/deletion, + * sibling page link update (old right page before split); */ if (tlck->type & (tlckENTRY | tlckRELINK)) { /* log after-image for logredo(): */ @@ -1661,11 +1669,11 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * page deletion: page has been invalidated - * page relocation: source extent + * page deletion: page has been invalidated + * page relocation: source extent * - * a maplock for free of the page has been formatted - * at txLock() time); + * a maplock for free of the page has been formatted + * at txLock() time); */ if (tlck->type & (tlckFREE | tlckRELOCATE)) { /* log LOG_NOREDOPAGE of the deleted page for logredo() @@ -1686,9 +1694,9 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * xtLog() + * xtLog() * - * function: log xtree tlock and format maplock to update bmap; + * function: log xtree tlock and format maplock to update bmap; */ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) @@ -1728,8 +1736,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, xadlock = (struct xdlistlock *) maplock; /* - * entry insertion/extension; - * sibling page link update (old right page before split); + * entry insertion/extension; + * sibling page link update (old right page before split); */ if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { /* log after-image for logredo(): @@ -1804,7 +1812,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * page deletion: file deletion/truncation (ref. xtTruncate()) + * page deletion: file deletion/truncation (ref. xtTruncate()) * * (page will be invalidated after log is written and bmap * is updated from the page); @@ -1911,17 +1919,18 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * page/entry truncation: file truncation (ref. xtTruncate()) + * page/entry truncation: file truncation (ref. xtTruncate()) * - * |----------+------+------+---------------| - * | | | - * | | hwm - hwm before truncation - * | next - truncation point - * lwm - lwm before truncation + * |----------+------+------+---------------| + * | | | + * | | hwm - hwm before truncation + * | next - truncation point + * lwm - lwm before truncation * header ? 
*/ if (tlck->type & tlckTRUNCATE) { - pxd_t pxd; /* truncated extent of xad */ + /* This odd declaration suppresses a bogus gcc warning */ + pxd_t pxd = pxd; /* truncated extent of xad */ int twm; /* @@ -1939,7 +1948,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, twm = xtlck->twm.offset; /* - * write log records + * write log records */ /* log after-image for logredo(): * @@ -1999,7 +2008,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * format maplock(s) for txUpdateMap() to update bmap + * format maplock(s) for txUpdateMap() to update bmap */ maplock->index = 0; @@ -2028,8 +2037,6 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * truncate entry XAD[twm == next - 1]: */ if (twm == next - 1) { - struct pxd_lock *pxdlock; - /* format a maplock for txUpdateMap() to update bmap * to free truncated delta extent of the truncated * entry XAD[next - 1]; @@ -2073,9 +2080,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * mapLog() + * mapLog() * - * function: log from maplock of freed data extents; + * function: log from maplock of freed data extents; */ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) @@ -2085,7 +2092,7 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, pxd_t *pxd; /* - * page relocation: free the source page extent + * page relocation: free the source page extent * * a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time saving the src @@ -2159,10 +2166,10 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, } /* - * txEA() + * txEA() * - * function: acquire maplock for EA/ACL extents or - * set COMMIT_INLINE flag; + * function: acquire maplock for EA/ACL extents or + * set COMMIT_INLINE flag; */ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) { @@ -2211,10 +2218,10 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) } /* - * txForce() + * txForce() * * function: synchronously write pages locked by transaction - * after txLog() but before txUpdateMap(); + * after txLog() but before txUpdateMap(); */ static void txForce(struct tblock * tblk) { @@ -2277,10 +2284,10 @@ static void txForce(struct tblock * tblk) } /* - * txUpdateMap() + * txUpdateMap() * - * function: update persistent allocation map (and working map - * if appropriate); + * function: update persistent allocation map (and working map + * if appropriate); * * parameter: */ @@ -2302,7 +2309,7 @@ static void txUpdateMap(struct tblock * tblk) /* - * update block allocation map + * update block allocation map * * update allocation state in pmap (and wmap) and * update lsn of the pmap page; @@ -2386,7 +2393,7 @@ static void txUpdateMap(struct tblock * tblk) } } /* - * update inode allocation map + * update inode allocation map * * update allocation state in pmap and * update lsn of the pmap page; @@ -2395,7 +2402,7 @@ static void txUpdateMap(struct tblock * tblk) * unlock mapper/write lock */ if (tblk->xflag & COMMIT_CREATE) { - diUpdatePMap(ipimap, tblk->ino, FALSE, tblk); + diUpdatePMap(ipimap, tblk->ino, false, tblk); /* update persistent block allocation map * for the allocation of inode extent; */ @@ -2405,30 +2412,30 @@ static void txUpdateMap(struct tblock * tblk) txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); } else if (tblk->xflag & COMMIT_DELETE) { ip = 
tblk->u.ip; - diUpdatePMap(ipimap, ip->i_ino, TRUE, tblk); + diUpdatePMap(ipimap, ip->i_ino, true, tblk); iput(ip); } } /* - * txAllocPMap() + * txAllocPMap() * * function: allocate from persistent map; * * parameter: - * ipbmap - - * malock - - * xad list: - * pxd: - * - * maptype - - * allocate from persistent map; - * free from persistent map; - * (e.g., tmp file - free from working map at releae - * of last reference); - * free from persistent and working map; - * - * lsn - log sequence number; + * ipbmap - + * malock - + * xad list: + * pxd: + * + * maptype - + * allocate from persistent map; + * free from persistent map; + * (e.g., tmp file - free from working map at releae + * of last reference); + * free from persistent and working map; + * + * lsn - log sequence number; */ static void txAllocPMap(struct inode *ip, struct maplock * maplock, struct tblock * tblk) @@ -2453,7 +2460,7 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock, if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { xaddr = addressXAD(xad); xlen = lengthXAD(xad); - dbUpdatePMap(ipbmap, FALSE, xaddr, + dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); xad->flag &= ~(XAD_NEW | XAD_EXTENDED); jfs_info("allocPMap: xaddr:0x%lx xlen:%d", @@ -2464,7 +2471,7 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock, pxdlock = (struct pxd_lock *) maplock; xaddr = addressPXD(&pxdlock->pxd); xlen = lengthPXD(&pxdlock->pxd); - dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, tblk); + dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } else { /* (maplock->flag & mlckALLOCPXDLIST) */ @@ -2473,7 +2480,7 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock, for (n = 0; n < pxdlistlock->count; n++, pxd++) { xaddr = addressPXD(pxd); xlen = lengthPXD(pxd); - dbUpdatePMap(ipbmap, FALSE, xaddr, (s64) xlen, + dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); @@ -2482,9 +2489,9 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock, } /* - * txFreeMap() + * txFreeMap() * - * function: free from persistent and/or working map; + * function: free from persistent and/or working map; * * todo: optimization */ @@ -2515,7 +2522,7 @@ void txFreeMap(struct inode *ip, if (!(xad->flag & XAD_NEW)) { xaddr = addressXAD(xad); xlen = lengthXAD(xad); - dbUpdatePMap(ipbmap, TRUE, xaddr, + dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, tblk); jfs_info("freePMap: xaddr:0x%lx " "xlen:%d", @@ -2526,7 +2533,7 @@ void txFreeMap(struct inode *ip, pxdlock = (struct pxd_lock *) maplock; xaddr = addressPXD(&pxdlock->pxd); xlen = lengthPXD(&pxdlock->pxd); - dbUpdatePMap(ipbmap, TRUE, xaddr, (s64) xlen, + dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, tblk); jfs_info("freePMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); @@ -2537,7 +2544,7 @@ void txFreeMap(struct inode *ip, for (n = 0; n < pxdlistlock->count; n++, pxd++) { xaddr = addressPXD(pxd); xlen = lengthPXD(pxd); - dbUpdatePMap(ipbmap, TRUE, xaddr, + dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, tblk); jfs_info("freePMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); @@ -2583,9 +2590,9 @@ void txFreeMap(struct inode *ip, } /* - * txFreelock() + * txFreelock() * - * function: remove tlock from inode anonymous locklist + * function: remove tlock from inode anonymous locklist */ void txFreelock(struct inode *ip) { @@ -2623,7 +2630,7 @@ void txFreelock(struct inode *ip) } /* - * txAbort() + * txAbort() * * function: abort tx 
before commit; * @@ -2677,13 +2684,13 @@ void txAbort(tid_t tid, int dirty) * mark filesystem dirty */ if (dirty) - jfs_error(tblk->sb, "txAbort"); + jfs_error(tblk->sb, "\n"); return; } /* - * txLazyCommit(void) + * txLazyCommit(void) * * All transactions except those changing ipimap (COMMIT_FORCE) are * processed by this routine. This insures that the inode and block @@ -2732,7 +2739,7 @@ static void txLazyCommit(struct tblock * tblk) } /* - * jfs_lazycommit(void) + * jfs_lazycommit(void) * * To be run as a kernel daemon. If lbmIODone is called in an interrupt * context, or where blocking is not wanted, this routine will process @@ -2745,10 +2752,6 @@ int jfs_lazycommit(void *arg) unsigned long flags; struct jfs_sb_info *sbi; - daemonize("jfsCommit"); - - complete(&jfsIOwait); - do { LAZY_LOCK(flags); jfs_commit_thread_waking = 0; /* OK to wake another thread */ @@ -2797,7 +2800,7 @@ int jfs_lazycommit(void *arg) if (freezing(current)) { LAZY_UNLOCK(flags); - refrigerator(); + try_to_freeze(); } else { DECLARE_WAITQUEUE(wq, current); @@ -2805,16 +2808,16 @@ int jfs_lazycommit(void *arg) set_current_state(TASK_INTERRUPTIBLE); LAZY_UNLOCK(flags); schedule(); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&jfs_commit_thread_wait, &wq); } - } while (!jfs_stop_threads); + } while (!kthread_should_stop()); if (!list_empty(&TxAnchor.unlock_queue)) jfs_err("jfs_lazycommit being killed w/pending transactions!"); else jfs_info("jfs_lazycommit being killed\n"); - complete_and_exit(&jfsIOwait, 0); + return 0; } void txLazyUnlock(struct tblock * tblk) @@ -2878,10 +2881,10 @@ restart: */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); - down(&jfs_ip->commit_sem); + mutex_lock(&jfs_ip->commit_mutex); txCommit(tid, 1, &ip, 0); txEnd(tid); - up(&jfs_ip->commit_sem); + mutex_unlock(&jfs_ip->commit_mutex); /* * Just to be safe. I don't know how * long we can run without blocking @@ -2921,7 +2924,7 @@ void txResume(struct super_block *sb) } /* - * jfs_sync(void) + * jfs_sync(void) * * To be run as a kernel daemon. This is awakened when tlocks run low. * We write any inodes that have anonymous tlocks so they will become @@ -2931,13 +2934,8 @@ int jfs_sync(void *arg) { struct inode *ip; struct jfs_inode_info *jfs_ip; - int rc; tid_t tid; - daemonize("jfsSync"); - - complete(&jfsIOwait); - do { /* * write each inode on the anonymous inode list @@ -2954,16 +2952,16 @@ int jfs_sync(void *arg) * Inode is being freed */ list_del_init(&jfs_ip->anon_inode_list); - } else if (! down_trylock(&jfs_ip->commit_sem)) { + } else if (mutex_trylock(&jfs_ip->commit_mutex)) { /* * inode will be removed from anonymous list * when it is committed */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE); - rc = txCommit(tid, 1, &ip, 0); + txCommit(tid, 1, &ip, 0); txEnd(tid); - up(&jfs_ip->commit_sem); + mutex_unlock(&jfs_ip->commit_mutex); iput(ip); /* @@ -2973,18 +2971,15 @@ int jfs_sync(void *arg) cond_resched(); TXN_LOCK(); } else { - /* We can't get the commit semaphore. It may + /* We can't get the commit mutex. It may * be held by a thread waiting for tlock's * so let's not block here. Save it to * put back on the anon_list. 
*/ - /* Take off anon_list */ - list_del(&jfs_ip->anon_inode_list); - - /* Put on anon_list2 */ - list_add(&jfs_ip->anon_inode_list, - &TxAnchor.anon_list2); + /* Move from anon_list to anon_list2 */ + list_move(&jfs_ip->anon_inode_list, + &TxAnchor.anon_list2); TXN_UNLOCK(); iput(ip); @@ -2996,29 +2991,22 @@ int jfs_sync(void *arg) if (freezing(current)) { TXN_UNLOCK(); - refrigerator(); + try_to_freeze(); } else { - DECLARE_WAITQUEUE(wq, current); - - add_wait_queue(&jfs_sync_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); TXN_UNLOCK(); schedule(); - current->state = TASK_RUNNING; - remove_wait_queue(&jfs_sync_thread_wait, &wq); + __set_current_state(TASK_RUNNING); } - } while (!jfs_stop_threads); + } while (!kthread_should_stop()); jfs_info("jfs_sync being killed"); - complete_and_exit(&jfsIOwait, 0); + return 0; } #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) -int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_txanchor_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; char *freewait; char *freelockwait; char *lowlockwait; @@ -3030,7 +3018,7 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, lowlockwait = waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; - len += sprintf(buffer, + seq_printf(m, "JFS TxAnchor\n" "============\n" "freetid = %d\n" @@ -3049,31 +3037,27 @@ int jfs_txanchor_read(char *buffer, char **start, off_t offset, int length, TxAnchor.tlocksInUse, jfs_tlocks_low, list_empty(&TxAnchor.unlock_queue) ? "" : "not "); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_txanchor_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_txanchor_proc_show, NULL); } + +const struct file_operations jfs_txanchor_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_txanchor_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) -int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_txstats_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; - - len += sprintf(buffer, + seq_printf(m, "JFS TxStats\n" "===========\n" "calls to txBegin = %d\n" @@ -3094,19 +3078,19 @@ int jfs_txstats_read(char *buffer, char **start, off_t offset, int length, TxStat.txBeginAnon_lockslow, TxStat.txLockAlloc, TxStat.txLockAlloc_freelock); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_txstats_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_txstats_proc_show, NULL); } + +const struct file_operations jfs_txstats_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_txstats_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h index 0e4dc4514c4..ab728893701 100644 --- a/fs/jfs/jfs_txnmgr.h +++ b/fs/jfs/jfs_txnmgr.h @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the 
Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_TXNMGR @@ -94,7 +94,7 @@ extern struct tblock *TxBlock; /* transaction block table */ */ struct tlock { lid_t next; /* 2: index next lockword on tid locklist - * next lockword on freelist + * next lockword on freelist */ tid_t tid; /* 2: transaction id holding lock */ @@ -179,7 +179,7 @@ struct linelock { /* (8) */ struct lv lv[20]; /* 40: */ -}; /* (48) */ +}; /* (48) */ #define dt_lock linelock @@ -211,8 +211,8 @@ struct xtlock { * at tlock.lock/linelock: watch for alignment; * N.B. next field may be set by linelock, and should not * be modified by maplock; - * N.B. index of the first pxdlock specifies index of next - * free maplock (i.e., number of maplock) in the tlock; + * N.B. index of the first pxdlock specifies index of next + * free maplock (i.e., number of maplock) in the tlock; */ struct maplock { lid_t next; /* 2: */ diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h index 5bfad39a207..43ea3713c08 100644 --- a/fs/jfs/jfs_types.h +++ b/fs/jfs/jfs_types.h @@ -21,7 +21,7 @@ /* * jfs_types.h: * - * basic type/utility definitions + * basic type/utility definitions * * note: this header file must be the 1st include file * of JFS include list in all JFS .c file. 
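The seq_file conversion of jfs_txanchor_read()/jfs_txstats_read() shown above only supplies the show callbacks and their file_operations; the /proc registration itself lives elsewhere (jfs_debug.c) and is not part of these hunks. As a minimal sketch of how such a seq_file-backed node is typically wired up — the directory and entry names and the error handling below are illustrative assumptions, not taken from this patch:

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static struct proc_dir_entry *jfs_proc_base;	/* assumed to back /proc/fs/jfs */

static int jfs_proc_example_init(void)
{
	jfs_proc_base = proc_mkdir("fs/jfs", NULL);
	if (!jfs_proc_base)
		return -ENOMEM;
	/* each read rebuilds the report by rerunning jfs_txanchor_proc_show() via seq_read() */
	if (!proc_create("TxAnchor", 0, jfs_proc_base,
			 &jfs_txanchor_proc_fops)) {
		remove_proc_entry("fs/jfs", NULL);
		return -ENOMEM;
	}
	return 0;
}

static void jfs_proc_example_exit(void)
{
	remove_proc_entry("TxAnchor", jfs_proc_base);
	remove_proc_entry("fs/jfs", NULL);
}

Compared with the removed handlers, single_open() stores the show callback so seq_printf() can regenerate the whole report on each open, which is why the manual buffer/offset/*eof bookkeeping of the old proc read interface disappears in the hunks above.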
@@ -54,41 +54,8 @@ struct timestruc_t { */ #define LEFTMOSTONE 0x80000000 -#define HIGHORDER 0x80000000u /* high order bit on */ -#define ONES 0xffffffffu /* all bit on */ - -typedef int boolean_t; -#define TRUE 1 -#define FALSE 0 - -/* - * logical xd (lxd) - */ -typedef struct { - unsigned len:24; - unsigned off1:8; - u32 off2; -} lxd_t; - -/* lxd_t field construction */ -#define LXDlength(lxd, length32) ( (lxd)->len = length32 ) -#define LXDoffset(lxd, offset64)\ -{\ - (lxd)->off1 = ((s64)offset64) >> 32;\ - (lxd)->off2 = (offset64) & 0xffffffff;\ -} - -/* lxd_t field extraction */ -#define lengthLXD(lxd) ( (lxd)->len ) -#define offsetLXD(lxd)\ - ( ((s64)((lxd)->off1)) << 32 | (lxd)->off2 ) - -/* lxd list */ -struct lxdlist { - s16 maxnlxd; - s16 nlxd; - lxd_t *lxd; -}; +#define HIGHORDER 0x80000000u /* high order bit on */ +#define ONES 0xffffffffu /* all bit on */ /* * physical xd (pxd) @@ -152,7 +119,7 @@ typedef struct { #define sizeDXD(dxd) le32_to_cpu((dxd)->size) /* - * directory entry argument + * directory entry argument */ struct component_name { int namlen; @@ -164,14 +131,14 @@ struct component_name { * DASD limit information - stored in directory inode */ struct dasd { - u8 thresh; /* Alert Threshold (in percent) */ - u8 delta; /* Alert Threshold delta (in percent) */ + u8 thresh; /* Alert Threshold (in percent) */ + u8 delta; /* Alert Threshold delta (in percent) */ u8 rsrvd1; - u8 limit_hi; /* DASD limit (in logical blocks) */ - __le32 limit_lo; /* DASD limit (in logical blocks) */ + u8 limit_hi; /* DASD limit (in logical blocks) */ + __le32 limit_lo; /* DASD limit (in logical blocks) */ u8 rsrvd2[3]; - u8 used_hi; /* DASD usage (in logical blocks) */ - __le32 used_lo; /* DASD usage (in logical blocks) */ + u8 used_hi; /* DASD usage (in logical blocks) */ + __le32 used_lo; /* DASD usage (in logical blocks) */ }; #define DASDLIMIT(dasdp) \ diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index 5cf91785b54..7971f37534a 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -22,8 +22,8 @@ * note: file system in transition to aggregate/fileset: * (ref. 
jfs_mount.c) * - * file system unmount is interpreted as mount of the single/only - * fileset in the aggregate and, if unmount of the last fileset, + * file system unmount is interpreted as mount of the single/only + * fileset in the aggregate and, if unmount of the last fileset, * as unmount of the aggerate; */ @@ -60,13 +60,13 @@ int jfs_umount(struct super_block *sb) jfs_info("UnMount JFS: sb:0x%p", sb); /* - * update superblock and close log + * update superblock and close log * * if mounted read-write and log based recovery was enabled */ if ((log = sbi->log)) /* - * Wait for outstanding transactions to be written to log: + * Wait for outstanding transactions to be written to log: */ jfs_flush_journal(log, 2); @@ -108,22 +108,21 @@ int jfs_umount(struct super_block *sb) * Make sure all metadata makes it to disk before we mark * the superblock as clean */ - filemap_fdatawrite(sbi->direct_inode->i_mapping); - filemap_fdatawait(sbi->direct_inode->i_mapping); + filemap_write_and_wait(sbi->direct_inode->i_mapping); /* * ensure all file system file pages are propagated to their - * home blocks on disk (and their in-memory buffer pages are + * home blocks on disk (and their in-memory buffer pages are * invalidated) BEFORE updating file system superblock state - * (to signify file system is unmounted cleanly, and thus in - * consistent state) and log superblock active file system + * (to signify file system is unmounted cleanly, and thus in + * consistent state) and log superblock active file system * list (to signify skip logredo()). */ if (log) { /* log = NULL if read-only mount */ updateSuper(sb, FM_CLEAN); /* - * close log: + * close log: * * remove file system from log active file system list. */ @@ -143,7 +142,7 @@ int jfs_umount_rw(struct super_block *sb) return 0; /* - * close log: + * close log: * * remove file system from log active file system list. */ @@ -161,8 +160,7 @@ int jfs_umount_rw(struct super_block *sb) * mark the superblock clean before everything is flushed to * disk. */ - filemap_fdatawrite(sbi->direct_inode->i_mapping); - filemap_fdatawait(sbi->direct_inode->i_mapping); + filemap_write_and_wait(sbi->direct_inode->i_mapping); updateSuper(sb, FM_CLEAN); diff --git a/fs/jfs/jfs_unicode.c b/fs/jfs/jfs_unicode.c index f327decfb15..c7de6f5bbef 100644 --- a/fs/jfs/jfs_unicode.c +++ b/fs/jfs/jfs_unicode.c @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -57,8 +57,8 @@ int jfs_strfromUCS_le(char *to, const __le16 * from, warn--; warn_again--; printk(KERN_ERR - "non-latin1 character 0x%x found in JFS file name\n", - le16_to_cpu(from[i])); + "non-latin1 character 0x%x found in JFS file name\n", + le16_to_cpu(from[i])); printk(KERN_ERR "mount with iocharset=utf8 to access\n"); } @@ -124,7 +124,7 @@ int get_UCSname(struct component_name * uniName, struct dentry *dentry) kmalloc((length + 1) * sizeof(wchar_t), GFP_NOFS); if (uniName->name == NULL) - return -ENOSPC; + return -ENOMEM; uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name, length, nls_tab); diff --git a/fs/jfs/jfs_unicode.h b/fs/jfs/jfs_unicode.h index 69e25ebe87a..8f0f02cb6ca 100644 --- a/fs/jfs/jfs_unicode.h +++ b/fs/jfs/jfs_unicode.h @@ -1,24 +1,25 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 - * Portions Copyright (c) Christoph Hellwig, 2001-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 + * Portions Copyright (C) Christoph Hellwig, 2001-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_UNICODE #define _H_JFS_UNICODE +#include <linux/slab.h> #include <asm/byteorder.h> #include "jfs_types.h" diff --git a/fs/jfs/jfs_uniupr.c b/fs/jfs/jfs_uniupr.c index 4ab185d2630..cfe50666d31 100644 --- a/fs/jfs/jfs_uniupr.c +++ b/fs/jfs/jfs_uniupr.c @@ -1,18 +1,18 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h index 25e9990bccd..e8d717dabca 100644 --- a/fs/jfs/jfs_xattr.h +++ b/fs/jfs/jfs_xattr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -61,11 +61,14 @@ extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); extern int jfs_removexattr(struct dentry *, const char *); +extern const struct xattr_handler *jfs_xattr_handlers[]; + #ifdef CONFIG_JFS_SECURITY -extern int jfs_init_security(tid_t, struct inode *, struct inode *); +extern int jfs_init_security(tid_t, struct inode *, struct inode *, + const struct qstr *); #else static inline int jfs_init_security(tid_t tid, struct inode *inode, - struct inode *dir) + struct inode *dir, const struct qstr *qstr) { return 0; } diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index e72f4ebb6e9..5ad7748860c 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -3,24 +3,26 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* - * jfs_xtree.c: extent allocation descriptor B+-tree manager + * jfs_xtree.c: extent allocation descriptor B+-tree manager */ #include <linux/fs.h> +#include <linux/module.h> #include <linux/quotaops.h> +#include <linux/seq_file.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_metapage.h" @@ -32,57 +34,58 @@ /* * xtree local flag */ -#define XT_INSERT 0x00000001 +#define XT_INSERT 0x00000001 /* - * xtree key/entry comparison: extent offset + * xtree key/entry comparison: extent offset * * return: - * -1: k < start of extent - * 0: start_of_extent <= k <= end_of_extent - * 1: k > end_of_extent + * -1: k < start of extent + * 0: start_of_extent <= k <= end_of_extent + * 1: k > end_of_extent */ #define XT_CMP(CMP, K, X, OFFSET64)\ {\ - OFFSET64 = offsetXAD(X);\ - (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ - ((K) < OFFSET64) ? -1 : 0;\ + OFFSET64 = offsetXAD(X);\ + (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\ + ((K) < OFFSET64) ? 
-1 : 0;\ } /* write a xad entry */ #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\ {\ - (XAD)->flag = (FLAG);\ - XADoffset((XAD), (OFF));\ - XADlength((XAD), (LEN));\ - XADaddress((XAD), (ADDR));\ + (XAD)->flag = (FLAG);\ + XADoffset((XAD), (OFF));\ + XADlength((XAD), (LEN));\ + XADaddress((XAD), (ADDR));\ } #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot) /* get page buffer for specified block address */ /* ToDo: Replace this ugly macro with a function */ -#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ -{\ - BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ - if (!(RC))\ - {\ - if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ - (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ - (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ - {\ - jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ - BT_PUTPAGE(MP);\ - MP = NULL;\ - RC = -EIO;\ - }\ - }\ -} +#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ +do { \ + BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot); \ + if (!(RC)) { \ + if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \ + (le16_to_cpu((P)->header.nextindex) > \ + le16_to_cpu((P)->header.maxentry)) || \ + (le16_to_cpu((P)->header.maxentry) > \ + (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \ + jfs_error((IP)->i_sb, \ + "XT_GETPAGE: xtree page corrupt\n"); \ + BT_PUTPAGE(MP); \ + MP = NULL; \ + RC = -EIO; \ + } \ + } \ +} while (0) /* for consistency */ #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) -#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ +#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \ BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot) /* xtree entry parameter descriptor */ struct xtsplit { @@ -97,7 +100,7 @@ struct xtsplit { /* - * statistics + * statistics */ #ifdef CONFIG_JFS_STATISTICS static struct { @@ -136,7 +139,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp); #endif /* _STILL_TO_PORT */ /* - * xtLookup() + * xtLookup() * * function: map a single page into a physical extent; */ @@ -162,11 +165,8 @@ int xtLookup(struct inode *ip, s64 lstart, /* is lookup offset beyond eof ? */ size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> JFS_SBI(ip->i_sb)->l2bsize; - if (lstart >= size) { - jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)", - (ulong) lstart, (ulong) size); + if (lstart >= size) return 0; - } } /* @@ -179,7 +179,7 @@ int xtLookup(struct inode *ip, s64 lstart, } /* - * compute the physical extent covering logical extent + * compute the physical extent covering logical extent * * N.B. search may have failed (e.g., hole in sparse file), * and returned the index of the next entry. @@ -218,281 +218,23 @@ int xtLookup(struct inode *ip, s64 lstart, return rc; } - /* - * xtLookupList() + * xtSearch() * - * function: map a single logical extent into a list of physical extent; - * - * parameter: - * struct inode *ip, - * struct lxdlist *lxdlist, lxd list (in) - * struct xadlist *xadlist, xad list (in/out) - * int flag) - * - * coverage of lxd by xad under assumption of - * . lxd's are ordered and disjoint. - * . xad's are ordered and disjoint. 
- * - * return: - * 0: success - * - * note: a page being written (even a single byte) is backed fully, - * except the last page which is only backed with blocks - * required to cover the last byte; - * the extent backing a page is fully contained within an xad; - */ -int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, - struct xadlist * xadlist, int flag) -{ - int rc = 0; - struct btstack btstack; - int cmp; - s64 bn; - struct metapage *mp; - xtpage_t *p; - int index; - lxd_t *lxd; - xad_t *xad, *pxd; - s64 size, lstart, lend, xstart, xend, pstart; - s64 llen, xlen, plen; - s64 xaddr, paddr; - int nlxd, npxd, maxnpxd; - - npxd = xadlist->nxad = 0; - maxnpxd = xadlist->maxnxad; - pxd = xadlist->xad; - - nlxd = lxdlist->nlxd; - lxd = lxdlist->lxd; - - lstart = offsetLXD(lxd); - llen = lengthLXD(lxd); - lend = lstart + llen; - - size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >> - JFS_SBI(ip->i_sb)->l2bsize; - - /* - * search for the xad entry covering the logical extent - */ - search: - if (lstart >= size) - return 0; - - if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0))) - return rc; - - /* - * compute the physical extent covering logical extent - * - * N.B. search may have failed (e.g., hole in sparse file), - * and returned the index of the next entry. - */ -//map: - /* retrieve search result */ - XT_GETSEARCH(ip, btstack.top, bn, mp, p, index); - - /* is xad on the next sibling page ? */ - if (index == le16_to_cpu(p->header.nextindex)) { - if (p->header.flag & BT_ROOT) - goto mapend; - - if ((bn = le64_to_cpu(p->header.next)) == 0) - goto mapend; - - XT_PUTPAGE(mp); - - /* get next sibling page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - index = XTENTRYSTART; - } - - xad = &p->xad[index]; - - /* - * is lxd covered by xad ? 
- */ - compare: - xstart = offsetXAD(xad); - xlen = lengthXAD(xad); - xend = xstart + xlen; - xaddr = addressXAD(xad); - - compare1: - if (xstart < lstart) - goto compare2; - - /* (lstart <= xstart) */ - - /* lxd is NOT covered by xad */ - if (lend <= xstart) { - /* - * get next lxd - */ - if (--nlxd == 0) - goto mapend; - lxd++; - - lstart = offsetLXD(lxd); - llen = lengthLXD(lxd); - lend = lstart + llen; - if (lstart >= size) - goto mapend; - - /* compare with the current xad */ - goto compare1; - } - /* lxd is covered by xad */ - else { /* (xstart < lend) */ - - /* initialize new pxd */ - pstart = xstart; - plen = min(lend - xstart, xlen); - paddr = xaddr; - - goto cover; - } - - /* (xstart < lstart) */ - compare2: - /* lxd is covered by xad */ - if (lstart < xend) { - /* initialize new pxd */ - pstart = lstart; - plen = min(xend - lstart, llen); - paddr = xaddr + (lstart - xstart); - - goto cover; - } - /* lxd is NOT covered by xad */ - else { /* (xend <= lstart) */ - - /* - * get next xad - * - * linear search next xad covering lxd on - * the current xad page, and then tree search - */ - if (index == le16_to_cpu(p->header.nextindex) - 1) { - if (p->header.flag & BT_ROOT) - goto mapend; - - XT_PUTPAGE(mp); - goto search; - } else { - index++; - xad++; - - /* compare with new xad */ - goto compare; - } - } - - /* - * lxd is covered by xad and a new pxd has been initialized - * (lstart <= xstart < lend) or (xstart < lstart < xend) - */ - cover: - /* finalize pxd corresponding to current xad */ - XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr); - - if (++npxd >= maxnpxd) - goto mapend; - pxd++; - - /* - * lxd is fully covered by xad - */ - if (lend <= xend) { - /* - * get next lxd - */ - if (--nlxd == 0) - goto mapend; - lxd++; - - lstart = offsetLXD(lxd); - llen = lengthLXD(lxd); - lend = lstart + llen; - if (lstart >= size) - goto mapend; - - /* - * test for old xad covering new lxd - * (old xstart < new lstart) - */ - goto compare2; - } - /* - * lxd is partially covered by xad - */ - else { /* (xend < lend) */ - - /* - * get next xad - * - * linear search next xad covering lxd on - * the current xad page, and then next xad page search - */ - if (index == le16_to_cpu(p->header.nextindex) - 1) { - if (p->header.flag & BT_ROOT) - goto mapend; - - if ((bn = le64_to_cpu(p->header.next)) == 0) - goto mapend; - - XT_PUTPAGE(mp); - - /* get next sibling page */ - XT_GETPAGE(ip, bn, mp, PSIZE, p, rc); - if (rc) - return rc; - - index = XTENTRYSTART; - xad = &p->xad[index]; - } else { - index++; - xad++; - } - - /* - * test for new xad covering old lxd - * (old lstart < new xstart) - */ - goto compare; - } - - mapend: - xadlist->nxad = npxd; - -//out: - XT_PUTPAGE(mp); - - return rc; -} - - -/* - * xtSearch() - * - * function: search for the xad entry covering specified offset. + * function: search for the xad entry covering specified offset. * * parameters: - * ip - file object; - * xoff - extent offset; - * nextp - address of next extent (if any) for search miss - * cmpp - comparison result: - * btstack - traverse stack; - * flag - search process flag (XT_INSERT); + * ip - file object; + * xoff - extent offset; + * nextp - address of next extent (if any) for search miss + * cmpp - comparison result: + * btstack - traverse stack; + * flag - search process flag (XT_INSERT); * * returns: - * btstack contains (bn, index) of search path traversed to the entry. - * *cmpp is set to result of comparison with the entry returned. - * the page containing the entry is pinned at exit. 
+ * btstack contains (bn, index) of search path traversed to the entry. + * *cmpp is set to result of comparison with the entry returned. + * the page containing the entry is pinned at exit. */ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, int *cmpp, struct btstack * btstack, int flag) @@ -517,7 +259,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, btstack->nsplit = 0; /* - * search down tree from root: + * search down tree from root: * * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of * internal page, child page Pi contains entry with k, Ki <= K < Kj. @@ -642,7 +384,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, XT_CMP(cmp, xoff, &p->xad[index], t64); if (cmp == 0) { /* - * search hit + * search hit */ /* search hit - leaf page: * return the entry found @@ -692,7 +434,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, } /* - * search miss + * search miss * * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or maxentry index. @@ -757,6 +499,11 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, nsplit = 0; /* push (bn, index) of the parent page/entry */ + if (BT_STACK_FULL(btstack)) { + jfs_error(ip->i_sb, "stack overrun!\n"); + XT_PUTPAGE(mp); + return -EIO; + } BT_PUSH(btstack, bn, index); /* get the child page block number */ @@ -768,22 +515,22 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, } /* - * xtInsert() + * xtInsert() * * function: * * parameter: - * tid - transaction id; - * ip - file object; - * xflag - extent flag (XAD_NOTRECORDED): - * xoff - extent offset; - * xlen - extent length; - * xaddrp - extent address pointer (in/out): - * if (*xaddrp) - * caller allocated data extent at *xaddrp; - * else - * allocate data extent and return its xaddr; - * flag - + * tid - transaction id; + * ip - file object; + * xflag - extent flag (XAD_NOTRECORDED): + * xoff - extent offset; + * xlen - extent length; + * xaddrp - extent address pointer (in/out): + * if (*xaddrp) + * caller allocated data extent at *xaddrp; + * else + * allocate data extent and return its xaddr; + * flag - * * return: */ @@ -808,7 +555,7 @@ int xtInsert(tid_t tid, /* transaction id */ jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen); /* - * search for the entry location at which to insert: + * search for the entry location at which to insert: * * xtFastSearch() and xtSearch() both returns (leaf page * pinned, index at which to insert). @@ -839,22 +586,22 @@ int xtInsert(tid_t tid, /* transaction id */ hint = addressXAD(xad) + lengthXAD(xad) - 1; } else hint = 0; - if ((rc = DQUOT_ALLOC_BLOCK(ip, xlen))) + if ((rc = dquot_alloc_block(ip, xlen))) goto out; if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) { - DQUOT_FREE_BLOCK(ip, xlen); + dquot_free_block(ip, xlen); goto out; } } /* - * insert entry for new extent + * insert entry for new extent */ xflag |= XAD_NEW; /* - * if the leaf page is full, split the page and - * propagate up the router entry for the new page from split + * if the leaf page is full, split the page and + * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. 
*/ @@ -871,7 +618,7 @@ int xtInsert(tid_t tid, /* transaction id */ /* undo data extent allocation */ if (*xaddrp == 0) { dbFree(ip, xaddr, (s64) xlen); - DQUOT_FREE_BLOCK(ip, xlen); + dquot_free_block(ip, xlen); } return rc; } @@ -881,7 +628,7 @@ int xtInsert(tid_t tid, /* transaction id */ } /* - * insert the new entry into the leaf page + * insert the new entry into the leaf page */ /* * acquire a transaction lock on the leaf page; @@ -900,8 +647,7 @@ int xtInsert(tid_t tid, /* transaction id */ XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); /* advance next available entry index */ - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + le16_add_cpu(&p->header.nextindex, 1); /* Don't log it if there are no links to the file */ if (!test_cflag(COMMIT_Nolink, ip)) { @@ -925,16 +671,16 @@ int xtInsert(tid_t tid, /* transaction id */ /* - * xtSplitUp() + * xtSplitUp() * * function: - * split full pages as propagating insertion up the tree + * split full pages as propagating insertion up the tree * * parameter: - * tid - transaction id; - * ip - file object; - * split - entry parameter descriptor; - * btstack - traverse stack from xtSearch() + * tid - transaction id; + * ip - file object; + * split - entry parameter descriptor; + * btstack - traverse stack from xtSearch() * * return: */ @@ -992,8 +738,7 @@ xtSplitUp(tid_t tid, split->addr); /* advance next available entry index */ - sp->header.nextindex = - cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1); + le16_add_cpu(&sp->header.nextindex, 1); /* Don't log it if there are no links to the file */ if (!test_cflag(COMMIT_Nolink, ip)) { @@ -1162,9 +907,7 @@ xtSplitUp(tid_t tid, JFS_SBI(ip->i_sb)->nbperpage, rcbn); /* advance next available entry index. */ - sp->header.nextindex = - cpu_to_le16(le16_to_cpu(sp->header.nextindex) + - 1); + le16_add_cpu(&sp->header.nextindex, 1); /* Don't log it if there are no links to the file */ if (!test_cflag(COMMIT_Nolink, ip)) { @@ -1194,22 +937,22 @@ xtSplitUp(tid_t tid, /* - * xtSplitPage() + * xtSplitPage() * * function: - * split a full non-root page into - * original/split/left page and new right page - * i.e., the original/split page remains as left page. + * split a full non-root page into + * original/split/left page and new right page + * i.e., the original/split page remains as left page. * * parameter: - * int tid, - * struct inode *ip, - * struct xtsplit *split, - * struct metapage **rmpp, - * u64 *rbnp, + * int tid, + * struct inode *ip, + * struct xtsplit *split, + * struct metapage **rmpp, + * u64 *rbnp, * * return: - * Pointer to page in which to insert or NULL on error. + * Pointer to page in which to insert or NULL on error. */ static int xtSplitPage(tid_t tid, struct inode *ip, @@ -1243,10 +986,9 @@ xtSplitPage(tid_t tid, struct inode *ip, rbn = addressPXD(pxd); /* Allocate blocks to quota. 
*/ - if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { - rc = -EDQUOT; - goto clean_up; - } + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) + goto clean_up; quota_allocation += lengthPXD(pxd); @@ -1299,7 +1041,7 @@ xtSplitPage(tid_t tid, struct inode *ip, skip = split->index; /* - * sequential append at tail (after last entry of last page) + * sequential append at tail (after last entry of last page) * * if splitting the last page on a level because of appending * a entry to it (skip is maxentry), it's likely that the access is @@ -1337,7 +1079,7 @@ xtSplitPage(tid_t tid, struct inode *ip, } /* - * non-sequential insert (at possibly middle page) + * non-sequential insert (at possibly middle page) */ /* @@ -1453,32 +1195,31 @@ xtSplitPage(tid_t tid, struct inode *ip, /* Rollback quota allocation. */ if (quota_allocation) - DQUOT_FREE_BLOCK(ip, quota_allocation); + dquot_free_block(ip, quota_allocation); return (rc); } /* - * xtSplitRoot() + * xtSplitRoot() * * function: - * split the full root page into - * original/root/split page and new right page - * i.e., root remains fixed in tree anchor (inode) and - * the root is copied to a single new right child page - * since root page << non-root page, and - * the split root page contains a single entry for the - * new right child page. + * split the full root page into original/root/split page and new + * right page + * i.e., root remains fixed in tree anchor (inode) and the root is + * copied to a single new right child page since root page << + * non-root page, and the split root page contains a single entry + * for the new right child page. * * parameter: - * int tid, - * struct inode *ip, - * struct xtsplit *split, - * struct metapage **rmpp) + * int tid, + * struct inode *ip, + * struct xtsplit *split, + * struct metapage **rmpp) * * return: - * Pointer to page in which to insert or NULL on error. + * Pointer to page in which to insert or NULL on error. */ static int xtSplitRoot(tid_t tid, @@ -1494,13 +1235,14 @@ xtSplitRoot(tid_t tid, struct pxdlist *pxdlist; struct tlock *tlck; struct xtlock *xtlck; + int rc; sp = &JFS_IP(ip)->i_xtroot; INCREMENT(xtStat.split); /* - * allocate a single (right) child page + * allocate a single (right) child page */ pxdlist = split->pxdlist; pxd = &pxdlist->pxd[pxdlist->npxd]; @@ -1511,9 +1253,10 @@ xtSplitRoot(tid_t tid, return -EIO; /* Allocate blocks to quota. 
*/ - if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) { + rc = dquot_alloc_block(ip, lengthPXD(pxd)); + if (rc) { release_metapage(rmp); - return -EDQUOT; + return rc; } jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp); @@ -1568,7 +1311,7 @@ xtSplitRoot(tid_t tid, } /* - * reset the root + * reset the root * * init root with the single entry for the new right page * set the 1st entry offset to 0, which force the left-most key @@ -1605,7 +1348,7 @@ xtSplitRoot(tid_t tid, /* - * xtExtend() + * xtExtend() * * function: extend in-place; * @@ -1643,7 +1386,7 @@ int xtExtend(tid_t tid, /* transaction id */ if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); + jfs_error(ip->i_sb, "xtSearch did not find extent\n"); return -EIO; } @@ -1651,7 +1394,7 @@ int xtExtend(tid_t tid, /* transaction id */ xad = &p->xad[index]; if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); + jfs_error(ip->i_sb, "extension is not contiguous\n"); return -EIO; } @@ -1672,7 +1415,7 @@ int xtExtend(tid_t tid, /* transaction id */ goto extendOld; /* - * extent overflow: insert entry for new extent + * extent overflow: insert entry for new extent */ //insertNew: xoff = offsetXAD(xad) + MAXXLEN; @@ -1680,8 +1423,8 @@ int xtExtend(tid_t tid, /* transaction id */ nextindex = le16_to_cpu(p->header.nextindex); /* - * if the leaf page is full, insert the new entry and - * propagate up the router entry for the new page from split + * if the leaf page is full, insert the new entry and + * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. */ @@ -1726,7 +1469,7 @@ int xtExtend(tid_t tid, /* transaction id */ } } /* - * insert the new entry into the leaf page + * insert the new entry into the leaf page */ else { /* insert the new entry: mark the entry NEW */ @@ -1734,8 +1477,7 @@ int xtExtend(tid_t tid, /* transaction id */ XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr); /* advance next available entry index */ - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + le16_add_cpu(&p->header.nextindex, 1); } /* get back old entry */ @@ -1766,11 +1508,11 @@ int xtExtend(tid_t tid, /* transaction id */ #ifdef _NOTYET /* - * xtTailgate() + * xtTailgate() * * function: split existing 'tail' extent - * (split offset >= start offset of tail extent), and - * relocate and extend the split tail half; + * (split offset >= start offset of tail extent), and + * relocate and extend the split tail half; * * note: existing extent may or may not have been committed. 
* caller is responsible for pager buffer cache update, and @@ -1799,7 +1541,7 @@ int xtTailgate(tid_t tid, /* transaction id */ /* printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", - (ulong)xoff, xlen, (ulong)xaddr); + (ulong)xoff, xlen, (ulong)xaddr); */ /* there must exist extent to be tailgated */ @@ -1811,7 +1553,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); + jfs_error(ip->i_sb, "couldn't find extent\n"); return -EIO; } @@ -1819,8 +1561,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", nextindex = le16_to_cpu(p->header.nextindex); if (index != nextindex - 1) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtTailgate: the entry found is not the last entry"); + jfs_error(ip->i_sb, "the entry found is not the last entry\n"); return -EIO; } @@ -1837,18 +1578,18 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", xad = &p->xad[index]; /* printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", - (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); + (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad)); */ if ((llen = xoff - offsetXAD(xad)) == 0) goto updateOld; /* - * partially replace extent: insert entry for new extent + * partially replace extent: insert entry for new extent */ //insertNew: /* - * if the leaf page is full, insert the new entry and - * propagate up the router entry for the new page from split + * if the leaf page is full, insert the new entry and + * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. */ @@ -1893,7 +1634,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", } } /* - * insert the new entry into the leaf page + * insert the new entry into the leaf page */ else { /* insert the new entry: mark the entry NEW */ @@ -1901,8 +1642,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr); /* advance next available entry index */ - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + le16_add_cpu(&p->header.nextindex, 1); } /* get back old XAD */ @@ -1950,17 +1690,17 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n", #endif /* _NOTYET */ /* - * xtUpdate() + * xtUpdate() * * function: update XAD; * - * update extent for allocated_but_not_recorded or - * compressed extent; + * update extent for allocated_but_not_recorded or + * compressed extent; * * parameter: - * nxad - new XAD; - * logical extent of the specified XAD must be completely - * contained by an existing XAD; + * nxad - new XAD; + * logical extent of the specified XAD must be completely + * contained by an existing XAD; */ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) { /* new XAD */ @@ -1994,7 +1734,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); + jfs_error(ip->i_sb, "Could not find extent\n"); return -EIO; } @@ -2018,7 +1758,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) (nxoff + nxlen > xoff + xlen)) { XT_PUTPAGE(mp); jfs_error(ip->i_sb, - "xtUpdate: nXAD in not completely contained within XAD"); + "nXAD in not completely contained within XAD\n"); return -EIO; } @@ -2167,7 +1907,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (xoff >= nxoff) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); + jfs_error(ip->i_sb, "xoff >= 
nxoff\n"); return -EIO; } /* #endif _JFS_WIP_COALESCE */ @@ -2308,14 +2048,13 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); + jfs_error(ip->i_sb, "xtSearch failed\n"); return -EIO; } if (index0 != index) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtUpdate: unexpected value of index"); + jfs_error(ip->i_sb, "unexpected value of index\n"); return -EIO; } } @@ -2411,24 +2150,24 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p); /* - * xtAppend() + * xtAppend() * * function: grow in append mode from contiguous region specified ; * * parameter: - * tid - transaction id; - * ip - file object; - * xflag - extent flag: - * xoff - extent offset; - * maxblocks - max extent length; - * xlen - extent length (in/out); - * xaddrp - extent address pointer (in/out): - * flag - + * tid - transaction id; + * ip - file object; + * xflag - extent flag: + * xoff - extent offset; + * maxblocks - max extent length; + * xlen - extent length (in/out); + * xaddrp - extent address pointer (in/out): + * flag - * * return: */ int xtAppend(tid_t tid, /* transaction id */ - struct inode *ip, int xflag, s64 xoff, s32 maxblocks, + struct inode *ip, int xflag, s64 xoff, s32 maxblocks, s32 * xlenp, /* (in/out) */ s64 * xaddrp, /* (in/out) */ int flag) @@ -2455,7 +2194,7 @@ int xtAppend(tid_t tid, /* transaction id */ (ulong) xoff, maxblocks, xlen, (ulong) xaddr); /* - * search for the entry location at which to insert: + * search for the entry location at which to insert: * * xtFastSearch() and xtSearch() both returns (leaf page * pinned, index at which to insert). @@ -2477,13 +2216,13 @@ int xtAppend(tid_t tid, /* transaction id */ xlen = min(xlen, (int)(next - xoff)); //insert: /* - * insert entry for new extent + * insert entry for new extent */ xflag |= XAD_NEW; /* - * if the leaf page is full, split the page and - * propagate up the router entry for the new page from split + * if the leaf page is full, split the page and + * propagate up the router entry for the new page from split * * The xtSplitUp() will insert the entry and unpin the leaf page. */ @@ -2499,7 +2238,7 @@ int xtAppend(tid_t tid, /* transaction id */ pxdlist.maxnpxd = pxdlist.npxd = 0; pxd = &pxdlist.pxd[0]; nblocks = JFS_SBI(ip->i_sb)->nbperpage; - for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { + for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) { PXDaddress(pxd, xaddr); PXDlength(pxd, nblocks); @@ -2514,7 +2253,7 @@ int xtAppend(tid_t tid, /* transaction id */ goto out; } - xlen = min(xlen, maxblocks); + xlen = min(xlen, maxblocks); /* * allocate data extent requested @@ -2540,7 +2279,7 @@ int xtAppend(tid_t tid, /* transaction id */ return 0; /* - * insert the new entry into the leaf page + * insert the new entry into the leaf page */ insertLeaf: /* @@ -2563,8 +2302,7 @@ int xtAppend(tid_t tid, /* transaction id */ XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr); /* advance next available entry index */ - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1); + le16_add_cpu(&p->header.nextindex, 1); xtlck->lwm.offset = (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index; @@ -2584,17 +2322,17 @@ int xtAppend(tid_t tid, /* transaction id */ /* - TBD for defragmentaion/reorganization - * - * xtDelete() + * xtDelete() * * function: - * delete the entry with the specified key. 
+ * delete the entry with the specified key. * - * N.B.: whole extent of the entry is assumed to be deleted. + * N.B.: whole extent of the entry is assumed to be deleted. * * parameter: * * return: - * ENOENT: if the entry is not found. + * ENOENT: if the entry is not found. * * exception: */ @@ -2627,8 +2365,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) * delete the entry from the leaf page */ nextindex = le16_to_cpu(p->header.nextindex); - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1); + le16_add_cpu(&p->header.nextindex, -1); /* * if the leaf page bocome empty, free the page @@ -2660,10 +2397,10 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag) /* - TBD for defragmentaion/reorganization - * - * xtDeleteUp() + * xtDeleteUp() * * function: - * free empty pages as propagating deletion up the tree + * free empty pages as propagating deletion up the tree * * parameter: * @@ -2791,9 +2528,7 @@ xtDeleteUp(tid_t tid, struct inode *ip, (nextindex - index - 1) << L2XTSLOTSIZE); - p->header.nextindex = - cpu_to_le16(le16_to_cpu(p->header.nextindex) - - 1); + le16_add_cpu(&p->header.nextindex, -1); jfs_info("xtDeleteUp(entry): 0x%lx[%d]", (ulong) parent->bn, index); } @@ -2810,15 +2545,15 @@ xtDeleteUp(tid_t tid, struct inode *ip, /* - * NAME: xtRelocate() + * NAME: xtRelocate() * - * FUNCTION: relocate xtpage or data extent of regular file; - * This function is mainly used by defragfs utility. + * FUNCTION: relocate xtpage or data extent of regular file; + * This function is mainly used by defragfs utility. * - * NOTE: This routine does not have the logic to handle - * uncommitted allocated extent. The caller should call - * txCommit() to commit all the allocation before call - * this routine. + * NOTE: This routine does not have the logic to handle + * uncommitted allocated extent. The caller should call + * txCommit() to commit all the allocation before call + * this routine. */ int xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ @@ -2860,8 +2595,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr); /* - * 1. get and validate the parent xtpage/xad entry - * covering the source extent to be relocated; + * 1. get and validate the parent xtpage/xad entry + * covering the source extent to be relocated; */ if (xtype == DATAEXT) { /* search in leaf entry */ @@ -2905,7 +2640,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ jfs_info("xtRelocate: parent xad entry validated."); /* - * 2. relocate the extent + * 2. 
relocate the extent */ if (xtype == DATAEXT) { /* if the extent is allocated-but-not-recorded @@ -2918,7 +2653,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ XT_PUTPAGE(pmp); /* - * cmRelocate() + * cmRelocate() * * copy target data pages to be relocated; * @@ -2940,8 +2675,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ pno = offset >> CM_L2BSIZE; npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE; /* - npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - - (offset >> CM_L2BSIZE) + 1; + npages = ((offset + nbytes - 1) >> CM_L2BSIZE) - + (offset >> CM_L2BSIZE) + 1; */ sxaddr = oxaddr; dxaddr = nxaddr; @@ -2964,7 +2699,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ cmSetXD(ip, cp, pno, dxaddr, nblks); /* release the cbuf, mark it as modified */ - cmPut(cp, TRUE); + cmPut(cp, true); dxaddr += nblks; sxaddr += nblks; @@ -2976,7 +2711,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index); jfs_info("xtRelocate: target data extent relocated."); - } else { /* (xtype == XTPAGE) */ + } else { /* (xtype == XTPAGE) */ /* * read in the target xtpage from the source extent; @@ -3021,16 +2756,14 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ */ if (lmp) { BT_MARK_DIRTY(lmp, ip); - tlck = - txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); + tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK); lp->header.next = cpu_to_le64(nxaddr); XT_PUTPAGE(lmp); } if (rmp) { BT_MARK_DIRTY(rmp, ip); - tlck = - txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); + tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK); rp->header.prev = cpu_to_le64(nxaddr); XT_PUTPAGE(rmp); } @@ -3057,7 +2790,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ * scan may be skipped by commit() and logredo(); */ BT_MARK_DIRTY(mp, ip); - /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ + /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */ tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW); xtlck = (struct xtlock *) & tlck->lock; @@ -3079,7 +2812,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ } /* - * 3. acquire maplock for the source extent to be freed; + * 3. acquire maplock for the source extent to be freed; * * acquire a maplock saving the src relocated extent address; * to free of the extent at commit time; @@ -3100,7 +2833,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ * is no buffer associated with this lock since the buffer * has been redirected to the target location. */ - else /* (xtype == XTPAGE) */ + else /* (xtype == XTPAGE) */ tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE); pxdlock = (struct pxd_lock *) & tlck->lock; @@ -3110,7 +2843,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ pxdlock->index = 1; /* - * 4. update the parent xad entry for relocation; + * 4. update the parent xad entry for relocation; * * acquire tlck for the parent entry with XAD_NEW as entry * update which will write LOG_REDOPAGE and update bmap for @@ -3138,22 +2871,22 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad, /* old XAD */ /* - * xtSearchNode() + * xtSearchNode() * - * function: search for the internal xad entry covering specified extent. - * This function is mainly used by defragfs utility. + * function: search for the internal xad entry covering specified extent. + * This function is mainly used by defragfs utility. 
* * parameters: - * ip - file object; - * xad - extent to find; - * cmpp - comparison result: - * btstack - traverse stack; - * flag - search process flag; + * ip - file object; + * xad - extent to find; + * cmpp - comparison result: + * btstack - traverse stack; + * flag - search process flag; * * returns: - * btstack contains (bn, index) of search path traversed to the entry. - * *cmpp is set to result of comparison with the entry returned. - * the page containing the entry is pinned at exit. + * btstack contains (bn, index) of search path traversed to the entry. + * *cmpp is set to result of comparison with the entry returned. + * the page containing the entry is pinned at exit. */ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ int *cmpp, struct btstack * btstack, int flag) @@ -3176,7 +2909,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ xaddr = addressXAD(xad); /* - * search down tree from root: + * search down tree from root: * * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of * internal page, child page Pi contains entry with k, Ki <= K < Kj. @@ -3212,7 +2945,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ XT_CMP(cmp, xoff, &p->xad[index], t64); if (cmp == 0) { /* - * search hit + * search hit * * verify for exact match; */ @@ -3240,7 +2973,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ } /* - * search miss - non-leaf page: + * search miss - non-leaf page: * * base is the smallest index with key (Kj) greater than * search key (K) and may be zero or maxentry index. @@ -3263,15 +2996,15 @@ static int xtSearchNode(struct inode *ip, xad_t * xad, /* required XAD entry */ /* - * xtRelink() + * xtRelink() * * function: - * link around a freed page. + * link around a freed page. * * Parameter: - * int tid, - * struct inode *ip, - * xtpage_t *p) + * int tid, + * struct inode *ip, + * xtpage_t *p) * * returns: */ @@ -3333,7 +3066,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p) /* - * xtInitRoot() + * xtInitRoot() * * initialize file root (inline in inode) */ @@ -3380,42 +3113,42 @@ void xtInitRoot(tid_t tid, struct inode *ip) #define MAX_TRUNCATE_LEAVES 50 /* - * xtTruncate() + * xtTruncate() * * function: - * traverse for truncation logging backward bottom up; - * terminate at the last extent entry at the current subtree - * root page covering new down size. - * truncation may occur within the last extent entry. + * traverse for truncation logging backward bottom up; + * terminate at the last extent entry at the current subtree + * root page covering new down size. + * truncation may occur within the last extent entry. * * parameter: - * int tid, - * struct inode *ip, - * s64 newsize, - * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} + * int tid, + * struct inode *ip, + * s64 newsize, + * int type) {PWMAP, PMAP, WMAP; DELETE, TRUNCATE} * * return: * * note: - * PWMAP: - * 1. truncate (non-COMMIT_NOLINK file) - * by jfs_truncate() or jfs_open(O_TRUNC): - * xtree is updated; + * PWMAP: + * 1. truncate (non-COMMIT_NOLINK file) + * by jfs_truncate() or jfs_open(O_TRUNC): + * xtree is updated; * 2. truncate index table of directory when last entry removed - * map update via tlock at commit time; - * PMAP: + * map update via tlock at commit time; + * PMAP: * Call xtTruncate_pmap instead - * WMAP: - * 1. remove (free zero link count) on last reference release - * (pmap has been freed at commit zero link count); - * 2. 
truncate (COMMIT_NOLINK file, i.e., tmp file): - * xtree is updated; - * map update directly at truncation time; - * - * if (DELETE) - * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); - * else if (TRUNCATE) - * must write LOG_NOREDOPAGE for deleted index page; + * WMAP: + * 1. remove (free zero link count) on last reference release + * (pmap has been freed at commit zero link count); + * 2. truncate (COMMIT_NOLINK file, i.e., tmp file): + * xtree is updated; + * map update directly at truncation time; + * + * if (DELETE) + * no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient); + * else if (TRUNCATE) + * must write LOG_NOREDOPAGE for deleted index page; * * pages may already have been tlocked by anonymous transactions * during file growth (i.e., write) before truncation; @@ -3488,7 +3221,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) * retained in the new sized file. * if type is PMAP, the data and index pages are NOT * freed, and the data and index blocks are NOT freed - * from working map. + * from working map. * (this will allow continued access of data/index of * temporary file (zerolink count file truncated to zero-length)). */ @@ -3537,7 +3270,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) goto getChild; /* - * leaf page + * leaf page */ freed = 0; @@ -3911,10 +3644,15 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) } /* - * internal page: go down to child page of current entry + * internal page: go down to child page of current entry */ getChild: /* save current parent entry for the child page */ + if (BT_STACK_FULL(&btstack)) { + jfs_error(ip->i_sb, "stack overrun!\n"); + XT_PUTPAGE(mp); + return -EIO; + } BT_PUSH(&btstack, bn, index); /* get child page */ @@ -3942,7 +3680,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) ip->i_size = newsize; /* update quota allocation to reflect freed blocks */ - DQUOT_FREE_BLOCK(ip, nfreed); + dquot_free_block(ip, nfreed); /* * free tlock of invalidated pages @@ -3955,18 +3693,18 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) /* - * xtTruncate_pmap() + * xtTruncate_pmap() * * function: - * Perform truncate to zero lenghth for deleted file, leaving the + * Perform truncate to zero length for deleted file, leaving the * the xtree and working map untouched. This allows the file to * be accessed via open file handles, while the delete of the file * is committed to disk. 
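One of the few functional changes in the xtTruncate()/xtTruncate_pmap() hunks is the overflow check inserted in front of BT_PUSH() at the getChild label. Pulled out of the surrounding diff for readability (same identifiers as the patch; a fragment, not a complete function):

getChild:
	/* save current parent entry for the child page */
	if (BT_STACK_FULL(&btstack)) {
		/* refuse to descend rather than overrun the fixed-size btstack */
		jfs_error(ip->i_sb, "stack overrun!\n");
		XT_PUTPAGE(mp);
		return -EIO;
	}
	BT_PUSH(&btstack, bn, index);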
* * parameter: - * tid_t tid, - * struct inode *ip, - * s64 committed_size) + * tid_t tid, + * struct inode *ip, + * s64 committed_size) * * return: new committed size * @@ -4012,8 +3750,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtTruncate_pmap: did not find extent"); + jfs_error(ip->i_sb, "did not find extent\n"); return -EIO; } } else { @@ -4040,7 +3777,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) } /* - * leaf page + * leaf page */ if (++locked_leaves > MAX_TRUNCATE_LEAVES) { @@ -4052,7 +3789,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) xoff = offsetXAD(xad); xlen = lengthXAD(xad); XT_PUTPAGE(mp); - return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; + return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize; } tlck = txLock(tid, ip, mp, tlckXTREE); tlck->type = tlckXTREE | tlckFREE; @@ -4089,8 +3826,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) */ tlck = txLock(tid, ip, mp, tlckXTREE); xtlck = (struct xtlock *) & tlck->lock; - xtlck->hwm.offset = - le16_to_cpu(p->header.nextindex) - 1; + xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1; tlck->type = tlckXTREE | tlckFREE; XT_PUTPAGE(mp); @@ -4108,10 +3844,15 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) else index--; /* - * internal page: go down to child page of current entry + * internal page: go down to child page of current entry */ getChild: /* save current parent entry for the child page */ + if (BT_STACK_FULL(&btstack)) { + jfs_error(ip->i_sb, "stack overrun!\n"); + XT_PUTPAGE(mp); + return -EIO; + } BT_PUSH(&btstack, bn, index); /* get child page */ @@ -4133,13 +3874,9 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) } #ifdef CONFIG_JFS_STATISTICS -int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, - int *eof, void *data) +static int jfs_xtstat_proc_show(struct seq_file *m, void *v) { - int len = 0; - off_t begin; - - len += sprintf(buffer, + seq_printf(m, "JFS Xtree statistics\n" "====================\n" "searches = %d\n" @@ -4148,19 +3885,19 @@ int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length, xtStat.search, xtStat.fastSearch, xtStat.split); + return 0; +} - begin = offset; - *start = buffer + begin; - len -= begin; - - if (len > length) - len = length; - else - *eof = 1; - - if (len < 0) - len = 0; - - return len; +static int jfs_xtstat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, jfs_xtstat_proc_show, NULL); } + +const struct file_operations jfs_xtstat_proc_fops = { + .owner = THIS_MODULE, + .open = jfs_xtstat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h index af668a80b40..08c0c749b98 100644 --- a/fs/jfs/jfs_xtree.h +++ b/fs/jfs/jfs_xtree.h @@ -1,32 +1,32 @@ /* - * Copyright (c) International Business Machines Corp., 2000-2002 + * Copyright (C) International Business Machines Corp., 2000-2002 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. 
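The jfs_xtstat block a few hunks back drops the old read_proc-style handler in favour of seq_file with single_open(). A hedged sketch of that pattern with generic names (the example_stat_* identifiers are illustrative; only the kernel APIs shown are real):

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/seq_file.h>

/* ->show() emits the whole report; seq_file handles buffering and offsets */
static int example_stat_show(struct seq_file *m, void *v)
{
	seq_printf(m, "searches = %d\n", 42);	/* placeholder value */
	return 0;
}

static int example_stat_open(struct inode *inode, struct file *file)
{
	/* single_open(): a single show() call produces the entire file */
	return single_open(file, example_stat_show, NULL);
}

static const struct file_operations example_stat_fops = {
	.owner		= THIS_MODULE,
	.open		= example_stat_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

The registration of jfs_xtstat_proc_fops under /proc/fs/jfs presumably happens in jfs_debug.c, outside these hunks.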
- * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef _H_JFS_XTREE #define _H_JFS_XTREE /* - * jfs_xtree.h: extent allocation descriptor B+-tree manager + * jfs_xtree.h: extent allocation descriptor B+-tree manager */ #include "jfs_btree.h" /* - * extent allocation descriptor (xad) + * extent allocation descriptor (xad) */ typedef struct xad { unsigned flag:8; /* 1: flag */ @@ -38,30 +38,30 @@ typedef struct xad { __le32 addr2; /* 4: address in unit of fsblksize */ } xad_t; /* (16) */ -#define MAXXLEN ((1 << 24) - 1) +#define MAXXLEN ((1 << 24) - 1) -#define XTSLOTSIZE 16 -#define L2XTSLOTSIZE 4 +#define XTSLOTSIZE 16 +#define L2XTSLOTSIZE 4 /* xad_t field construction */ #define XADoffset(xad, offset64)\ {\ - (xad)->off1 = ((u64)offset64) >> 32;\ - (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ + (xad)->off1 = ((u64)offset64) >> 32;\ + (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\ } #define XADaddress(xad, address64)\ {\ - (xad)->addr1 = ((u64)address64) >> 32;\ - (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ + (xad)->addr1 = ((u64)address64) >> 32;\ + (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\ } -#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) +#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32) /* xad_t field extraction */ #define offsetXAD(xad)\ - ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) + ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2)) #define addressXAD(xad)\ - ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) -#define lengthXAD(xad) __le24_to_cpu((xad)->len) + ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2)) +#define lengthXAD(xad) __le24_to_cpu((xad)->len) /* xad list */ struct xadlist { @@ -71,22 +71,22 @@ struct xadlist { }; /* xad_t flags */ -#define XAD_NEW 0x01 /* new */ -#define XAD_EXTENDED 0x02 /* extended */ -#define XAD_COMPRESSED 0x04 /* compressed with recorded length */ +#define XAD_NEW 0x01 /* new */ +#define XAD_EXTENDED 0x02 /* extended */ +#define XAD_COMPRESSED 0x04 /* compressed with recorded length */ #define XAD_NOTRECORDED 0x08 /* allocated but not recorded */ -#define XAD_COW 0x10 /* copy-on-write */ +#define XAD_COW 0x10 /* copy-on-write */ /* possible values for maxentry */ -#define XTROOTINITSLOT_DIR 6 -#define XTROOTINITSLOT 10 -#define XTROOTMAXSLOT 18 -#define XTPAGEMAXSLOT 256 -#define XTENTRYSTART 2 +#define XTROOTINITSLOT_DIR 6 +#define XTROOTINITSLOT 10 +#define XTROOTMAXSLOT 18 +#define XTPAGEMAXSLOT 256 +#define XTENTRYSTART 2 /* - * xtree page: + * xtree page: */ typedef union { struct xtheader { @@ -106,12 +106,10 @@ typedef union { } xtpage_t; /* - * external declaration + * external declaration */ extern int xtLookup(struct inode *ip, s64 lstart, s64 llen, int *pflag, s64 * paddr, int *plen, int flag); -extern int xtLookupList(struct inode *ip, struct lxdlist * lxdlist, - struct xadlist * xadlist, int flag); extern void xtInitRoot(tid_t tid, struct inode *ip); extern int xtInsert(tid_t tid, struct inode *ip, int xflag, s64 xoff, int xlen, s64 
* xaddrp, int flag); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 1abe7343f92..d59c7defb1e 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -4,22 +4,24 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> +#include <linux/namei.h> #include <linux/ctype.h> #include <linux/quotaops.h> +#include <linux/exportfs.h> #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_inode.h" @@ -34,14 +36,14 @@ /* * forward references */ -struct dentry_operations jfs_ci_dentry_operations; +const struct dentry_operations jfs_ci_dentry_operations; static s64 commitZeroLink(tid_t, struct inode *); /* * NAME: free_ea_wmap(inode) * - * FUNCTION: free uncommitted extended attributes from working map + * FUNCTION: free uncommitted extended attributes from working map * */ static inline void free_ea_wmap(struct inode *inode) @@ -62,7 +64,7 @@ static inline void free_ea_wmap(struct inode *inode) * FUNCTION: create a regular file in the parent directory <dip> * with name = <from dentry> and mode = <mode> * - * PARAMETER: dip - parent directory vnode + * PARAMETER: dip - parent directory vnode * dentry - dentry of new file * mode - create mode (rwxrwxrwx). * nd- nd struct @@ -70,8 +72,8 @@ static inline void free_ea_wmap(struct inode *inode) * RETURN: Errors from subroutines * */ -static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, - struct nameidata *nd) +static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, + bool excl) { int rc = 0; tid_t tid; /* transaction id */ @@ -84,6 +86,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name); + dquot_initialize(dip); + /* * search parent directory for entry/freespace * (dtSearch() returns parent directory page pinned) @@ -97,21 +101,21 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, * begin the transaction before we search the directory. 
*/ ip = ialloc(dip, mode); - if (ip == NULL) { - rc = -ENOSPC; + if (IS_ERR(ip)) { + rc = PTR_ERR(ip); goto out2; } tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); + mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); rc = jfs_init_acl(tid, ip, dip); if (rc) goto out3; - rc = jfs_init_security(tid, ip, dip); + rc = jfs_init_security(tid, ip, dip, &dentry->d_name); if (rc) { txAbort(tid, 0); goto out3; @@ -154,7 +158,6 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, ip->i_fop = &jfs_file_operations; ip->i_mapping->a_ops = &jfs_aops; - insert_inode_hash(ip); mark_inode_dirty(ip); dip->i_ctime = dip->i_mtime = CURRENT_TIME; @@ -165,14 +168,17 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, out3: txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dip)->commit_mutex); if (rc) { free_ea_wmap(ip); - ip->i_nlink = 0; + clear_nlink(ip); + unlock_new_inode(ip); iput(ip); - } else + } else { + unlock_new_inode(ip); d_instantiate(dentry, ip); + } out2: free_UCSname(&dname); @@ -190,7 +196,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, * FUNCTION: create a child directory in the parent directory <dip> * with name = <from dentry> and mode = <mode> * - * PARAMETER: dip - parent directory vnode + * PARAMETER: dip - parent directory vnode * dentry - dentry of child directory * mode - create mode (rwxrwxrwx). * @@ -199,7 +205,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, * note: * EACCESS: user needs search+write permission on the parent directory */ -static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) +static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) { int rc = 0; tid_t tid; /* transaction id */ @@ -212,11 +218,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name); - /* link count overflow on parent directory ? */ - if (dip->i_nlink == JFS_LINK_MAX) { - rc = -EMLINK; - goto out1; - } + dquot_initialize(dip); /* * search parent directory for entry/freespace @@ -231,21 +233,21 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) * begin the transaction before we search the directory. */ ip = ialloc(dip, S_IFDIR | mode); - if (ip == NULL) { - rc = -ENOSPC; + if (IS_ERR(ip)) { + rc = PTR_ERR(ip); goto out2; } tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); + mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); rc = jfs_init_acl(tid, ip, dip); if (rc) goto out3; - rc = jfs_init_security(tid, ip, dip); + rc = jfs_init_security(tid, ip, dip, &dentry->d_name); if (rc) { txAbort(tid, 0); goto out3; @@ -284,15 +286,14 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) goto out3; } - ip->i_nlink = 2; /* for '.' */ + set_nlink(ip, 2); /* for '.' */ ip->i_op = &jfs_dir_inode_operations; ip->i_fop = &jfs_dir_operations; - insert_inode_hash(ip); mark_inode_dirty(ip); /* update parent directory inode */ - dip->i_nlink++; /* for '..' from child directory */ + inc_nlink(dip); /* for '..' 
from child directory */ dip->i_ctime = dip->i_mtime = CURRENT_TIME; mark_inode_dirty(dip); @@ -300,14 +301,17 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) out3: txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dip)->commit_mutex); if (rc) { free_ea_wmap(ip); - ip->i_nlink = 0; + clear_nlink(ip); + unlock_new_inode(ip); iput(ip); - } else + } else { + unlock_new_inode(ip); d_instantiate(dentry, ip); + } out2: free_UCSname(&dname); @@ -324,18 +328,18 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) * * FUNCTION: remove a link to child directory * - * PARAMETER: dip - parent inode + * PARAMETER: dip - parent inode * dentry - child directory dentry * * RETURN: -EINVAL - if name is . or .. - * -EINVAL - if . or .. exist but are invalid. + * -EINVAL - if . or .. exist but are invalid. * errors from subroutines * * note: - * if other threads have the directory open when the last link - * is removed, the "." and ".." entries, if present, are removed before - * rmdir() returns and no new entries may be created in the directory, - * but the directory is not removed until the last reference to + * if other threads have the directory open when the last link + * is removed, the "." and ".." entries, if present, are removed before + * rmdir() returns and no new entries may be created in the directory, + * but the directory is not removed until the last reference to * the directory is released (cf.unlink() of regular file). */ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) @@ -351,7 +355,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); /* Init inode for quota operations. */ - DQUOT_INIT(ip); + dquot_initialize(dip); + dquot_initialize(ip); /* directory must be empty to be removed */ if (!dtEmpty(ip)) { @@ -365,8 +370,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); + mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); iplist[0] = dip; iplist[1] = ip; @@ -384,8 +389,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) if (rc == -EIO) txAbort(tid, 1); txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dip)->commit_mutex); goto out2; } @@ -393,9 +398,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) /* update parent directory's link count corresponding * to ".." entry of the target directory deleted */ - dip->i_nlink--; dip->i_ctime = dip->i_mtime = CURRENT_TIME; - mark_inode_dirty(dip); + inode_dec_link_count(dip); /* * OS/2 could have created EA and/or ACL @@ -415,15 +419,15 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) JFS_IP(ip)->acl.flag = 0; /* mark the target directory as deleted */ - ip->i_nlink = 0; + clear_nlink(ip); mark_inode_dirty(ip); rc = txCommit(tid, 2, &iplist[0], 0); txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dip)->commit_mutex); /* * Truncating the directory index table is not guaranteed. 
It @@ -447,11 +451,11 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) /* * NAME: jfs_unlink(dip, dentry) * - * FUNCTION: remove a link to object <vp> named by <name> + * FUNCTION: remove a link to object <vp> named by <name> * from parent directory <dvp> * - * PARAMETER: dip - inode of parent directory - * dentry - dentry of object to be removed + * PARAMETER: dip - inode of parent directory + * dentry - dentry of object to be removed * * RETURN: errors from subroutines * @@ -479,17 +483,18 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name); /* Init inode for quota operations. */ - DQUOT_INIT(ip); + dquot_initialize(dip); + dquot_initialize(ip); if ((rc = get_UCSname(&dname, dentry))) goto out; - IWRITE_LOCK(ip); + IWRITE_LOCK(ip, RDWRLOCK_NORMAL); tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); + mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); iplist[0] = dip; iplist[1] = ip; @@ -503,8 +508,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) if (rc == -EIO) txAbort(tid, 1); /* Marks FS Dirty */ txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dip)->commit_mutex); IWRITE_UNLOCK(ip); goto out1; } @@ -515,11 +520,10 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) mark_inode_dirty(dip); /* update target's inode */ - ip->i_nlink--; - mark_inode_dirty(ip); + inode_dec_link_count(ip); /* - * commit zero link count object + * commit zero link count object */ if (ip->i_nlink == 0) { assert(!test_cflag(COMMIT_Nolink, ip)); @@ -527,8 +531,8 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) if ((new_size = commitZeroLink(tid, ip)) < 0) { txAbort(tid, 1); /* Marks FS Dirty */ txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dip)->commit_mutex); IWRITE_UNLOCK(ip); rc = new_size; goto out1; @@ -556,13 +560,12 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); - + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dip)->commit_mutex); while (new_size && (rc == 0)) { tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(ip)->commit_sem); + mutex_lock(&JFS_IP(ip)->commit_mutex); new_size = xtTruncate_pmap(tid, ip, new_size); if (new_size < 0) { txAbort(tid, 1); /* Marks FS Dirty */ @@ -570,7 +573,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) } else rc = txCommit(tid, 2, &iplist[0], COMMIT_SYNC); txEnd(tid); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); } if (ip->i_nlink == 0) @@ -599,9 +602,9 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) /* * NAME: commitZeroLink() * - * FUNCTION: for non-directory, called by jfs_remove(), + * FUNCTION: for non-directory, called by jfs_remove(), * truncate a regular file, directory or symbolic - * link to zero length. return 0 if type is not + * link to zero length. return 0 if type is not * one of these. * * if the file is currently associated with a VM segment @@ -611,7 +614,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry) * map by ctrunc1. 
* if there is no VM segment on entry, the resources are * freed in both work and permanent map. - * (? for temporary file - memory object is cached even + * (? for temporary file - memory object is cached even * after no reference: * reference count > 0 - ) * @@ -665,7 +668,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) /* * free xtree/data (truncate to zero length): - * free xtree/data pages from cache if COMMIT_PWMAP, + * free xtree/data pages from cache if COMMIT_PWMAP, * free xtree/data blocks from persistent block map, and * free xtree/data blocks from working block map if COMMIT_PWMAP; */ @@ -679,8 +682,8 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip) /* * NAME: jfs_free_zero_link() * - * FUNCTION: for non-directory, called by iClose(), - * free resources of a file from cache and WORKING map + * FUNCTION: for non-directory, called by iClose(), + * free resources of a file from cache and WORKING map * for a file previously committed with zero link count * while associated with a pager object, * @@ -765,7 +768,7 @@ void jfs_free_zero_link(struct inode *ip) * FUNCTION: create a link to <vp> by the name = <name> * in the parent directory <dvp> * - * PARAMETER: vp - target object + * PARAMETER: vp - target object * dvp - parent directory of new link * name - name of new link to target object * crp - credential @@ -797,16 +800,12 @@ static int jfs_link(struct dentry *old_dentry, jfs_info("jfs_link: %s %s", old_dentry->d_name.name, dentry->d_name.name); - if (ip->i_nlink == JFS_LINK_MAX) - return -EMLINK; - - if (ip->i_nlink == 0) - return -ENOENT; + dquot_initialize(dir); tid = txBegin(ip->i_sb, 0); - down(&JFS_IP(dir)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); + mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); /* * scan parent directory for entry/freespace @@ -825,17 +824,18 @@ static int jfs_link(struct dentry *old_dentry, goto free_dname; /* update object inode */ - ip->i_nlink++; /* for new link */ + inc_nlink(ip); /* for new link */ ip->i_ctime = CURRENT_TIME; + dir->i_ctime = dir->i_mtime = CURRENT_TIME; mark_inode_dirty(dir); - atomic_inc(&ip->i_count); + ihold(ip); iplist[0] = ip; iplist[1] = dir; rc = txCommit(tid, 2, &iplist[0], 0); if (rc) { - ip->i_nlink--; + drop_nlink(ip); /* never instantiated */ iput(ip); } else d_instantiate(dentry, ip); @@ -846,8 +846,8 @@ static int jfs_link(struct dentry *old_dentry, out: txEnd(tid); - up(&JFS_IP(dir)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dir)->commit_mutex); jfs_info("jfs_link: rc:%d", rc); return rc; @@ -857,12 +857,12 @@ static int jfs_link(struct dentry *old_dentry, * NAME: jfs_symlink(dip, dentry, name) * * FUNCTION: creates a symbolic link to <symlink> by name <name> - * in directory <dip> + * in directory <dip> * - * PARAMETER: dip - parent directory vnode - * dentry - dentry of symbolic link - * name - the path name of the existing object - * that will be the source of the link + * PARAMETER: dip - parent directory vnode + * dentry - dentry of symbolic link + * name - the path name of the existing object + * that will be the source of the link * * RETURN: errors from subroutines * @@ -884,7 +884,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, unchar *i_fastsymlink; s64 xlen = 0; int bmask = 0, xsize; - s64 extent = 0, xaddr; + s64 xaddr; struct metapage *mp; struct super_block *sb; struct tblock *tblk; @@ -893,6 +893,8 @@ 
static int jfs_symlink(struct inode *dip, struct dentry *dentry, jfs_info("jfs_symlink: dip:0x%p name:%s", dip, name); + dquot_initialize(dip); + ssize = strlen(name) + 1; /* @@ -908,17 +910,17 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, * (iAlloc() returns new, locked inode) */ ip = ialloc(dip, S_IFLNK | 0777); - if (ip == NULL) { - rc = -ENOSPC; + if (IS_ERR(ip)) { + rc = PTR_ERR(ip); goto out2; } tid = txBegin(dip->i_sb, 0); - down(&JFS_IP(dip)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); + mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); - rc = jfs_init_security(tid, ip, dip); + rc = jfs_init_security(tid, ip, dip, &dentry->d_name); if (rc) goto out3; @@ -928,7 +930,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, tblk->u.ixpxd = JFS_IP(ip)->ixpxd; /* fix symlink access permission - * (dir_create() ANDs in the u.u_cmask, + * (dir_create() ANDs in the u.u_cmask, * but symlinks really need to be 777 access) */ ip->i_mode |= 0777; @@ -943,7 +945,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, */ if (ssize <= IDATASIZE) { - ip->i_op = &jfs_symlink_inode_operations; + ip->i_op = &jfs_fast_symlink_inode_operations; i_fastsymlink = JFS_IP(ip)->i_inline; memcpy(i_fastsymlink, name, ssize); @@ -965,11 +967,11 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, else { jfs_info("jfs_symlink: allocate extent ip:0x%p", ip); - ip->i_op = &page_symlink_inode_operations; + ip->i_op = &jfs_symlink_inode_operations; ip->i_mapping->a_ops = &jfs_aops; /* - * even though the data of symlink object (source + * even though the data of symlink object (source * path name) is treated as non-journaled user data, * it is read/written thru buffer cache for performance. 
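Throughout these namei.c hunks the callers of ialloc() switch from a NULL check with a hard-coded -ENOSPC to IS_ERR()/PTR_ERR(), so whatever errno ialloc() chose is propagated. A condensed sketch of the caller-side convention (the wrapper name is hypothetical; ialloc() is JFS's allocator, assumed declared in jfs_inode.h):

#include <linux/err.h>
#include <linux/fs.h>
#include "jfs_inode.h"		/* ialloc() */

/* illustrative helper: return 0 and *ipp on success, a negative errno otherwise */
static int example_new_inode(struct inode *dip, umode_t mode, struct inode **ipp)
{
	struct inode *ip = ialloc(dip, mode);	/* ERR_PTR(-ENOSPC) etc. on failure */

	if (IS_ERR(ip))
		return PTR_ERR(ip);	/* hand the encoded errno straight back */
	*ipp = ip;
	return 0;
}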
*/ @@ -980,10 +982,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, xlen = xsize >> JFS_SBI(sb)->l2bsize; if ((rc = xtInsert(tid, ip, 0, 0, xlen, &xaddr, 0))) { txAbort(tid, 0); - rc = -ENOSPC; goto out3; } - extent = xaddr; ip->i_size = ssize - 1; while (ssize) { /* This is kind of silly since PATH_MAX == 4K */ @@ -1021,9 +1021,10 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, goto out3; } - insert_inode_hash(ip); mark_inode_dirty(ip); + dip->i_ctime = dip->i_mtime = CURRENT_TIME; + mark_inode_dirty(dip); /* * commit update of parent directory and link object */ @@ -1034,14 +1035,17 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, out3: txEnd(tid); - up(&JFS_IP(dip)->commit_sem); - up(&JFS_IP(ip)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dip)->commit_mutex); if (rc) { free_ea_wmap(ip); - ip->i_nlink = 0; + clear_nlink(ip); + unlock_new_inode(ip); iput(ip); - } else + } else { + unlock_new_inode(ip); d_instantiate(dentry, ip); + } out2: free_UCSname(&dname); @@ -1053,9 +1057,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, /* - * NAME: jfs_rename + * NAME: jfs_rename * - * FUNCTION: rename a file or directory + * FUNCTION: rename a file or directory */ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) @@ -1081,6 +1085,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, new_dentry->d_name.name); + dquot_initialize(old_dir); + dquot_initialize(new_dir); + old_ip = old_dentry->d_inode; new_ip = new_dentry->d_inode; @@ -1103,8 +1110,8 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, * Make sure dest inode number (if any) is what we think it is */ rc = dtSearch(new_dir, &new_dname, &ino, &btstack, JFS_LOOKUP); - if (rc == 0) { - if ((new_ip == 0) || (ino != new_ip->i_ino)) { + if (!rc) { + if ((!new_ip) || (ino != new_ip->i_ino)) { rc = -ESTALE; goto out3; } @@ -1122,15 +1129,11 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, rc = -ENOTEMPTY; goto out3; } - } else if ((new_dir != old_dir) && - (new_dir->i_nlink == JFS_LINK_MAX)) { - rc = -EMLINK; - goto out3; } } else if (new_ip) { - IWRITE_LOCK(new_ip); + IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL); /* Init inode for quota operations. */ - DQUOT_INIT(new_ip); + dquot_initialize(new_ip); } /* @@ -1138,13 +1141,21 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, */ tid = txBegin(new_dir->i_sb, 0); - down(&JFS_IP(new_dir)->commit_sem); - down(&JFS_IP(old_ip)->commit_sem); + /* + * How do we know the locking is safe from deadlocks? + * The vfs does the hard part for us. Any time we are taking nested + * commit_mutexes, the vfs already has i_mutex held on the parent. + * Here, the vfs has already taken i_mutex on both old_dir and new_dir. 
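The conversion from commit_sem to commit_mutex pairs every nested acquisition with a lockdep subclass (COMMIT_MUTEX_PARENT, _CHILD, _SECOND_PARENT, _VICTIM), which is what lets jfs_rename() hold up to four commit mutexes at once without false deadlock reports. Condensed from the jfs_rename() hunks in this section (the subclass constants are assumed to be defined in a JFS header such as jfs_incore.h):

	/* parents first, then the inode being moved, then the victim */
	mutex_lock_nested(&JFS_IP(new_dir)->commit_mutex, COMMIT_MUTEX_PARENT);
	mutex_lock_nested(&JFS_IP(old_ip)->commit_mutex, COMMIT_MUTEX_CHILD);
	if (old_dir != new_dir)
		mutex_lock_nested(&JFS_IP(old_dir)->commit_mutex,
				  COMMIT_MUTEX_SECOND_PARENT);
	if (new_ip)
		mutex_lock_nested(&JFS_IP(new_ip)->commit_mutex,
				  COMMIT_MUTEX_VICTIM);

	/* ... transaction work ... */

	/* release in the reverse order, as the out4 path does */
	if (new_ip)
		mutex_unlock(&JFS_IP(new_ip)->commit_mutex);
	if (old_dir != new_dir)
		mutex_unlock(&JFS_IP(old_dir)->commit_mutex);
	mutex_unlock(&JFS_IP(old_ip)->commit_mutex);
	mutex_unlock(&JFS_IP(new_dir)->commit_mutex);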
+ */ + mutex_lock_nested(&JFS_IP(new_dir)->commit_mutex, COMMIT_MUTEX_PARENT); + mutex_lock_nested(&JFS_IP(old_ip)->commit_mutex, COMMIT_MUTEX_CHILD); if (old_dir != new_dir) - down(&JFS_IP(old_dir)->commit_sem); + mutex_lock_nested(&JFS_IP(old_dir)->commit_mutex, + COMMIT_MUTEX_SECOND_PARENT); if (new_ip) { - down(&JFS_IP(new_ip)->commit_sem); + mutex_lock_nested(&JFS_IP(new_ip)->commit_mutex, + COMMIT_MUTEX_VICTIM); /* * Change existing directory entry to new inode number */ @@ -1153,18 +1164,19 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_ip->i_ino, JFS_RENAME); if (rc) goto out4; - new_ip->i_nlink--; + drop_nlink(new_ip); if (S_ISDIR(new_ip->i_mode)) { - new_ip->i_nlink--; + drop_nlink(new_ip); if (new_ip->i_nlink) { - up(&JFS_IP(new_dir)->commit_sem); - up(&JFS_IP(old_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_ip)->commit_mutex); if (old_dir != new_dir) - up(&JFS_IP(old_dir)->commit_sem); + mutex_unlock(&JFS_IP(old_dir)->commit_mutex); + mutex_unlock(&JFS_IP(old_ip)->commit_mutex); + mutex_unlock(&JFS_IP(new_dir)->commit_mutex); if (!S_ISDIR(old_ip->i_mode) && new_ip) IWRITE_UNLOCK(new_ip); jfs_error(new_ip->i_sb, - "jfs_rename: new_ip->i_nlink != 0"); + "new_ip->i_nlink != 0\n"); return -EIO; } tblk = tid_to_tblock(tid); @@ -1175,7 +1187,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* free block resources */ if ((new_size = commitZeroLink(tid, new_ip)) < 0) { txAbort(tid, 1); /* Marks FS Dirty */ - rc = new_size; + rc = new_size; goto out4; } tblk = tid_to_tblock(tid); @@ -1205,7 +1217,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out4; } if (S_ISDIR(old_ip->i_mode)) - new_dir->i_nlink++; + inc_nlink(new_dir); } /* * Remove old directory entry @@ -1220,7 +1232,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out4; } if (S_ISDIR(old_ip->i_mode)) { - old_dir->i_nlink--; + drop_nlink(old_dir); if (old_dir != new_dir) { /* * Change inode number of parent for moved directory @@ -1278,25 +1290,24 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, out4: txEnd(tid); - - up(&JFS_IP(new_dir)->commit_sem); - up(&JFS_IP(old_ip)->commit_sem); - if (old_dir != new_dir) - up(&JFS_IP(old_dir)->commit_sem); if (new_ip) - up(&JFS_IP(new_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_ip)->commit_mutex); + if (old_dir != new_dir) + mutex_unlock(&JFS_IP(old_dir)->commit_mutex); + mutex_unlock(&JFS_IP(old_ip)->commit_mutex); + mutex_unlock(&JFS_IP(new_dir)->commit_mutex); while (new_size && (rc == 0)) { tid = txBegin(new_ip->i_sb, 0); - down(&JFS_IP(new_ip)->commit_sem); + mutex_lock(&JFS_IP(new_ip)->commit_mutex); new_size = xtTruncate_pmap(tid, new_ip, new_size); if (new_size < 0) { txAbort(tid, 1); - rc = new_size; + rc = new_size; } else rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC); txEnd(tid); - up(&JFS_IP(new_ip)->commit_sem); + mutex_unlock(&JFS_IP(new_ip)->commit_mutex); } if (new_ip && (new_ip->i_nlink == 0)) set_cflag(COMMIT_Nolink, new_ip); @@ -1324,12 +1335,12 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* - * NAME: jfs_mknod + * NAME: jfs_mknod * - * FUNCTION: Create a special file (device) + * FUNCTION: Create a special file (device) */ static int jfs_mknod(struct inode *dir, struct dentry *dentry, - int mode, dev_t rdev) + umode_t mode, dev_t rdev) { struct jfs_inode_info *jfs_ip; struct btstack btstack; @@ -1346,26 +1357,28 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, 
jfs_info("jfs_mknod: %s", dentry->d_name.name); + dquot_initialize(dir); + if ((rc = get_UCSname(&dname, dentry))) goto out; ip = ialloc(dir, mode); - if (ip == NULL) { - rc = -ENOSPC; + if (IS_ERR(ip)) { + rc = PTR_ERR(ip); goto out1; } jfs_ip = JFS_IP(ip); tid = txBegin(dir->i_sb, 0); - down(&JFS_IP(dir)->commit_sem); - down(&JFS_IP(ip)->commit_sem); + mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT); + mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); rc = jfs_init_acl(tid, ip, dir); if (rc) goto out3; - rc = jfs_init_security(tid, ip, dir); + rc = jfs_init_security(tid, ip, dir, &dentry->d_name); if (rc) { txAbort(tid, 0); goto out3; @@ -1391,7 +1404,6 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, jfs_ip->dev = new_encode_dev(rdev); init_special_inode(ip, ip->i_mode, rdev); - insert_inode_hash(ip); mark_inode_dirty(ip); dir->i_ctime = dir->i_mtime = CURRENT_TIME; @@ -1404,14 +1416,17 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, out3: txEnd(tid); - up(&JFS_IP(ip)->commit_sem); - up(&JFS_IP(dir)->commit_sem); + mutex_unlock(&JFS_IP(ip)->commit_mutex); + mutex_unlock(&JFS_IP(dir)->commit_mutex); if (rc) { free_ea_wmap(ip); - ip->i_nlink = 0; + clear_nlink(ip); + unlock_new_inode(ip); iput(ip); - } else + } else { + unlock_new_inode(ip); d_instantiate(dentry, ip); + } out1: free_UCSname(&dname); @@ -1421,82 +1436,78 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, return rc; } -static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) +static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags) { struct btstack btstack; ino_t inum; struct inode *ip; struct component_name key; - const char *name = dentry->d_name.name; - int len = dentry->d_name.len; int rc; - jfs_info("jfs_lookup: name = %s", name); + jfs_info("jfs_lookup: name = %s", dentry->d_name.name); + + if ((rc = get_UCSname(&key, dentry))) + return ERR_PTR(rc); + rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); + free_UCSname(&key); + if (rc == -ENOENT) { + ip = NULL; + } else if (rc) { + jfs_err("jfs_lookup: dtSearch returned %d", rc); + ip = ERR_PTR(rc); + } else { + ip = jfs_iget(dip->i_sb, inum); + if (IS_ERR(ip)) + jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum); + } - if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) - dentry->d_op = &jfs_ci_dentry_operations; + return d_splice_alias(ip, dentry); +} - if ((name[0] == '.') && (len == 1)) - inum = dip->i_ino; - else if (strcmp(name, "..") == 0) - inum = PARENT(dip); - else { - if ((rc = get_UCSname(&key, dentry))) - return ERR_PTR(rc); - rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); - free_UCSname(&key); - if (rc == -ENOENT) { - d_add(dentry, NULL); - return ERR_PTR(0); - } else if (rc) { - jfs_err("jfs_lookup: dtSearch returned %d", rc); - return ERR_PTR(rc); - } - } +static struct inode *jfs_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) +{ + struct inode *inode; - ip = iget(dip->i_sb, inum); - if (ip == NULL || is_bad_inode(ip)) { - jfs_err("jfs_lookup: iget failed on inum %d", (uint) inum); - if (ip) - iput(ip); - return ERR_PTR(-EACCES); + if (ino == 0) + return ERR_PTR(-ESTALE); + inode = jfs_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + if (generation && inode->i_generation != generation) { + iput(inode); + return ERR_PTR(-ESTALE); } - dentry = d_splice_alias(ip, dentry); + return inode; +} - if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) - 
dentry->d_op = &jfs_ci_dentry_operations; +struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + jfs_nfs_get_inode); +} - return dentry; +struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + jfs_nfs_get_inode); } struct dentry *jfs_get_parent(struct dentry *dentry) { - struct super_block *sb = dentry->d_inode->i_sb; - struct dentry *parent = ERR_PTR(-ENOENT); - struct inode *inode; unsigned long parent_ino; parent_ino = le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot); - inode = iget(sb, parent_ino); - if (inode) { - if (is_bad_inode(inode)) { - iput(inode); - parent = ERR_PTR(-EACCES); - } else { - parent = d_alloc_anon(inode); - if (!parent) { - parent = ERR_PTR(-ENOMEM); - iput(inode); - } - } - } - return parent; + return d_obtain_alias(jfs_iget(dentry->d_inode->i_sb, parent_ino)); } -struct inode_operations jfs_dir_inode_operations = { +const struct inode_operations jfs_dir_inode_operations = { .create = jfs_create, .lookup = jfs_lookup, .link = jfs_link, @@ -1510,19 +1521,25 @@ struct inode_operations jfs_dir_inode_operations = { .getxattr = jfs_getxattr, .listxattr = jfs_listxattr, .removexattr = jfs_removexattr, -#ifdef CONFIG_JFS_POSIX_ACL .setattr = jfs_setattr, - .permission = jfs_permission, +#ifdef CONFIG_JFS_POSIX_ACL + .get_acl = jfs_get_acl, + .set_acl = jfs_set_acl, #endif }; -struct file_operations jfs_dir_operations = { +const struct file_operations jfs_dir_operations = { .read = generic_read_dir, - .readdir = jfs_readdir, + .iterate = jfs_readdir, .fsync = jfs_fsync, + .unlocked_ioctl = jfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = jfs_compat_ioctl, +#endif + .llseek = generic_file_llseek, }; -static int jfs_ci_hash(struct dentry *dir, struct qstr *this) +static int jfs_ci_hash(const struct dentry *dir, struct qstr *this) { unsigned long hash; int i; @@ -1535,32 +1552,57 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this) return 0; } -static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b) +static int jfs_ci_compare(const struct dentry *parent, const struct dentry *dentry, + unsigned int len, const char *str, const struct qstr *name) { int i, result = 1; - if (a->len != b->len) + if (len != name->len) goto out; - for (i=0; i < a->len; i++) { - if (tolower(a->name[i]) != tolower(b->name[i])) + for (i=0; i < len; i++) { + if (tolower(str[i]) != tolower(name->name[i])) goto out; } result = 0; +out: + return result; +} +static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags) +{ /* - * We want creates to preserve case. A negative dentry, a, that - * has a different case than b may cause a new entry to be created - * with the wrong case. Since we can't tell if a comes from a negative - * dentry, we blindly replace it with b. This should be harmless if - * a is not a negative dentry. + * This is not negative dentry. Always valid. + * + * Note, rename() to existing directory entry will have ->d_inode, + * and will use existing name which isn't specified name by user. + * + * We may be able to drop this positive dentry here. But dropping + * positive dentry isn't good idea. So it's unsupported like + * rename("filename", "FILENAME") for now. 
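The jfs_ci_* dentry operations compare names case-insensitively with a byte-wise tolower() fold, and the new jfs_ci_revalidate() drops negative dentries on LOOKUP_CREATE / LOOKUP_RENAME_TARGET instead of the memcpy() workaround removed in this hunk. A standalone sketch of the comparison itself (plain ASCII folding as in jfs_ci_compare(); the function name is illustrative):

#include <ctype.h>
#include <stddef.h>

/* return 0 when the names match ignoring ASCII case, 1 otherwise */
static int ci_compare(const char *a, size_t a_len, const char *b, size_t b_len)
{
	size_t i;

	if (a_len != b_len)
		return 1;
	for (i = 0; i < a_len; i++)
		if (tolower((unsigned char)a[i]) != tolower((unsigned char)b[i]))
			return 1;
	return 0;
}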
*/ - memcpy((unsigned char *)a->name, b->name, a->len); -out: - return result; + if (dentry->d_inode) + return 1; + + /* + * This may be nfsd (or something), anyway, we can't see the + * intent of this. So, since this can be for creation, drop it. + */ + if (!flags) + return 0; + + /* + * Drop the negative dentry, in order to make sure to use the + * case sensitive name which is specified by user if this is + * for creation. + */ + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + return 0; + return 1; } -struct dentry_operations jfs_ci_dentry_operations = +const struct dentry_operations jfs_ci_dentry_operations = { .d_hash = jfs_ci_hash, .d_compare = jfs_ci_compare, + .d_revalidate = jfs_ci_revalidate, }; diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index c6dc254d325..90b3bc21e9b 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -29,17 +29,17 @@ #include "jfs_txnmgr.h" #include "jfs_debug.h" -#define BITSPERPAGE (PSIZE << 3) -#define L2MEGABYTE 20 -#define MEGABYTE (1 << L2MEGABYTE) -#define MEGABYTE32 (MEGABYTE << 5) +#define BITSPERPAGE (PSIZE << 3) +#define L2MEGABYTE 20 +#define MEGABYTE (1 << L2MEGABYTE) +#define MEGABYTE32 (MEGABYTE << 5) /* convert block number to bmap file page number */ #define BLKTODMAPN(b)\ - (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) + (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) /* - * jfs_extendfs() + * jfs_extendfs() * * function: extend file system; * @@ -48,16 +48,16 @@ * workspace space * * input: - * new LVSize: in LV blocks (required) - * new LogSize: in LV blocks (optional) - * new FSSize: in LV blocks (optional) + * new LVSize: in LV blocks (required) + * new LogSize: in LV blocks (optional) + * new FSSize: in LV blocks (optional) * * new configuration: * 1. set new LogSize as specified or default from new LVSize; * 2. compute new FSCKSize from new LVSize; * 3. 
set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where * assert(new FSSize >= old FSSize), - * i.e., file system must not be shrinked; + * i.e., file system must not be shrunk; */ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) { @@ -80,7 +80,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) int log_formatted = 0; struct inode *iplist[1]; struct jfs_superblock *j_sb, *j_sb2; - uint old_agsize; + s64 old_agsize; + int agsizechanged = 0; struct buffer_head *bh, *bh2; /* If the volume hasn't grown, get out now */ @@ -125,8 +126,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) } /* - * reconfigure LV spaces - * --------------------- + * reconfigure LV spaces + * --------------------- * * validate new size, or, if not specified, determine new size */ @@ -172,7 +173,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) */ t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP) << L2BPERDMAP; - t32 = ((t64 + (BITSPERPAGE - 1)) / BITSPERPAGE) + 1 + 50; + t32 = DIV_ROUND_UP(t64, BITSPERPAGE) + 1 + 50; newFSCKSize = t32 << sbi->l2nbperpage; newFSCKAddress = newLogAddress - newFSCKSize; @@ -181,7 +182,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) */ newFSSize = newLVSize - newLogSize - newFSCKSize; - /* file system cannot be shrinked */ + /* file system cannot be shrunk */ if (newFSSize < bmp->db_mapsize) { rc = -EINVAL; goto out; @@ -198,7 +199,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) log_formatted = 1; } /* - * quiesce file system + * quiesce file system * * (prepare to move the inline log and to prevent map update) * @@ -270,8 +271,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) } /* - * extend block allocation map - * --------------------------- + * extend block allocation map + * --------------------------- * * extendfs() for new extension, retry after crash recovery; * @@ -283,7 +284,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) * s_size: aggregate size in physical blocks; */ /* - * compute the new block allocation map configuration + * compute the new block allocation map configuration * * map dinode: * di_size: map file size in byte; @@ -301,7 +302,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) newNpages = BLKTODMAPN(t64) + 1; /* - * extend map from current map (WITHOUT growing mapfile) + * extend map from current map (WITHOUT growing mapfile) * * map new extension with unmapped part of the last partial * dmap page, if applicable, and extra page(s) allocated @@ -333,6 +334,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) */ if ((rc = dbExtendFS(ipbmap, XAddress, nblocks))) goto error_out; + + agsizechanged |= (bmp->db_agsize != old_agsize); + /* * the map now has extended to cover additional nblocks: * dn_mapsize = oldMapsize + nblocks; @@ -341,8 +345,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) XSize -= nblocks; /* - * grow map file to cover remaining extension - * and/or one extra dmap page for next extendfs(); + * grow map file to cover remaining extension + * and/or one extra dmap page for next extendfs(); * * allocate new map pages and its backing blocks, and * update map file xtree @@ -376,8 +380,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) * by txCommit(); */ filemap_fdatawait(ipbmap->i_mapping); - 
filemap_fdatawrite(ipbmap->i_mapping); - filemap_fdatawait(ipbmap->i_mapping); + filemap_write_and_wait(ipbmap->i_mapping); diWriteSpecial(ipbmap, 0); newPage = nPages; /* first new page number */ @@ -423,8 +426,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) dbFinalizeBmap(ipbmap); /* - * update inode allocation map - * --------------------------- + * update inode allocation map + * --------------------------- * * move iag lists from old to new iag; * agstart field is not updated for logredo() to reconstruct @@ -433,7 +436,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) * will correctly identify the new ag); */ /* if new AG size the same as old AG size, done! */ - if (bmp->db_agsize != old_agsize) { + if (agsizechanged) { if ((rc = diExtendFS(ipimap, ipbmap))) goto error_out; @@ -443,8 +446,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) } /* - * finalize - * -------- + * finalize + * -------- * * extension is committed when on-disk super block is * updated with new descriptors: logredo will recover @@ -481,7 +484,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) diFreeSpecial(ipbmap2); /* - * update superblock + * update superblock */ if ((rc = readSuper(sb, &bh))) goto error_out; @@ -527,11 +530,11 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) goto resume; error_out: - jfs_error(sb, "jfs_extendfs"); + jfs_error(sb, "\n"); resume: /* - * resume file system transactions + * resume file system transactions */ txResume(sb); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4226af3ea91..adf8cb045b9 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -4,30 +4,36 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. 
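Two small cleanups sit in the resize.c hunks above: the FSCK workspace sizing now uses DIV_ROUND_UP() instead of the open-coded rounding, and the write/wait pair on the bmap mapping collapses into one filemap_write_and_wait() call. A sketch of the first equivalence (standalone helpers with illustrative names and simplified types):

#include <linux/types.h>
#include <linux/kernel.h>	/* DIV_ROUND_UP() */

static inline s64 fsck_pages_old(s64 t64, s64 bitsperpage)
{
	/* old expression: ((t64 + (BITSPERPAGE - 1)) / BITSPERPAGE) + 1 + 50 */
	return (t64 + (bitsperpage - 1)) / bitsperpage + 1 + 50;
}

static inline s64 fsck_pages_new(s64 t64, s64 bitsperpage)
{
	/* DIV_ROUND_UP(n, d) expands to ((n) + (d) - 1) / (d): same rounding up */
	return DIV_ROUND_UP(t64, bitsperpage) + 1 + 50;
}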
* * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/fs.h> -#include <linux/config.h> #include <linux/module.h> #include <linux/parser.h> #include <linux/completion.h> #include <linux/vfs.h> +#include <linux/quotaops.h> #include <linux/mount.h> #include <linux/moduleparam.h> +#include <linux/kthread.h> #include <linux/posix_acl.h> +#include <linux/buffer_head.h> +#include <linux/exportfs.h> +#include <linux/crc32.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <linux/seq_file.h> +#include <linux/blkdev.h> #include "jfs_incore.h" #include "jfs_filsys.h" @@ -38,27 +44,26 @@ #include "jfs_imap.h" #include "jfs_acl.h" #include "jfs_debug.h" +#include "jfs_xattr.h" MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); MODULE_LICENSE("GPL"); -static kmem_cache_t * jfs_inode_cachep; +static struct kmem_cache *jfs_inode_cachep; -static struct super_operations jfs_super_operations; -static struct export_operations jfs_export_operations; +static const struct super_operations jfs_super_operations; +static const struct export_operations jfs_export_operations; static struct file_system_type jfs_fs_type; #define MAX_COMMIT_THREADS 64 -static int commit_threads = 0; +static int commit_threads; module_param(commit_threads, int, 0); MODULE_PARM_DESC(commit_threads, "Number of commit threads"); -int jfs_stop_threads; -static pid_t jfsIOthread; -static pid_t jfsCommitThread[MAX_COMMIT_THREADS]; -static pid_t jfsSyncThread; -DECLARE_COMPLETION(jfsIOwait); +static struct task_struct *jfsCommitThread[MAX_COMMIT_THREADS]; +struct task_struct *jfsIOthread; +struct task_struct *jfsSyncThread; #ifdef CONFIG_JFS_DEBUG int jfsloglevel = JFS_LOGLEVEL_WARN; @@ -79,25 +84,28 @@ static void jfs_handle_error(struct super_block *sb) panic("JFS (device %s): panic forced after error\n", sb->s_id); else if (sbi->flag & JFS_ERR_REMOUNT_RO) { - jfs_err("ERROR: (device %s): remounting filesystem " - "as read-only\n", + jfs_err("ERROR: (device %s): remounting filesystem as read-only\n", sb->s_id); sb->s_flags |= MS_RDONLY; - } + } /* nothing is done for continue beyond marking the superblock dirty */ } -void jfs_error(struct super_block *sb, const char * function, ...) +void jfs_error(struct super_block *sb, const char *fmt, ...) 
{ - static char error_buf[256]; + struct va_format vaf; va_list args; - va_start(args, function); - vsprintf(error_buf, function, args); - va_end(args); + va_start(args, fmt); - printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf); + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("ERROR: (device %s): %pf: %pV\n", + sb->s_id, __builtin_return_address(0), &vaf); + + va_end(args); jfs_handle_error(sb); } @@ -112,6 +120,13 @@ static struct inode *jfs_alloc_inode(struct super_block *sb) return &jfs_inode->vfs_inode; } +static void jfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct jfs_inode_info *ji = JFS_IP(inode); + kmem_cache_free(jfs_inode_cachep, ji); +} + static void jfs_destroy_inode(struct inode *inode) { struct jfs_inode_info *ji = JFS_IP(inode); @@ -125,24 +140,12 @@ static void jfs_destroy_inode(struct inode *inode) ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); - -#ifdef CONFIG_JFS_POSIX_ACL - if (ji->i_acl != JFS_ACL_NOT_CACHED) { - posix_acl_release(ji->i_acl); - ji->i_acl = JFS_ACL_NOT_CACHED; - } - if (ji->i_default_acl != JFS_ACL_NOT_CACHED) { - posix_acl_release(ji->i_default_acl); - ji->i_default_acl = JFS_ACL_NOT_CACHED; - } -#endif - - kmem_cache_free(jfs_inode_cachep, ji); + call_rcu(&inode->i_rcu, jfs_i_callback); } -static int jfs_statfs(struct super_block *sb, struct kstatfs *buf) +static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct jfs_sb_info *sbi = JFS_SBI(sb); + struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb); s64 maxinodes; struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap; @@ -155,7 +158,7 @@ static int jfs_statfs(struct super_block *sb, struct kstatfs *buf) /* * If we really return the number of allocated & free inodes, some * applications will fail because they won't see enough free inodes. 
- * We'll try to calculate some guess as to how may inodes we can + * We'll try to calculate some guess as to how many inodes we can * really allocate * * buf->f_files = atomic_read(&imap->im_numinos); @@ -167,6 +170,9 @@ static int jfs_statfs(struct super_block *sb, struct kstatfs *buf) buf->f_files = maxinodes; buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) - atomic_read(&imap->im_numfree)); + buf->f_fsid.val[0] = (u32)crc32_le(0, sbi->uuid, sizeof(sbi->uuid)/2); + buf->f_fsid.val[1] = (u32)crc32_le(0, sbi->uuid + sizeof(sbi->uuid)/2, + sizeof(sbi->uuid)/2); buf->f_namelen = JFS_NAME_MAX; return 0; @@ -178,16 +184,17 @@ static void jfs_put_super(struct super_block *sb) int rc; jfs_info("In jfs_put_super"); + + dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + rc = jfs_umount(sb); if (rc) jfs_err("jfs_umount failed with return code %d", rc); - if (sbi->nls_tab) - unload_nls(sbi->nls_tab); - sbi->nls_tab = NULL; + + unload_nls(sbi->nls_tab); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); iput(sbi->direct_inode); - sbi->direct_inode = NULL; kfree(sbi); } @@ -195,10 +202,11 @@ static void jfs_put_super(struct super_block *sb) enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota + Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask, + Opt_discard, Opt_nodiscard, Opt_discard_minblk }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_integrity, "integrity"}, {Opt_nointegrity, "nointegrity"}, {Opt_iocharset, "iocharset=%s"}, @@ -209,6 +217,12 @@ static match_table_t tokens = { {Opt_ignore, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_grpquota, "grpquota"}, + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_umask, "umask=%u"}, + {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, + {Opt_discard_minblk, "discard=%u"}, {Opt_err, NULL} }; @@ -250,8 +264,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, else { nls_map = load_nls(args[0].from); if (!nls_map) { - printk(KERN_ERR - "JFS: charset not found\n"); + pr_err("JFS: charset not found\n"); goto cleanup; } } @@ -259,7 +272,10 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_resize: { char *resize = args[0].from; - *newLVSize = simple_strtoull(resize, &resize, 0); + int rc = kstrtoll(resize, 0, newLVSize); + + if (rc) + goto cleanup; break; } case Opt_resize_nosize: @@ -267,8 +283,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, *newLVSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; if (*newLVSize == 0) - printk(KERN_ERR - "JFS: Cannot determine volume size\n"); + pr_err("JFS: Cannot determine volume size\n"); break; } case Opt_errors: @@ -289,15 +304,14 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, *flag &= ~JFS_ERR_REMOUNT_RO; *flag |= JFS_ERR_PANIC; } else { - printk(KERN_ERR - "JFS: %s is an invalid error handler\n", + pr_err("JFS: %s is an invalid error handler\n", errors); goto cleanup; } break; } -#if defined(CONFIG_QUOTA) +#ifdef CONFIG_QUOTA case Opt_quota: case Opt_usrquota: *flag |= JFS_USRQUOTA; @@ -309,22 +323,96 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, case Opt_usrquota: case Opt_grpquota: case Opt_quota: - printk(KERN_ERR - "JFS: quota operations not supported\n"); + pr_err("JFS: quota operations not supported\n"); break; #endif + case Opt_uid: + { + 
char *uid = args[0].from; + uid_t val; + int rc = kstrtouint(uid, 0, &val); + + if (rc) + goto cleanup; + sbi->uid = make_kuid(current_user_ns(), val); + if (!uid_valid(sbi->uid)) + goto cleanup; + break; + } + + case Opt_gid: + { + char *gid = args[0].from; + gid_t val; + int rc = kstrtouint(gid, 0, &val); + + if (rc) + goto cleanup; + sbi->gid = make_kgid(current_user_ns(), val); + if (!gid_valid(sbi->gid)) + goto cleanup; + break; + } + + case Opt_umask: + { + char *umask = args[0].from; + int rc = kstrtouint(umask, 8, &sbi->umask); + + if (rc) + goto cleanup; + if (sbi->umask & ~0777) { + pr_err("JFS: Invalid value of umask\n"); + goto cleanup; + } + break; + } + + case Opt_discard: + { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + /* if set to 1, even copying files will cause + * trimming :O + * -> user has more control over the online trimming + */ + sbi->minblks_trim = 64; + if (blk_queue_discard(q)) + *flag |= JFS_DISCARD; + else + pr_err("JFS: discard option not supported on device\n"); + break; + } + + case Opt_nodiscard: + *flag &= ~JFS_DISCARD; + break; + + case Opt_discard_minblk: + { + struct request_queue *q = bdev_get_queue(sb->s_bdev); + char *minblks_trim = args[0].from; + int rc; + if (blk_queue_discard(q)) { + *flag |= JFS_DISCARD; + rc = kstrtouint(minblks_trim, 0, + &sbi->minblks_trim); + if (rc) + goto cleanup; + } else + pr_err("JFS: discard option not supported on device\n"); + break; + } default: - printk("jfs: Unrecognized mount option \"%s\" " - " or missing value\n", p); + printk("jfs: Unrecognized mount option \"%s\" or missing value\n", + p); goto cleanup; } } if (nls_map != (void *) -1) { /* Discard old (if remount) */ - if (sbi->nls_tab) - unload_nls(sbi->nls_tab); + unload_nls(sbi->nls_tab); sbi->nls_tab = nls_map; } return 1; @@ -340,14 +428,15 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) s64 newLVSize = 0; int rc = 0; int flag = JFS_SBI(sb)->flag; + int ret; - if (!parse_options(data, sb, &newLVSize, &flag)) { + sync_filesystem(sb); + if (!parse_options(data, sb, &newLVSize, &flag)) return -EINVAL; - } + if (newLVSize) { if (sb->s_flags & MS_RDONLY) { - printk(KERN_ERR - "JFS: resize requires volume to be mounted read-write\n"); + pr_err("JFS: resize requires volume to be mounted read-write\n"); return -EROFS; } rc = jfs_extendfs(sb, newLVSize, 0); @@ -363,9 +452,18 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0); JFS_SBI(sb)->flag = flag; - return jfs_mount_rw(sb, 1); + ret = jfs_mount_rw(sb, 1); + + /* mark the fs r/w for quota activity */ + sb->s_flags &= ~MS_RDONLY; + + dquot_resume(sb, -1); + return ret; } if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { + rc = dquot_suspend(sb, -1); + if (rc < 0) + return rc; rc = jfs_umount_rw(sb); JFS_SBI(sb)->flag = flag; return rc; @@ -375,8 +473,10 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) rc = jfs_umount_rw(sb); if (rc) return rc; + JFS_SBI(sb)->flag = flag; - return jfs_mount_rw(sb, 1); + ret = jfs_mount_rw(sb, 1); + return ret; } JFS_SBI(sb)->flag = flag; @@ -389,27 +489,29 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) struct inode *inode; int rc; s64 newLVSize = 0; - int flag; + int flag, ret = -EINVAL; jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags); if (!new_valid_dev(sb->s_bdev->bd_dev)) return -EOVERFLOW; - sbi = kmalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); + sbi = kzalloc(sizeof(struct 
jfs_sb_info), GFP_KERNEL); if (!sbi) - return -ENOSPC; - memset(sbi, 0, sizeof (struct jfs_sb_info)); + return -ENOMEM; + sb->s_fs_info = sbi; + sb->s_max_links = JFS_LINK_MAX; sbi->sb = sb; + sbi->uid = INVALID_UID; + sbi->gid = INVALID_GID; + sbi->umask = -1; /* initialize the mount flag and determine the default error handler */ flag = JFS_ERR_REMOUNT_RO; - if (!parse_options((char *) data, sb, &newLVSize, &flag)) { - kfree(sbi); - return -EINVAL; - } + if (!parse_options((char *) data, sb, &newLVSize, &flag)) + goto out_kfree; sbi->flag = flag; #ifdef CONFIG_JFS_POSIX_ACL @@ -417,8 +519,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) #endif if (newLVSize) { - printk(KERN_ERR "resize option for remount only\n"); - return -EINVAL; + pr_err("resize option for remount only\n"); + goto out_kfree; } /* @@ -431,15 +533,21 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) */ sb->s_op = &jfs_super_operations; sb->s_export_op = &jfs_export_operations; + sb->s_xattr = jfs_xattr_handlers; +#ifdef CONFIG_QUOTA + sb->dq_op = &dquot_operations; + sb->s_qcop = &dquot_quotactl_ops; +#endif /* * Initialize direct-mapping inode/address-space */ inode = new_inode(sb); - if (inode == NULL) - goto out_kfree; + if (inode == NULL) { + ret = -ENOMEM; + goto out_unload; + } inode->i_ino = 0; - inode->i_nlink = 1; inode->i_size = sb->s_bdev->bd_inode->i_size; inode->i_mapping->a_ops = &jfs_metapage_aops; insert_inode_hash(inode); @@ -449,9 +557,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) rc = jfs_mount(sb); if (rc) { - if (!silent) { + if (!silent) jfs_err("jfs_mount failed w/return code = %d", rc); - } goto out_mount_failed; } if (sb->s_flags & MS_RDONLY) @@ -469,16 +576,18 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = JFS_SUPER_MAGIC; - inode = iget(sb, ROOT_I); - if (!inode || is_bad_inode(inode)) - goto out_no_root; - sb->s_root = d_alloc_root(inode); + if (sbi->mntflag & JFS_OS2) + sb->s_d_op = &jfs_ci_dentry_operations; + + inode = jfs_iget(sb, ROOT_I); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out_no_rw; + } + sb->s_root = d_make_root(inode); if (!sb->s_root) goto out_no_root; - if (sbi->mntflag & JFS_OS2) - sb->s_root->d_op = &jfs_ci_dentry_operations; - /* logical blocks are represented by 40 bits in pxd_t, etc. */ sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; #if BITS_PER_LONG == 32 @@ -486,66 +595,88 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) * Page cache is indexed by long. 
* I would use MAX_LFS_FILESIZE, but it's only half as big */ - sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes); + sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, + (u64)sb->s_maxbytes); #endif sb->s_time_gran = 1; return 0; out_no_root: - jfs_err("jfs_read_super: get root inode failed"); - if (inode) - iput(inode); + jfs_err("jfs_read_super: get root dentry failed"); out_no_rw: rc = jfs_umount(sb); - if (rc) { + if (rc) jfs_err("jfs_umount failed with return code %d", rc); - } out_mount_failed: - filemap_fdatawrite(sbi->direct_inode->i_mapping); - filemap_fdatawait(sbi->direct_inode->i_mapping); + filemap_write_and_wait(sbi->direct_inode->i_mapping); truncate_inode_pages(sbi->direct_inode->i_mapping, 0); make_bad_inode(sbi->direct_inode); iput(sbi->direct_inode); sbi->direct_inode = NULL; -out_kfree: +out_unload: if (sbi->nls_tab) unload_nls(sbi->nls_tab); +out_kfree: kfree(sbi); - return -EINVAL; + return ret; } -static void jfs_write_super_lockfs(struct super_block *sb) +static int jfs_freeze(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; + int rc = 0; if (!(sb->s_flags & MS_RDONLY)) { txQuiesce(sb); - lmLogShutdown(log); - updateSuper(sb, FM_CLEAN); + rc = lmLogShutdown(log); + if (rc) { + jfs_error(sb, "lmLogShutdown failed\n"); + + /* let operations fail rather than hang */ + txResume(sb); + + return rc; + } + rc = updateSuper(sb, FM_CLEAN); + if (rc) { + jfs_err("jfs_freeze: updateSuper failed\n"); + /* + * Don't fail here. Everything succeeded except + * marking the superblock clean, so there's really + * no harm in leaving it frozen for now. + */ + } } + return 0; } -static void jfs_unlockfs(struct super_block *sb) +static int jfs_unfreeze(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; int rc = 0; if (!(sb->s_flags & MS_RDONLY)) { - updateSuper(sb, FM_MOUNT); - if ((rc = lmLogInit(log))) - jfs_err("jfs_unlock failed with return code %d", rc); - else - txResume(sb); + rc = updateSuper(sb, FM_MOUNT); + if (rc) { + jfs_error(sb, "updateSuper failed\n"); + goto out; + } + rc = lmLogInit(log); + if (rc) + jfs_error(sb, "lmLogInit failed\n"); +out: + txResume(sb); } + return rc; } -static struct super_block *jfs_get_sb(struct file_system_type *fs_type, +static struct dentry *jfs_do_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super); + return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super); } static int jfs_sync_fs(struct super_block *sb, int wait) @@ -554,6 +685,11 @@ static int jfs_sync_fs(struct super_block *sb, int wait) /* log == NULL indicates read-only mount */ if (log) { + /* + * Write quota structures to quota file, sync_blockdev() will + * write them to disk later + */ + dquot_writeback_dquots(sb, -1); jfs_flush_journal(log, wait); jfs_syncpt(log, 0); } @@ -561,16 +697,28 @@ static int jfs_sync_fs(struct super_block *sb, int wait) return 0; } -static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) +static int jfs_show_options(struct seq_file *seq, struct dentry *root) { - struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb); - + struct jfs_sb_info *sbi = JFS_SBI(root->d_sb); + + if (uid_valid(sbi->uid)) + seq_printf(seq, ",uid=%d", from_kuid(&init_user_ns, sbi->uid)); + if (gid_valid(sbi->gid)) + seq_printf(seq, ",gid=%d", from_kgid(&init_user_ns, sbi->gid)); + if (sbi->umask != -1) + seq_printf(seq, ",umask=%03o", 
sbi->umask); if (sbi->flag & JFS_NOINTEGRITY) seq_puts(seq, ",nointegrity"); - else - seq_puts(seq, ",integrity"); + if (sbi->flag & JFS_DISCARD) + seq_printf(seq, ",discard=%u", sbi->minblks_trim); + if (sbi->nls_tab) + seq_printf(seq, ",iocharset=%s", sbi->nls_tab->charset); + if (sbi->flag & JFS_ERR_CONTINUE) + seq_printf(seq, ",errors=continue"); + if (sbi->flag & JFS_ERR_PANIC) + seq_printf(seq, ",errors=panic"); -#if defined(CONFIG_QUOTA) +#ifdef CONFIG_QUOTA if (sbi->flag & JFS_USRQUOTA) seq_puts(seq, ",usrquota"); @@ -581,53 +729,161 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) return 0; } -static struct super_operations jfs_super_operations = { +#ifdef CONFIG_QUOTA + +/* Read data from quotafile - avoid pagecache and such because we cannot afford + * acquiring the locks... As quota files are never truncated and quota code + * itself serializes the operations (and no one else should touch the files) + * we don't have to be afraid of races */ +static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> sb->s_blocksize_bits; + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t toread; + struct buffer_head tmp_bh; + struct buffer_head *bh; + loff_t i_size = i_size_read(inode); + + if (off > i_size) + return 0; + if (off+len > i_size) + len = i_size-off; + toread = len; + while (toread > 0) { + tocopy = sb->s_blocksize - offset < toread ? + sb->s_blocksize - offset : toread; + + tmp_bh.b_state = 0; + tmp_bh.b_size = 1 << inode->i_blkbits; + err = jfs_get_block(inode, blk, &tmp_bh, 0); + if (err) + return err; + if (!buffer_mapped(&tmp_bh)) /* A hole? */ + memset(data, 0, tocopy); + else { + bh = sb_bread(sb, tmp_bh.b_blocknr); + if (!bh) + return -EIO; + memcpy(data, bh->b_data+offset, tocopy); + brelse(bh); + } + offset = 0; + toread -= tocopy; + data += tocopy; + blk++; + } + return len; +} + +/* Write to quotafile */ +static ssize_t jfs_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> sb->s_blocksize_bits; + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t towrite = len; + struct buffer_head tmp_bh; + struct buffer_head *bh; + + mutex_lock(&inode->i_mutex); + while (towrite > 0) { + tocopy = sb->s_blocksize - offset < towrite ? 
+ sb->s_blocksize - offset : towrite; + + tmp_bh.b_state = 0; + tmp_bh.b_size = 1 << inode->i_blkbits; + err = jfs_get_block(inode, blk, &tmp_bh, 1); + if (err) + goto out; + if (offset || tocopy != sb->s_blocksize) + bh = sb_bread(sb, tmp_bh.b_blocknr); + else + bh = sb_getblk(sb, tmp_bh.b_blocknr); + if (!bh) { + err = -EIO; + goto out; + } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, tocopy); + flush_dcache_page(bh->b_page); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + brelse(bh); + offset = 0; + towrite -= tocopy; + data += tocopy; + blk++; + } +out: + if (len == towrite) { + mutex_unlock(&inode->i_mutex); + return err; + } + if (inode->i_size < off+len-towrite) + i_size_write(inode, off+len-towrite); + inode->i_version++; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + mark_inode_dirty(inode); + mutex_unlock(&inode->i_mutex); + return len - towrite; +} + +#endif + +static const struct super_operations jfs_super_operations = { .alloc_inode = jfs_alloc_inode, .destroy_inode = jfs_destroy_inode, - .read_inode = jfs_read_inode, .dirty_inode = jfs_dirty_inode, .write_inode = jfs_write_inode, - .delete_inode = jfs_delete_inode, + .evict_inode = jfs_evict_inode, .put_super = jfs_put_super, .sync_fs = jfs_sync_fs, - .write_super_lockfs = jfs_write_super_lockfs, - .unlockfs = jfs_unlockfs, + .freeze_fs = jfs_freeze, + .unfreeze_fs = jfs_unfreeze, .statfs = jfs_statfs, .remount_fs = jfs_remount, - .show_options = jfs_show_options + .show_options = jfs_show_options, +#ifdef CONFIG_QUOTA + .quota_read = jfs_quota_read, + .quota_write = jfs_quota_write, +#endif }; -static struct export_operations jfs_export_operations = { +static const struct export_operations jfs_export_operations = { + .fh_to_dentry = jfs_fh_to_dentry, + .fh_to_parent = jfs_fh_to_parent, .get_parent = jfs_get_parent, }; static struct file_system_type jfs_fs_type = { .owner = THIS_MODULE, .name = "jfs", - .get_sb = jfs_get_sb, + .mount = jfs_do_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("jfs"); -static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) +static void init_once(void *foo) { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); - INIT_LIST_HEAD(&jfs_ip->anon_inode_list); - init_rwsem(&jfs_ip->rdwrlock); - init_MUTEX(&jfs_ip->commit_sem); - init_rwsem(&jfs_ip->xattr_sem); - spin_lock_init(&jfs_ip->ag_lock); - jfs_ip->active_ag = -1; -#ifdef CONFIG_JFS_POSIX_ACL - jfs_ip->i_acl = JFS_ACL_NOT_CACHED; - jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED; -#endif - inode_init_once(&jfs_ip->vfs_inode); - } + memset(jfs_ip, 0, sizeof(struct jfs_inode_info)); + INIT_LIST_HEAD(&jfs_ip->anon_inode_list); + init_rwsem(&jfs_ip->rdwrlock); + mutex_init(&jfs_ip->commit_mutex); + init_rwsem(&jfs_ip->xattr_sem); + spin_lock_init(&jfs_ip->ag_lock); + jfs_ip->active_ag = -1; + inode_init_once(&jfs_ip->vfs_inode); } static int __init init_jfs_fs(void) @@ -636,8 +892,9 @@ static int __init init_jfs_fs(void) int rc; jfs_inode_cachep = - kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, - SLAB_RECLAIM_ACCOUNT, init_once, NULL); + kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, + init_once); if (jfs_inode_cachep == NULL) return -ENOMEM; @@ -662,12 +919,12 @@ static int __init init_jfs_fs(void) /* * I/O completion thread (endio) */ - 
jfsIOthread = kernel_thread(jfsIOWait, NULL, CLONE_KERNEL); - if (jfsIOthread < 0) { - jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsIOthread); + jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO"); + if (IS_ERR(jfsIOthread)) { + rc = PTR_ERR(jfsIOthread); + jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); goto end_txmngr; } - wait_for_completion(&jfsIOwait); /* Wait until thread starts */ if (commit_threads < 1) commit_threads = num_online_cpus(); @@ -675,39 +932,39 @@ static int __init init_jfs_fs(void) commit_threads = MAX_COMMIT_THREADS; for (i = 0; i < commit_threads; i++) { - jfsCommitThread[i] = kernel_thread(jfs_lazycommit, NULL, - CLONE_KERNEL); - if (jfsCommitThread[i] < 0) { - jfs_err("init_jfs_fs: fork failed w/rc = %d", - jfsCommitThread[i]); + jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL, + "jfsCommit"); + if (IS_ERR(jfsCommitThread[i])) { + rc = PTR_ERR(jfsCommitThread[i]); + jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); commit_threads = i; goto kill_committask; } - /* Wait until thread starts */ - wait_for_completion(&jfsIOwait); } - jfsSyncThread = kernel_thread(jfs_sync, NULL, CLONE_KERNEL); - if (jfsSyncThread < 0) { - jfs_err("init_jfs_fs: fork failed w/rc = %d", jfsSyncThread); + jfsSyncThread = kthread_run(jfs_sync, NULL, "jfsSync"); + if (IS_ERR(jfsSyncThread)) { + rc = PTR_ERR(jfsSyncThread); + jfs_err("init_jfs_fs: fork failed w/rc = %d", rc); goto kill_committask; } - wait_for_completion(&jfsIOwait); /* Wait until thread starts */ #ifdef PROC_FS_JFS jfs_proc_init(); #endif - return register_filesystem(&jfs_fs_type); + rc = register_filesystem(&jfs_fs_type); + if (!rc) + return 0; +#ifdef PROC_FS_JFS + jfs_proc_clean(); +#endif + kthread_stop(jfsSyncThread); kill_committask: - jfs_stop_threads = 1; - wake_up_all(&jfs_commit_thread_wait); for (i = 0; i < commit_threads; i++) - wait_for_completion(&jfsIOwait); - - wake_up(&jfs_IO_thread_wait); - wait_for_completion(&jfsIOwait); /* Wait for thread exit */ + kthread_stop(jfsCommitThread[i]); + kthread_stop(jfsIOthread); end_txmngr: txExit(); free_metapage: @@ -723,20 +980,23 @@ static void __exit exit_jfs_fs(void) jfs_info("exit_jfs_fs called"); - jfs_stop_threads = 1; txExit(); metapage_exit(); - wake_up(&jfs_IO_thread_wait); - wait_for_completion(&jfsIOwait); /* Wait until IO thread exits */ - wake_up_all(&jfs_commit_thread_wait); + + kthread_stop(jfsIOthread); for (i = 0; i < commit_threads; i++) - wait_for_completion(&jfsIOwait); - wake_up(&jfs_sync_thread_wait); - wait_for_completion(&jfsIOwait); /* Wait until Sync thread exits */ + kthread_stop(jfsCommitThread[i]); + kthread_stop(jfsSyncThread); #ifdef PROC_FS_JFS jfs_proc_clean(); #endif unregister_filesystem(&jfs_fs_type); + + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(jfs_inode_cachep); } diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index 16477b3835e..205b946d8e0 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c @@ -3,16 +3,16 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ @@ -29,9 +29,21 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -struct inode_operations jfs_symlink_inode_operations = { +const struct inode_operations jfs_fast_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = jfs_follow_link, + .setattr = jfs_setattr, + .setxattr = jfs_setxattr, + .getxattr = jfs_getxattr, + .listxattr = jfs_listxattr, + .removexattr = jfs_removexattr, +}; + +const struct inode_operations jfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, + .setattr = jfs_setattr, .setxattr = jfs_setxattr, .getxattr = jfs_getxattr, .listxattr = jfs_listxattr, diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 23aa5066b5a..46325d5c34f 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -4,22 +4,24 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See * the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/capability.h> #include <linux/fs.h> #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> +#include <linux/slab.h> #include <linux/quotaops.h> #include <linux/security.h> #include "jfs_incore.h" @@ -56,15 +58,15 @@ * * 0 4 4 + EA_SIZE(ea1) * +------------+-------------------+--------------------+----- - * | Overall EA | First FEA Element | Second FEA Element | ..... + * | Overall EA | First FEA Element | Second FEA Element | ..... * | List Size | | | * +------------+-------------------+--------------------+----- * * On-disk: * - * FEALISTs are stored on disk using blocks allocated by dbAlloc() and - * written directly. An EA list may be in-lined in the inode if there is - * sufficient room available. + * FEALISTs are stored on disk using blocks allocated by dbAlloc() and + * written directly. An EA list may be in-lined in the inode if there is + * sufficient room available. */ struct ea_buffer { @@ -83,62 +85,26 @@ struct ea_buffer { #define EA_NEW 0x0004 #define EA_MALLOC 0x0008 -/* Namespaces */ -#define XATTR_SYSTEM_PREFIX "system." -#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1) - -#define XATTR_USER_PREFIX "user." -#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1) - -#define XATTR_OS2_PREFIX "os2." 
-#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1) -/* XATTR_SECURITY_PREFIX is defined in include/linux/xattr.h */ -#define XATTR_SECURITY_PREFIX_LEN (sizeof (XATTR_SECURITY_PREFIX) - 1) +static int is_known_namespace(const char *name) +{ + if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) && + strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && + strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && + strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) + return false; -#define XATTR_TRUSTED_PREFIX "trusted." -#define XATTR_TRUSTED_PREFIX_LEN (sizeof (XATTR_TRUSTED_PREFIX) - 1) + return true; +} /* * These three routines are used to recognize on-disk extended attributes * that are in a recognized namespace. If the attribute is not recognized, * "os2." is prepended to the name */ -static inline int is_os2_xattr(struct jfs_ea *ea) +static int is_os2_xattr(struct jfs_ea *ea) { - /* - * Check for "system." - */ - if ((ea->namelen >= XATTR_SYSTEM_PREFIX_LEN) && - !strncmp(ea->name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) - return FALSE; - /* - * Check for "user." - */ - if ((ea->namelen >= XATTR_USER_PREFIX_LEN) && - !strncmp(ea->name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) - return FALSE; - /* - * Check for "security." - */ - if ((ea->namelen >= XATTR_SECURITY_PREFIX_LEN) && - !strncmp(ea->name, XATTR_SECURITY_PREFIX, - XATTR_SECURITY_PREFIX_LEN)) - return FALSE; - /* - * Check for "trusted." - */ - if ((ea->namelen >= XATTR_TRUSTED_PREFIX_LEN) && - !strncmp(ea->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) - return FALSE; - /* - * Add any other valid namespace prefixes here - */ - - /* - * We assume it's OS/2's flat namespace - */ - return TRUE; + return !is_known_namespace(ea->name); } static inline int name_size(struct jfs_ea *ea) @@ -169,9 +135,9 @@ static void ea_release(struct inode *inode, struct ea_buffer *ea_buf); /* * NAME: ea_write_inline - * + * * FUNCTION: Attempt to write an EA inline if area is available - * + * * PRE CONDITIONS: * Already verified that the specified EA is small enough to fit inline * @@ -230,10 +196,10 @@ static int ea_write_inline(struct inode *ip, struct jfs_ea_list *ealist, /* * NAME: ea_write - * + * * FUNCTION: Write an EA for an inode - * - * PRE CONDITIONS: EA has been verified + * + * PRE CONDITIONS: EA has been verified * * PARAMETERS: * ip - Inode pointer @@ -274,14 +240,14 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, nblocks = (size + (sb->s_blocksize - 1)) >> sb->s_blocksize_bits; /* Allocate new blocks to quota. */ - if (DQUOT_ALLOC_BLOCK(ip, nblocks)) { - return -EDQUOT; - } + rc = dquot_alloc_block(ip, nblocks); + if (rc) + return rc; rc = dbAlloc(ip, INOHINT(ip), nblocks, &blkno); if (rc) { /*Rollback quota allocation. */ - DQUOT_FREE_BLOCK(ip, nblocks); + dquot_free_block(ip, nblocks); return rc; } @@ -346,7 +312,7 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, failed: /* Rollback quota allocation. 
*/ - DQUOT_FREE_BLOCK(ip, nblocks); + dquot_free_block(ip, nblocks); dbFree(ip, blkno, nblocks); return rc; @@ -354,9 +320,9 @@ static int ea_write(struct inode *ip, struct jfs_ea_list *ealist, int size, /* * NAME: ea_read_inline - * + * * FUNCTION: Read an inlined EA into user's buffer - * + * * PARAMETERS: * ip - Inode pointer * ealist - Pointer to buffer to fill in with EA @@ -386,9 +352,9 @@ static int ea_read_inline(struct inode *ip, struct jfs_ea_list *ealist) /* * NAME: ea_read - * + * * FUNCTION: copy EA data into user's buffer - * + * * PARAMETERS: * ip - Inode pointer * ealist - Pointer to buffer to fill in with EA @@ -416,11 +382,11 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) nbytes = sizeDXD(&ji->ea); if (!nbytes) { - jfs_error(sb, "ea_read: nbytes is 0"); + jfs_error(sb, "nbytes is 0\n"); return -EIO; } - /* + /* * Figure out how many blocks were allocated when this EA list was * originally written to disk. */ @@ -457,14 +423,14 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) /* * NAME: ea_get - * + * * FUNCTION: Returns buffer containing existing extended attributes. * The size of the buffer will be the larger of the existing * attributes size, or min_size. * * The buffer, which may be inlined in the inode or in the - * page cache must be release by calling ea_release or ea_put - * + * page cache must be release by calling ea_release or ea_put + * * PARAMETERS: * inode - Inode pointer * ea_buf - Structure to be populated with ealist and its metadata @@ -516,7 +482,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) current_blocks = 0; } else { if (!(ji->ea.flag & DXD_EXTENT)) { - jfs_error(sb, "ea_get: invalid ea.flag)"); + jfs_error(sb, "invalid ea.flag\n"); return -EIO; } current_blocks = (ea_size + sb->s_blocksize - 1) >> @@ -552,7 +518,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) if (blocks_needed > current_blocks) { /* Allocate new blocks to quota. */ - if (DQUOT_ALLOC_BLOCK(inode, blocks_needed)) + rc = dquot_alloc_block(inode, blocks_needed); + if (rc) return -EDQUOT; quota_allocation = blocks_needed; @@ -604,7 +571,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) size_check: if (EALIST_SIZE(ea_buf->xattr) != ea_size) { printk(KERN_ERR "ea_get: invalid extended attribute\n"); - dump_mem("xattr", ea_buf->xattr, ea_size); + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, + ea_buf->xattr, ea_size, 1); ea_release(inode, ea_buf); rc = -EIO; goto clean_up; @@ -615,7 +583,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) clean_up: /* Rollback quota allocation */ if (quota_allocation) - DQUOT_FREE_BLOCK(inode, quota_allocation); + dquot_free_block(inode, quota_allocation); return (rc); } @@ -690,7 +658,7 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf, /* If old blocks exist, they must be removed from quota allocation. */ if (old_blocks) - DQUOT_FREE_BLOCK(inode, old_blocks); + dquot_free_block(inode, old_blocks); inode->i_ctime = CURRENT_TIME; @@ -698,111 +666,32 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf, } /* - * can_set_system_xattr - * - * This code is specific to the system.* namespace. It contains policy - * which doesn't belong in the main xattr codepath. + * Most of the permission checking is done by xattr_permission in the vfs. + * We also need to verify that this is a namespace that we recognize. 
*/ -static int can_set_system_xattr(struct inode *inode, const char *name, - const void *value, size_t value_len) -{ -#ifdef CONFIG_JFS_POSIX_ACL - struct posix_acl *acl; - int rc; - - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - - /* - * POSIX_ACL_XATTR_ACCESS is tied to i_mode - */ - if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) { - acl = posix_acl_from_xattr(value, value_len); - if (IS_ERR(acl)) { - rc = PTR_ERR(acl); - printk(KERN_ERR "posix_acl_from_xattr returned %d\n", - rc); - return rc; - } - if (acl) { - mode_t mode = inode->i_mode; - rc = posix_acl_equiv_mode(acl, &mode); - posix_acl_release(acl); - if (rc < 0) { - printk(KERN_ERR - "posix_acl_equiv_mode returned %d\n", - rc); - return rc; - } - inode->i_mode = mode; - mark_inode_dirty(inode); - } - /* - * We're changing the ACL. Get rid of the cached one - */ - acl =JFS_IP(inode)->i_acl; - if (acl != JFS_ACL_NOT_CACHED) - posix_acl_release(acl); - JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED; - - return 0; - } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { - acl = posix_acl_from_xattr(value, value_len); - if (IS_ERR(acl)) { - rc = PTR_ERR(acl); - printk(KERN_ERR "posix_acl_from_xattr returned %d\n", - rc); - return rc; - } - posix_acl_release(acl); - - /* - * We're changing the default ACL. Get rid of the cached one - */ - acl =JFS_IP(inode)->i_default_acl; - if (acl && (acl != JFS_ACL_NOT_CACHED)) - posix_acl_release(acl); - JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED; - - return 0; - } -#endif /* CONFIG_JFS_POSIX_ACL */ - return -EOPNOTSUPP; -} - static int can_set_xattr(struct inode *inode, const char *name, const void *value, size_t value_len) { - if (IS_RDONLY(inode)) - return -EROFS; - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - - if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0) + if (!strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) { /* - * "system.*" + * This makes sure that we aren't trying to set an + * attribute in a different namespace by prefixing it + * with "os2." */ - return can_set_system_xattr(inode, name, value, value_len); - - if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) - return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); + if (is_known_namespace(name + XATTR_OS2_PREFIX_LEN)) + return -EOPNOTSUPP; + return 0; + } -#ifdef CONFIG_JFS_SECURITY - if (strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) - == 0) - return 0; /* Leave it to the security module */ -#endif - - if((strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) != 0) && - (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) != 0)) + /* + * Don't allow setting an attribute in an unknown namespace. + */ + if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) && + strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) && + strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) return -EOPNOTSUPP; - if (!S_ISREG(inode->i_mode) && - (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX)) - return -EPERM; - - return permission(inode, MAY_WRITE, NULL); + return 0; } int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name, @@ -902,6 +791,19 @@ int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name, /* Completely new ea list */ xattr_size = sizeof (struct jfs_ea_list); + /* + * The size of EA value is limitted by on-disk format up to + * __le16, there would be an overflow if the size is equal + * to XATTR_SIZE_MAX (65536). 
In order to avoid this issue, + * we can pre-checkup the value size against USHRT_MAX, and + * return -E2BIG in this case, which is consistent with the + * VFS setxattr interface. + */ + if (value_len >= USHRT_MAX) { + rc = -E2BIG; + goto release; + } + ea = (struct jfs_ea *) ((char *) ealist + xattr_size); ea->flag = 0; ea->namelen = namelen; @@ -916,7 +818,7 @@ int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name, /* DEBUG - If we did this right, these number match */ if (xattr_size != new_size) { printk(KERN_ERR - "jfs_xsetattr: xattr_size = %d, new_size = %d\n", + "__jfs_setxattr: xattr_size = %d, new_size = %d\n", xattr_size, new_size); rc = -EINVAL; @@ -952,6 +854,14 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, int rc; tid_t tid; + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_setxattr(dentry, name, value, value_len, flags); + if ((rc = can_set_xattr(inode, name, value, value_len))) return rc; @@ -961,33 +871,17 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, } tid = txBegin(inode->i_sb, 0); - down(&ji->commit_sem); + mutex_lock(&ji->commit_mutex); rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len, flags); if (!rc) rc = txCommit(tid, 1, &inode, 0); txEnd(tid); - up(&ji->commit_sem); + mutex_unlock(&ji->commit_mutex); return rc; } -static int can_get_xattr(struct inode *inode, const char *name) -{ -#ifdef CONFIG_JFS_SECURITY - if(strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) == 0) - return 0; -#endif - - if(strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) - return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); - - if(strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) == 0) - return 0; - - return permission(inode, MAY_READ, NULL); -} - ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, size_t buf_size) { @@ -997,23 +891,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, int xattr_size; ssize_t size; int namelen = strlen(name); - char *os2name = NULL; - int rc; char *value; - if ((rc = can_get_xattr(inode, name))) - return rc; - - if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { - os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1, - GFP_KERNEL); - if (!os2name) - return -ENOMEM; - strcpy(os2name, name + XATTR_OS2_PREFIX_LEN); - name = os2name; - namelen -= XATTR_OS2_PREFIX_LEN; - } - down_read(&JFS_IP(inode)->xattr_sem); xattr_size = ea_get(inode, &ea_buf, 0); @@ -1051,8 +930,6 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, out: up_read(&JFS_IP(inode)->xattr_sem); - kfree(os2name); - return size; } @@ -1061,6 +938,27 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, { int err; + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_getxattr(dentry, name, data, buf_size); + + if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { + /* + * skip past "os2." prefix + */ + name += XATTR_OS2_PREFIX_LEN; + /* + * Don't allow retrieving properly prefixed attributes + * by prepending them with "os2." 
+ */ + if (is_known_namespace(name)) + return -EOPNOTSUPP; + } + err = __jfs_getxattr(dentry->d_inode, name, data, buf_size); return err; @@ -1101,7 +999,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) /* compute required size of list */ for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { - if (can_list(ea)) + if (can_list(ea)) size += name_size(ea) + 1; } @@ -1116,7 +1014,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) /* Copy attribute names to buffer */ buffer = data; for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { - if (can_list(ea)) { + if (can_list(ea)) { int namelen = copy_name(buffer, ea); buffer += namelen + 1; } @@ -1136,51 +1034,73 @@ int jfs_removexattr(struct dentry *dentry, const char *name) int rc; tid_t tid; + /* + * If this is a request for a synthetic attribute in the system.* + * namespace use the generic infrastructure to resolve a handler + * for it via sb->s_xattr. + */ + if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) + return generic_removexattr(dentry, name); + if ((rc = can_set_xattr(inode, name, NULL, 0))) return rc; tid = txBegin(inode->i_sb, 0); - down(&ji->commit_sem); + mutex_lock(&ji->commit_mutex); rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE); if (!rc) rc = txCommit(tid, 1, &inode, 0); txEnd(tid); - up(&ji->commit_sem); + mutex_unlock(&ji->commit_mutex); return rc; } +/* + * List of handlers for synthetic system.* attributes. All real ondisk + * attributes are handled directly. + */ +const struct xattr_handler *jfs_xattr_handlers[] = { +#ifdef CONFIG_JFS_POSIX_ACL + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, +#endif + NULL, +}; + + #ifdef CONFIG_JFS_SECURITY -int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir) +static int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) { - int rc; - size_t len; - void *value; - char *suffix; + const struct xattr *xattr; + tid_t *tid = fs_info; char *name; - - rc = security_inode_init_security(inode, dir, &suffix, &value, &len); - if (rc) { - if (rc == -EOPNOTSUPP) - return 0; - return rc; - } - name = kmalloc(XATTR_SECURITY_PREFIX_LEN + 1 + strlen(suffix), - GFP_NOFS); - if (!name) { - rc = -ENOMEM; - goto kmalloc_failed; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + name = kmalloc(XATTR_SECURITY_PREFIX_LEN + + strlen(xattr->name) + 1, GFP_NOFS); + if (!name) { + err = -ENOMEM; + break; + } + strcpy(name, XATTR_SECURITY_PREFIX); + strcpy(name + XATTR_SECURITY_PREFIX_LEN, xattr->name); + + err = __jfs_setxattr(*tid, inode, name, + xattr->value, xattr->value_len, 0); + kfree(name); + if (err < 0) + break; } - strcpy(name, XATTR_SECURITY_PREFIX); - strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); - - rc = __jfs_setxattr(tid, inode, name, value, len, 0); - - kfree(name); -kmalloc_failed: - kfree(suffix); - kfree(value); + return err; +} - return rc; +int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir, + const struct qstr *qstr) +{ + return security_inode_init_security(inode, dir, qstr, + &jfs_initxattrs, &tid); } #endif |
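
The reworked jfs_error() in super.c above switches from a fixed 256-byte error_buf to the kernel's struct va_format / %pV printk extension, so the caller's format string and arguments are expanded inside one log line together with the calling function. A minimal sketch of that idiom, separate from the patch, with a hypothetical wrapper name (demo_log_err):

#include <linux/kernel.h>
#include <linux/printk.h>

static void demo_log_err(const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	/* one printk: %pf names the caller, %pV expands fmt with its args */
	pr_err("demo: %pf: %pV\n", __builtin_return_address(0), &vaf);
	va_end(args);
}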
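
The new uid=/gid= mount options above are parsed with the strict kstrtouint() helper and then converted to a kuid_t with make_kuid(), so the stored owner is user-namespace aware and invalid mappings are rejected. A compact sketch of that sequence, assuming the option value has already been isolated as a string (parse_uid_option is a hypothetical helper, not part of the patch):

#include <linux/kernel.h>
#include <linux/uidgid.h>
#include <linux/cred.h>

static int parse_uid_option(const char *str, kuid_t *out)
{
	uid_t val;
	int rc;

	/* strict parse: rejects trailing junk, unlike simple_strtoul() */
	rc = kstrtouint(str, 0, &val);
	if (rc)
		return rc;
	*out = make_kuid(current_user_ns(), val);
	if (!uid_valid(*out))
		return -EINVAL;	/* not mappable in the mounting user namespace */
	return 0;
}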
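
init_jfs_fs() and exit_jfs_fs() above drop kernel_thread(), the jfs_stop_threads flag and the jfsIOwait completion in favour of kthread_run()/kthread_stop(): the creator keeps the returned task_struct, the worker polls kthread_should_stop(), and teardown is a single blocking call. A minimal sketch of that lifecycle with a hypothetical worker (demo_worker, "demoThread"):

#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/err.h>

static struct task_struct *demo_task;

static int demo_worker(void *data)
{
	/* kthread_stop() sets the should-stop flag and waits for us to return */
	while (!kthread_should_stop())
		msleep(100);	/* stand-in for real work or event waiting */
	return 0;
}

static int demo_start(void)
{
	demo_task = kthread_run(demo_worker, NULL, "demoThread");
	if (IS_ERR(demo_task))
		return PTR_ERR(demo_task);	/* thread was never started */
	return 0;
}

static void demo_stop(void)
{
	kthread_stop(demo_task);	/* blocks until demo_worker() returns */
}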
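
In xattr.c the DQUOT_ALLOC_BLOCK()/DQUOT_FREE_BLOCK() macros become dquot_alloc_block()/dquot_free_block(), and ea_write()/ea_get() keep the charge-first, roll-back-on-failure ordering. A rough sketch of that shape, with a stubbed-out allocator standing in for the real dbAlloc():

#include <linux/fs.h>
#include <linux/quotaops.h>

/* stand-in for the filesystem's real block allocator (dbAlloc() in jfs) */
static int demo_do_alloc(struct inode *inode, s64 nblocks)
{
	return 0;
}

static int demo_alloc_blocks(struct inode *inode, s64 nblocks)
{
	int rc;

	rc = dquot_alloc_block(inode, nblocks);	/* charge quota up front */
	if (rc)
		return rc;			/* propagate the quota error */

	rc = demo_do_alloc(inode, nblocks);
	if (rc)
		dquot_free_block(inode, nblocks);	/* undo the quota charge */
	return rc;
}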
