Diffstat (limited to 'fs/ext2')
-rw-r--r--  fs/ext2/Kconfig             55
-rw-r--r--  fs/ext2/Makefile             2
-rw-r--r--  fs/ext2/acl.c              364
-rw-r--r--  fs/ext2/acl.h               15
-rw-r--r--  fs/ext2/balloc.c          1441
-rw-r--r--  fs/ext2/dir.c              275
-rw-r--r--  fs/ext2/ext2.h             737
-rw-r--r--  fs/ext2/file.c              70
-rw-r--r--  fs/ext2/fsync.c             51
-rw-r--r--  fs/ext2/ialloc.c           117
-rw-r--r--  fs/ext2/inode.c            924
-rw-r--r--  fs/ext2/ioctl.c            144
-rw-r--r--  fs/ext2/namei.c            185
-rw-r--r--  fs/ext2/super.c            818
-rw-r--r--  fs/ext2/symlink.c            6
-rw-r--r--  fs/ext2/xattr.c            115
-rw-r--r--  fs/ext2/xattr.h             16
-rw-r--r--  fs/ext2/xattr_security.c    59
-rw-r--r--  fs/ext2/xattr_trusted.c     29
-rw-r--r--  fs/ext2/xattr_user.c        32
-rw-r--r--  fs/ext2/xip.c               60
-rw-r--r--  fs/ext2/xip.h                9
22 files changed, 3766 insertions, 1758 deletions
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
new file mode 100644
index 00000000000..14a6780fd03
--- /dev/null
+++ b/fs/ext2/Kconfig
@@ -0,0 +1,55 @@
+config EXT2_FS
+	tristate "Second extended fs support"
+	help
+	  Ext2 is a standard Linux file system for hard disks.
+
+	  To compile this file system support as a module, choose M here: the
+	  module will be called ext2.
+
+	  If unsure, say Y.
+
+config EXT2_FS_XATTR
+	bool "Ext2 extended attributes"
+	depends on EXT2_FS
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+config EXT2_FS_POSIX_ACL
+	bool "Ext2 POSIX Access Control Lists"
+	depends on EXT2_FS_XATTR
+	select FS_POSIX_ACL
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the Posix ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
+
+config EXT2_FS_SECURITY
+	bool "Ext2 Security Labels"
+	depends on EXT2_FS_XATTR
+	help
+	  Security labels support alternative access control models
+	  implemented by security modules like SELinux.  This option
+	  enables an extended attribute handler for file security
+	  labels in the ext2 filesystem.
+
+	  If you are not using a security module that requires using
+	  extended attributes for file security labels, say N.
+
+config EXT2_FS_XIP
+	bool "Ext2 execute in place support"
+	depends on EXT2_FS && MMU
+	help
+	  Execute in place can be used on memory-backed block devices. If you
+	  enable this option, you can select to mount block devices which are
+	  capable of this feature without using the page cache.
+
+	  If you do not use a block device that is capable of using this,
+	  or if unsure, say N.
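The Kconfig symbols above only select what gets built; they take effect through preprocessor conditionals in the ext2 headers and Makefile. As a minimal sketch (not part of this commit — the real pattern appears in the fs/ext2/acl.h hunk further down in this diff), CONFIG_EXT2_FS_POSIX_ACL gates the ACL hooks roughly like this:

/* Illustrative sketch: with EXT2_FS_POSIX_ACL disabled, the ACL hooks
 * compile down to NULL / no-op stubs so the VFS skips ACL handling. */
#ifdef CONFIG_EXT2_FS_POSIX_ACL
extern struct posix_acl *ext2_get_acl(struct inode *inode, int type);
extern int ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
extern int ext2_init_acl(struct inode *inode, struct inode *dir);
#else
#define ext2_get_acl	NULL
#define ext2_set_acl	NULL
static inline int ext2_init_acl(struct inode *inode, struct inode *dir)
{
	return 0;
}
#endif

The xattr and security-label options work the same way: the Makefile hunk below adds the xattr_*.o objects only under CONFIG_EXT2_FS_XATTR, so disabling an option drops the corresponding code entirely.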
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile index e0b2b43c1fd..f42af45cfd8 100644 --- a/fs/ext2/Makefile +++ b/fs/ext2/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT2_FS) += ext2.o -ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \ +ext2-y := balloc.o dir.o file.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 7c420b800c3..27695e6f4e4 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -4,7 +4,6 @@ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> */ -#include <linux/capability.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/slab.h> @@ -53,16 +52,23 @@ ext2_acl_from_disk(const void *value, size_t size) case ACL_OTHER: value = (char *)value + sizeof(ext2_acl_entry_short); - acl->a_entries[n].e_id = ACL_UNDEFINED_ID; break; case ACL_USER: + value = (char *)value + sizeof(ext2_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_uid = + make_kuid(&init_user_ns, + le32_to_cpu(entry->e_id)); + break; case ACL_GROUP: value = (char *)value + sizeof(ext2_acl_entry); if ((char *)value > end) goto fail; - acl->a_entries[n].e_id = - le32_to_cpu(entry->e_id); + acl->a_entries[n].e_gid = + make_kgid(&init_user_ns, + le32_to_cpu(entry->e_id)); break; default: @@ -96,14 +102,19 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size) ext_acl->a_version = cpu_to_le32(EXT2_ACL_VERSION); e = (char *)ext_acl + sizeof(ext2_acl_header); for (n=0; n < acl->a_count; n++) { + const struct posix_acl_entry *acl_e = &acl->a_entries[n]; ext2_acl_entry *entry = (ext2_acl_entry *)e; - entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); - entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch(acl->a_entries[n].e_tag) { + entry->e_tag = cpu_to_le16(acl_e->e_tag); + entry->e_perm = cpu_to_le16(acl_e->e_perm); + switch(acl_e->e_tag) { case ACL_USER: + entry->e_id = cpu_to_le32( + from_kuid(&init_user_ns, acl_e->e_uid)); + e += sizeof(ext2_acl_entry); + break; case ACL_GROUP: - entry->e_id = - cpu_to_le32(acl->a_entries[n].e_id); + entry->e_id = cpu_to_le32( + from_kgid(&init_user_ns, acl_e->e_gid)); e += sizeof(ext2_acl_entry); break; @@ -125,62 +136,26 @@ fail: return ERR_PTR(-EINVAL); } -static inline struct posix_acl * -ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = EXT2_ACL_NOT_CACHED; - - spin_lock(&inode->i_lock); - if (*i_acl != EXT2_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); - - return acl; -} - -static inline void -ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != EXT2_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - /* * inode->i_mutex: don't care */ -static struct posix_acl * +struct posix_acl * ext2_get_acl(struct inode *inode, int type) { - struct ext2_inode_info *ei = EXT2_I(inode); int name_index; char *value = NULL; struct posix_acl *acl; int retval; - if (!test_opt(inode->i_sb, POSIX_ACL)) - return NULL; - - switch(type) { - case ACL_TYPE_ACCESS: - acl = ext2_iget_acl(inode, &ei->i_acl); - if (acl != EXT2_ACL_NOT_CACHED) - return acl; - name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - - case ACL_TYPE_DEFAULT: - acl = ext2_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT2_ACL_NOT_CACHED) - return acl; - name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - - default: 
- return ERR_PTR(-EINVAL); + switch (type) { + case ACL_TYPE_ACCESS: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + BUG(); } retval = ext2_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { @@ -197,47 +172,32 @@ ext2_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) { - switch(type) { - case ACL_TYPE_ACCESS: - ext2_iset_acl(inode, &ei->i_acl, acl); - break; + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); - case ACL_TYPE_DEFAULT: - ext2_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } return acl; } /* * inode->i_mutex: down */ -static int -ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) +int +ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct ext2_inode_info *ei = EXT2_I(inode); int name_index; void *value = NULL; size_t size = 0; int error; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - if (!test_opt(inode->i_sb, POSIX_ACL)) - return 0; - switch(type) { case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); + error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; else { - inode->i_mode = mode; + inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); if (error == 0) acl = NULL; @@ -263,42 +223,11 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) error = ext2_xattr_set(inode, name_index, "", value, size, 0); kfree(value); - if (!error) { - switch(type) { - case ACL_TYPE_ACCESS: - ext2_iset_acl(inode, &ei->i_acl, acl); - break; - - case ACL_TYPE_DEFAULT: - ext2_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } + if (!error) + set_cached_acl(inode, type, acl); return error; } -static int -ext2_check_acl(struct inode *inode, int mask) -{ - struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); - - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - - return -EAGAIN; -} - -int -ext2_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return generic_permission(inode, mask, ext2_check_acl); -} - /* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * @@ -308,212 +237,21 @@ ext2_permission(struct inode *inode, int mask, struct nameidata *nd) int ext2_init_acl(struct inode *inode, struct inode *dir) { - struct posix_acl *acl = NULL; - int error = 0; - - if (!S_ISLNK(inode->i_mode)) { - if (test_opt(dir->i_sb, POSIX_ACL)) { - acl = ext2_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - } - if (!acl) - inode->i_mode &= ~current->fs->umask; - } - if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - struct posix_acl *clone; - mode_t mode; - - if (S_ISDIR(inode->i_mode)) { - error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); - if (error) - goto cleanup; - } - clone = posix_acl_clone(acl, GFP_KERNEL); - error = -ENOMEM; - if (!clone) - goto cleanup; - mode = inode->i_mode; - error = posix_acl_create_masq(clone, &mode); - if (error >= 0) { - inode->i_mode = mode; - if (error > 0) { - /* This is an extended ACL */ - error = ext2_set_acl(inode, - ACL_TYPE_ACCESS, clone); - } - } - posix_acl_release(clone); - } -cleanup: - posix_acl_release(acl); - return error; -} - -/* - * Does chmod for an inode that may have an Access Control List. 
The - * inode->i_mode field must be updated to the desired value by the caller - * before calling this function. - * Returns 0 on success, or a negative error number. - * - * We change the ACL rather than storing some ACL entries in the file - * mode permission bits (which would be more efficient), because that - * would break once additional permissions (like ACL_APPEND, ACL_DELETE - * for directories) are added. There are no more bits available in the - * file mode. - * - * inode->i_mutex: down - */ -int -ext2_acl_chmod(struct inode *inode) -{ - struct posix_acl *acl, *clone; - int error; - - if (!test_opt(inode->i_sb, POSIX_ACL)) - return 0; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) - error = ext2_set_acl(inode, ACL_TYPE_ACCESS, clone); - posix_acl_release(clone); - return error; -} - -/* - * Extended attribut handlers - */ -static size_t -ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) -{ - const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); - - if (!test_opt(inode->i_sb, POSIX_ACL)) - return 0; - if (list && size <= list_size) - memcpy(list, POSIX_ACL_XATTR_ACCESS, size); - return size; -} - -static size_t -ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) -{ - const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); - - if (!test_opt(inode->i_sb, POSIX_ACL)) - return 0; - if (list && size <= list_size) - memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); - return size; -} - -static int -ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) -{ - struct posix_acl *acl; + struct posix_acl *default_acl, *acl; int error; - if (!test_opt(inode->i_sb, POSIX_ACL)) - return -EOPNOTSUPP; - - acl = ext2_get_acl(inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - error = posix_acl_to_xattr(acl, buffer, size); - posix_acl_release(acl); - - return error; -} - -static int -ext2_xattr_get_acl_access(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); -} - -static int -ext2_xattr_get_acl_default(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); -} - -static int -ext2_xattr_set_acl(struct inode *inode, int type, const void *value, - size_t size) -{ - struct posix_acl *acl; - int error; - - if (!test_opt(inode->i_sb, POSIX_ACL)) - return -EOPNOTSUPP; - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - - if (value) { - acl = posix_acl_from_xattr(value, size); - if (IS_ERR(acl)) - return PTR_ERR(acl); - else if (acl) { - error = posix_acl_valid(acl); - if (error) - goto release_and_out; - } - } else - acl = NULL; - - error = ext2_set_acl(inode, type, acl); + error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); + if (error) + return error; -release_and_out: - posix_acl_release(acl); + if (default_acl) { + error = ext2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + posix_acl_release(default_acl); + } + if (acl) { + if (!error) + error = 
ext2_set_acl(inode, acl, ACL_TYPE_ACCESS); + posix_acl_release(acl); + } return error; } - -static int -ext2_xattr_set_acl_access(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); -} - -static int -ext2_xattr_set_acl_default(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - if (strcmp(name, "") != 0) - return -EINVAL; - return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); -} - -struct xattr_handler ext2_xattr_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .list = ext2_xattr_list_acl_access, - .get = ext2_xattr_get_acl_access, - .set = ext2_xattr_set_acl_access, -}; - -struct xattr_handler ext2_xattr_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .list = ext2_xattr_list_acl_default, - .get = ext2_xattr_get_acl_default, - .set = ext2_xattr_set_acl_default, -}; diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 0bde85bafe3..44937f9fcf3 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -53,27 +53,16 @@ static inline int ext2_acl_count(size_t size) #ifdef CONFIG_EXT2_FS_POSIX_ACL -/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl - if the ACL has not been cached */ -#define EXT2_ACL_NOT_CACHED ((void *)-1) - /* acl.c */ -extern int ext2_permission (struct inode *, int, struct nameidata *); -extern int ext2_acl_chmod (struct inode *); +extern struct posix_acl *ext2_get_acl(struct inode *inode, int type); +extern int ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type); extern int ext2_init_acl (struct inode *, struct inode *); #else #include <linux/sched.h> -#define ext2_permission NULL #define ext2_get_acl NULL #define ext2_set_acl NULL -static inline int -ext2_acl_chmod (struct inode *inode) -{ - return 0; -} - static inline int ext2_init_acl (struct inode *inode, struct inode *dir) { return 0; diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index b1981d0e95a..9f9992b3792 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -13,6 +13,7 @@ #include "ext2.h" #include <linux/quotaops.h> +#include <linux/slab.h> #include <linux/sched.h> #include <linux/buffer_head.h> #include <linux/capability.h> @@ -29,7 +30,7 @@ * The file system contains group descriptors which are located after the * super block. Each descriptor contains the number of the bitmap block and * the free blocks count in the block. The descriptors are loaded in memory - * when a file system is mounted (see ext2_read_super). + * when a file system is mounted (see ext2_fill_super). 
*/ @@ -69,9 +70,53 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, return desc + offset; } +static int ext2_valid_block_bitmap(struct super_block *sb, + struct ext2_group_desc *desc, + unsigned int block_group, + struct buffer_head *bh) +{ + ext2_grpblk_t offset; + ext2_grpblk_t next_zero_bit; + ext2_fsblk_t bitmap_blk; + ext2_fsblk_t group_first_block; + + group_first_block = ext2_group_first_block_no(sb, block_group); + + /* check whether block bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext2_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode bitmap block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap); + offset = bitmap_blk - group_first_block; + if (!ext2_test_bit(offset, bh->b_data)) + /* bad block bitmap */ + goto err_out; + + /* check whether the inode table block number is set */ + bitmap_blk = le32_to_cpu(desc->bg_inode_table); + offset = bitmap_blk - group_first_block; + next_zero_bit = ext2_find_next_zero_bit(bh->b_data, + offset + EXT2_SB(sb)->s_itb_per_group, + offset); + if (next_zero_bit >= offset + EXT2_SB(sb)->s_itb_per_group) + /* good bitmap for inode tables */ + return 1; + +err_out: + ext2_error(sb, __func__, + "Invalid block bitmap - " + "block_group = %d, block = %lu", + block_group, bitmap_blk); + return 0; +} + /* - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. + * Read the bitmap for a given block_group,and validate the + * bits for block/inode/inode tables are set in the bitmaps * * Return buffer_head on success or NULL in case of failure. */ @@ -80,100 +125,354 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext2_group_desc * desc; struct buffer_head * bh = NULL; - - desc = ext2_get_group_desc (sb, block_group, NULL); + ext2_fsblk_t bitmap_blk; + + desc = ext2_get_group_desc(sb, block_group, NULL); if (!desc) - goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); - if (!bh) - ext2_error (sb, "read_block_bitmap", + return NULL; + bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); + bh = sb_getblk(sb, bitmap_blk); + if (unlikely(!bh)) { + ext2_error(sb, __func__, + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %u", + block_group, le32_to_cpu(desc->bg_block_bitmap)); + return NULL; + } + if (likely(bh_uptodate_or_lock(bh))) + return bh; + + if (bh_submit_read(bh) < 0) { + brelse(bh); + ext2_error(sb, __func__, "Cannot read block bitmap - " "block_group = %d, block_bitmap = %u", block_group, le32_to_cpu(desc->bg_block_bitmap)); -error_out: + return NULL; + } + + ext2_valid_block_bitmap(sb, desc, block_group, bh); + /* + * file system mounted not to panic on error, continue with corrupt + * bitmap + */ return bh; } +static void group_adjust_blocks(struct super_block *sb, int group_no, + struct ext2_group_desc *desc, struct buffer_head *bh, int count) +{ + if (count) { + struct ext2_sb_info *sbi = EXT2_SB(sb); + unsigned free_blocks; + + spin_lock(sb_bgl_lock(sbi, group_no)); + free_blocks = le16_to_cpu(desc->bg_free_blocks_count); + desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count); + spin_unlock(sb_bgl_lock(sbi, group_no)); + mark_buffer_dirty(bh); + } +} + /* - * Set sb->s_dirt here because the superblock was "logically" altered. We - * need to recalculate its free blocks count and flush it out. 
+ * The reservation window structure operations + * -------------------------------------------- + * Operations include: + * dump, find, add, remove, is_empty, find_next_reservable_window, etc. + * + * We use a red-black tree to represent per-filesystem reservation + * windows. + * + */ + +/** + * __rsv_window_dump() -- Dump the filesystem block allocation reservation map + * @rb_root: root of per-filesystem reservation rb tree + * @verbose: verbose mode + * @fn: function which wishes to dump the reservation map + * + * If verbose is turned on, it will print the whole block reservation + * windows(start, end). Otherwise, it will only print out the "bad" windows, + * those windows that overlap with their immediate neighbors. */ -static int reserve_blocks(struct super_block *sb, int count) +#if 1 +static void __rsv_window_dump(struct rb_root *root, int verbose, + const char *fn) { - struct ext2_sb_info *sbi = EXT2_SB(sb); - struct ext2_super_block *es = sbi->s_es; - unsigned free_blocks; - unsigned root_blocks; + struct rb_node *n; + struct ext2_reserve_window_node *rsv, *prev; + int bad; + +restart: + n = rb_first(root); + bad = 0; + prev = NULL; + + printk("Block Allocation Reservation Windows Map (%s):\n", fn); + while (n) { + rsv = rb_entry(n, struct ext2_reserve_window_node, rsv_node); + if (verbose) + printk("reservation window 0x%p " + "start: %lu, end: %lu\n", + rsv, rsv->rsv_start, rsv->rsv_end); + if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { + printk("Bad reservation %p (start >= end)\n", + rsv); + bad = 1; + } + if (prev && prev->rsv_end >= rsv->rsv_start) { + printk("Bad reservation %p (prev->end >= start)\n", + rsv); + bad = 1; + } + if (bad) { + if (!verbose) { + printk("Restarting reservation walk in verbose mode\n"); + verbose = 1; + goto restart; + } + } + n = rb_next(n); + prev = rsv; + } + printk("Window map complete.\n"); + BUG_ON(bad); +} +#define rsv_window_dump(root, verbose) \ + __rsv_window_dump((root), (verbose), __func__) +#else +#define rsv_window_dump(root, verbose) do {} while (0) +#endif - free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - root_blocks = le32_to_cpu(es->s_r_blocks_count); +/** + * goal_in_my_reservation() + * @rsv: inode's reservation window + * @grp_goal: given goal block relative to the allocation block group + * @group: the current allocation block group + * @sb: filesystem super block + * + * Test if the given goal block (group relative) is within the file's + * own block reservation window range. + * + * If the reservation window is outside the goal allocation group, return 0; + * grp_goal (given goal block) could be -1, which means no specific + * goal block. In this case, always return 1. + * If the goal block is within the reservation window, return 1; + * otherwise, return 0; + */ +static int +goal_in_my_reservation(struct ext2_reserve_window *rsv, ext2_grpblk_t grp_goal, + unsigned int group, struct super_block * sb) +{ + ext2_fsblk_t group_first_block, group_last_block; - if (free_blocks < count) - count = free_blocks; + group_first_block = ext2_group_first_block_no(sb, group); + group_last_block = group_first_block + EXT2_BLOCKS_PER_GROUP(sb) - 1; - if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) && - sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { - /* - * We are too close to reserve and we are not privileged. - * Can we allocate anything at all? 
- */ - if (free_blocks > root_blocks) - count = free_blocks - root_blocks; + if ((rsv->_rsv_start > group_last_block) || + (rsv->_rsv_end < group_first_block)) + return 0; + if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) + || (grp_goal + group_first_block > rsv->_rsv_end))) + return 0; + return 1; +} + +/** + * search_reserve_window() + * @rb_root: root of reservation tree + * @goal: target allocation block + * + * Find the reserved window which includes the goal, or the previous one + * if the goal is not in any window. + * Returns NULL if there are no windows or if all windows start after the goal. + */ +static struct ext2_reserve_window_node * +search_reserve_window(struct rb_root *root, ext2_fsblk_t goal) +{ + struct rb_node *n = root->rb_node; + struct ext2_reserve_window_node *rsv; + + if (!n) + return NULL; + + do { + rsv = rb_entry(n, struct ext2_reserve_window_node, rsv_node); + + if (goal < rsv->rsv_start) + n = n->rb_left; + else if (goal > rsv->rsv_end) + n = n->rb_right; else - return 0; + return rsv; + } while (n); + /* + * We've fallen off the end of the tree: the goal wasn't inside + * any particular node. OK, the previous node must be to one + * side of the interval containing the goal. If it's the RHS, + * we need to back up one. + */ + if (rsv->rsv_start > goal) { + n = rb_prev(&rsv->rsv_node); + rsv = rb_entry(n, struct ext2_reserve_window_node, rsv_node); + } + return rsv; +} + +/* + * ext2_rsv_window_add() -- Insert a window to the block reservation rb tree. + * @sb: super block + * @rsv: reservation window to add + * + * Must be called with rsv_lock held. + */ +void ext2_rsv_window_add(struct super_block *sb, + struct ext2_reserve_window_node *rsv) +{ + struct rb_root *root = &EXT2_SB(sb)->s_rsv_window_root; + struct rb_node *node = &rsv->rsv_node; + ext2_fsblk_t start = rsv->rsv_start; + + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct ext2_reserve_window_node *this; + + while (*p) + { + parent = *p; + this = rb_entry(parent, struct ext2_reserve_window_node, rsv_node); + + if (start < this->rsv_start) + p = &(*p)->rb_left; + else if (start > this->rsv_end) + p = &(*p)->rb_right; + else { + rsv_window_dump(root, 1); + BUG(); + } } - percpu_counter_mod(&sbi->s_freeblocks_counter, -count); - sb->s_dirt = 1; - return count; + rb_link_node(node, parent, p); + rb_insert_color(node, root); } -static void release_blocks(struct super_block *sb, int count) +/** + * rsv_window_remove() -- unlink a window from the reservation rb tree + * @sb: super block + * @rsv: reservation window to remove + * + * Mark the block reservation window as not allocated, and unlink it + * from the filesystem reservation window rb tree. Must be called with + * rsv_lock held. + */ +static void rsv_window_remove(struct super_block *sb, + struct ext2_reserve_window_node *rsv) { - if (count) { - struct ext2_sb_info *sbi = EXT2_SB(sb); + rsv->rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_alloc_hit = 0; + rb_erase(&rsv->rsv_node, &EXT2_SB(sb)->s_rsv_window_root); +} - percpu_counter_mod(&sbi->s_freeblocks_counter, count); - sb->s_dirt = 1; - } +/* + * rsv_is_empty() -- Check if the reservation window is allocated. + * @rsv: given reservation window to check + * + * returns 1 if the end block is EXT2_RESERVE_WINDOW_NOT_ALLOCATED. 
+ */ +static inline int rsv_is_empty(struct ext2_reserve_window *rsv) +{ + /* a valid reservation end block could not be 0 */ + return (rsv->_rsv_end == EXT2_RESERVE_WINDOW_NOT_ALLOCATED); } -static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no, - struct ext2_group_desc *desc, struct buffer_head *bh, int count) +/** + * ext2_init_block_alloc_info() + * @inode: file inode structure + * + * Allocate and initialize the reservation window structure, and + * link the window to the ext2 inode structure at last + * + * The reservation window structure is only dynamically allocated + * and linked to ext2 inode the first time the open file + * needs a new block. So, before every ext2_new_block(s) call, for + * regular files, we should check whether the reservation window + * structure exists or not. In the latter case, this function is called. + * Fail to do so will result in block reservation being turned off for that + * open file. + * + * This function is called from ext2_get_blocks_handle(), also called + * when setting the reservation window size through ioctl before the file + * is open for write (needs block allocation). + * + * Needs truncate_mutex protection prior to calling this function. + */ +void ext2_init_block_alloc_info(struct inode *inode) { - unsigned free_blocks; + struct ext2_inode_info *ei = EXT2_I(inode); + struct ext2_block_alloc_info *block_i; + struct super_block *sb = inode->i_sb; - if (!desc->bg_free_blocks_count) - return 0; + block_i = kmalloc(sizeof(*block_i), GFP_NOFS); + if (block_i) { + struct ext2_reserve_window_node *rsv = &block_i->rsv_window_node; + + rsv->rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; - spin_lock(sb_bgl_lock(sbi, group_no)); - free_blocks = le16_to_cpu(desc->bg_free_blocks_count); - if (free_blocks < count) - count = free_blocks; - desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count); - spin_unlock(sb_bgl_lock(sbi, group_no)); - mark_buffer_dirty(bh); - return count; + /* + * if filesystem is mounted with NORESERVATION, the goal + * reservation window size is set to zero to indicate + * block reservation is off + */ + if (!test_opt(sb, RESERVATION)) + rsv->rsv_goal_size = 0; + else + rsv->rsv_goal_size = EXT2_DEFAULT_RESERVE_BLOCKS; + rsv->rsv_alloc_hit = 0; + block_i->last_alloc_logical_block = 0; + block_i->last_alloc_physical_block = 0; + } + ei->i_block_alloc_info = block_i; } -static void group_release_blocks(struct super_block *sb, int group_no, - struct ext2_group_desc *desc, struct buffer_head *bh, int count) +/** + * ext2_discard_reservation() + * @inode: inode + * + * Discard(free) block reservation window on last file close, or truncate + * or at last iput(). + * + * It is being called in three cases: + * ext2_release_file(): last writer closes the file + * ext2_clear_inode(): last iput(), when nobody links to this file. + * ext2_truncate(): when the block indirect map is about to change. 
+ */ +void ext2_discard_reservation(struct inode *inode) { - if (count) { - struct ext2_sb_info *sbi = EXT2_SB(sb); - unsigned free_blocks; - - spin_lock(sb_bgl_lock(sbi, group_no)); - free_blocks = le16_to_cpu(desc->bg_free_blocks_count); - desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count); - spin_unlock(sb_bgl_lock(sbi, group_no)); - sb->s_dirt = 1; - mark_buffer_dirty(bh); + struct ext2_inode_info *ei = EXT2_I(inode); + struct ext2_block_alloc_info *block_i = ei->i_block_alloc_info; + struct ext2_reserve_window_node *rsv; + spinlock_t *rsv_lock = &EXT2_SB(inode->i_sb)->s_rsv_window_lock; + + if (!block_i) + return; + + rsv = &block_i->rsv_window_node; + if (!rsv_is_empty(&rsv->rsv_window)) { + spin_lock(rsv_lock); + if (!rsv_is_empty(&rsv->rsv_window)) + rsv_window_remove(inode->i_sb, rsv); + spin_unlock(rsv_lock); } } -/* Free given blocks, update quota and i_blocks field */ +/** + * ext2_free_blocks() -- Free given blocks and update quota and i_blocks + * @inode: inode + * @block: start physical block to free + * @count: number of blocks to free + */ void ext2_free_blocks (struct inode * inode, unsigned long block, unsigned long count) { @@ -228,16 +527,18 @@ do_more: in_range (block, le32_to_cpu(desc->bg_inode_table), sbi->s_itb_per_group) || in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), - sbi->s_itb_per_group)) + sbi->s_itb_per_group)) { ext2_error (sb, "ext2_free_blocks", "Freeing blocks in system zones - " "Block = %lu, count = %lu", block, count); + goto error_return; + } for (i = 0, group_freed = 0; i < count; i++) { if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, bitmap_bh->b_data)) { - ext2_error(sb, __FUNCTION__, + ext2_error(sb, __func__, "bit already cleared for block %lu", block + i); } else { group_freed++; @@ -248,7 +549,7 @@ do_more: if (sb->s_flags & MS_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); - group_release_blocks(sb, block_group, desc, bh2, group_freed); + group_adjust_blocks(sb, block_group, desc, bh2, group_freed); freed += group_freed; if (overflow) { @@ -258,20 +559,53 @@ do_more: } error_return: brelse(bitmap_bh); - release_blocks(sb, freed); - DQUOT_FREE_BLOCK(inode, freed); + if (freed) { + percpu_counter_add(&sbi->s_freeblocks_counter, freed); + dquot_free_block_nodirty(inode, freed); + mark_inode_dirty(inode); + } } -static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal) +/** + * bitmap_search_next_usable_block() + * @start: the starting block (group relative) of the search + * @bh: bufferhead contains the block group bitmap + * @maxblocks: the ending block (group relative) of the reservation + * + * The bitmap search --- search forward through the actual bitmap on disk until + * we find a bit free. + */ +static ext2_grpblk_t +bitmap_search_next_usable_block(ext2_grpblk_t start, struct buffer_head *bh, + ext2_grpblk_t maxblocks) { - int k; - char *p, *r; + ext2_grpblk_t next; - if (!ext2_test_bit(goal, map)) - goto got_it; + next = ext2_find_next_zero_bit(bh->b_data, maxblocks, start); + if (next >= maxblocks) + return -1; + return next; +} -repeat: - if (goal) { +/** + * find_next_usable_block() + * @start: the starting block (group relative) to find next + * allocatable block in bitmap. + * @bh: bufferhead contains the block group bitmap + * @maxblocks: the ending block (group relative) for the search + * + * Find an allocatable block in a bitmap. 
We perform the "most + * appropriate allocation" algorithm of looking for a free block near + * the initial goal; then for a free byte somewhere in the bitmap; + * then for any free bit in the bitmap. + */ +static ext2_grpblk_t +find_next_usable_block(int start, struct buffer_head *bh, int maxblocks) +{ + ext2_grpblk_t here, next; + char *p, *r; + + if (start > 0) { /* * The goal was occupied; search forward for a free * block within the next XX blocks. @@ -280,261 +614,831 @@ repeat: * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the * next 64-bit boundary is simple.. */ - k = (goal + 63) & ~63; - goal = ext2_find_next_zero_bit(map, k, goal); - if (goal < k) - goto got_it; + ext2_grpblk_t end_goal = (start + 63) & ~63; + if (end_goal > maxblocks) + end_goal = maxblocks; + here = ext2_find_next_zero_bit(bh->b_data, end_goal, start); + if (here < end_goal) + return here; + ext2_debug("Bit not found near goal\n"); + } + + here = start; + if (here < 0) + here = 0; + + p = ((char *)bh->b_data) + (here >> 3); + r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); + next = (r - ((char *)bh->b_data)) << 3; + + if (next < maxblocks && next >= here) + return next; + + here = bitmap_search_next_usable_block(here, bh, maxblocks); + return here; +} + +/** + * ext2_try_to_allocate() + * @sb: superblock + * @group: given allocation block group + * @bitmap_bh: bufferhead holds the block bitmap + * @grp_goal: given target block within the group + * @count: target number of blocks to allocate + * @my_rsv: reservation window + * + * Attempt to allocate blocks within a give range. Set the range of allocation + * first, then find the first free bit(s) from the bitmap (within the range), + * and at last, allocate the blocks by claiming the found free bit as allocated. + * + * To set the range of this allocation: + * if there is a reservation window, only try to allocate block(s) + * from the file's own reservation window; + * Otherwise, the allocation range starts from the give goal block, + * ends at the block group's last block. + * + * If we failed to allocate the desired block then we may end up crossing to a + * new bitmap. 
+ */ +static int +ext2_try_to_allocate(struct super_block *sb, int group, + struct buffer_head *bitmap_bh, ext2_grpblk_t grp_goal, + unsigned long *count, + struct ext2_reserve_window *my_rsv) +{ + ext2_fsblk_t group_first_block; + ext2_grpblk_t start, end; + unsigned long num = 0; + + /* we do allocation within the reservation window if we have a window */ + if (my_rsv) { + group_first_block = ext2_group_first_block_no(sb, group); + if (my_rsv->_rsv_start >= group_first_block) + start = my_rsv->_rsv_start - group_first_block; + else + /* reservation window cross group boundary */ + start = 0; + end = my_rsv->_rsv_end - group_first_block + 1; + if (end > EXT2_BLOCKS_PER_GROUP(sb)) + /* reservation window crosses group boundary */ + end = EXT2_BLOCKS_PER_GROUP(sb); + if ((start <= grp_goal) && (grp_goal < end)) + start = grp_goal; + else + grp_goal = -1; + } else { + if (grp_goal > 0) + start = grp_goal; + else + start = 0; + end = EXT2_BLOCKS_PER_GROUP(sb); + } + + BUG_ON(start > EXT2_BLOCKS_PER_GROUP(sb)); + +repeat: + if (grp_goal < 0) { + grp_goal = find_next_usable_block(start, bitmap_bh, end); + if (grp_goal < 0) + goto fail_access; + if (!my_rsv) { + int i; + + for (i = 0; i < 7 && grp_goal > start && + !ext2_test_bit(grp_goal - 1, + bitmap_bh->b_data); + i++, grp_goal--) + ; + } + } + start = grp_goal; + + if (ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), grp_goal, + bitmap_bh->b_data)) { + /* + * The block was allocated by another thread, or it was + * allocated and then freed by another thread + */ + start++; + grp_goal++; + if (start >= end) + goto fail_access; + goto repeat; + } + num++; + grp_goal++; + while (num < *count && grp_goal < end + && !ext2_set_bit_atomic(sb_bgl_lock(EXT2_SB(sb), group), + grp_goal, bitmap_bh->b_data)) { + num++; + grp_goal++; + } + *count = num; + return grp_goal - num; +fail_access: + *count = num; + return -1; +} + +/** + * find_next_reservable_window(): + * find a reservable space within the given range. + * It does not allocate the reservation window for now: + * alloc_new_reservation() will do the work later. + * + * @search_head: the head of the searching list; + * This is not necessarily the list head of the whole filesystem + * + * We have both head and start_block to assist the search + * for the reservable space. The list starts from head, + * but we will shift to the place where start_block is, + * then start from there, when looking for a reservable space. + * + * @size: the target new reservation window size + * + * @group_first_block: the first block we consider to start + * the real search from + * + * @last_block: + * the maximum block number that our goal reservable space + * could start from. This is normally the last block in this + * group. The search will end when we found the start of next + * possible reservable space is out of this boundary. + * This could handle the cross boundary reservation window + * request. + * + * basically we search from the given range, rather than the whole + * reservation double linked list, (start_block, last_block) + * to find a free region that is of my size and has not + * been reserved. 
+ * + */ +static int find_next_reservable_window( + struct ext2_reserve_window_node *search_head, + struct ext2_reserve_window_node *my_rsv, + struct super_block * sb, + ext2_fsblk_t start_block, + ext2_fsblk_t last_block) +{ + struct rb_node *next; + struct ext2_reserve_window_node *rsv, *prev; + ext2_fsblk_t cur; + int size = my_rsv->rsv_goal_size; + + /* TODO: make the start of the reservation window byte-aligned */ + /* cur = *start_block & ~7;*/ + cur = start_block; + rsv = search_head; + if (!rsv) + return -1; + + while (1) { + if (cur <= rsv->rsv_end) + cur = rsv->rsv_end + 1; + + /* TODO? + * in the case we could not find a reservable space + * that is what is expected, during the re-search, we could + * remember what's the largest reservable space we could have + * and return that one. + * + * For now it will fail if we could not find the reservable + * space with expected-size (or more)... + */ + if (cur > last_block) + return -1; /* fail */ + + prev = rsv; + next = rb_next(&rsv->rsv_node); + rsv = rb_entry(next,struct ext2_reserve_window_node,rsv_node); + /* - * Search in the remainder of the current group. + * Reached the last reservation, we can just append to the + * previous one. */ + if (!next) + break; + + if (cur + size <= rsv->rsv_start) { + /* + * Found a reserveable space big enough. We could + * have a reservation across the group boundary here + */ + break; + } } + /* + * we come here either : + * when we reach the end of the whole list, + * and there is empty reservable space after last entry in the list. + * append it to the end of the list. + * + * or we found one reservable space in the middle of the list, + * return the reservation window that we could append to. + * succeed. + */ + + if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) + rsv_window_remove(sb, my_rsv); + + /* + * Let's book the whole available window for now. We will check the + * disk bitmap later and then, if there are free blocks then we adjust + * the window size if it's larger than requested. + * Otherwise, we will remove this node from the tree next time + * call find_next_reservable_window. + */ + my_rsv->rsv_start = cur; + my_rsv->rsv_end = cur + size - 1; + my_rsv->rsv_alloc_hit = 0; + + if (prev != my_rsv) + ext2_rsv_window_add(sb, my_rsv); + + return 0; +} + +/** + * alloc_new_reservation()--allocate a new reservation window + * + * To make a new reservation, we search part of the filesystem + * reservation list (the list that inside the group). We try to + * allocate a new reservation window near the allocation goal, + * or the beginning of the group, if there is no goal. + * + * We first find a reservable space after the goal, then from + * there, we check the bitmap for the first free block after + * it. If there is no free block until the end of group, then the + * whole group is full, we failed. Otherwise, check if the free + * block is inside the expected reservable space, if so, we + * succeed. + * If the first free block is outside the reservable space, then + * start from the first free block, we search for next available + * space, and go on. + * + * on succeed, a new reservation will be found and inserted into the list + * It contains at least one free block, and it does not overlap with other + * reservation windows. + * + * failed: we failed to find a reservation window in this group + * + * @rsv: the reservation + * + * @grp_goal: The goal (group-relative). It is where the search for a + * free reservable space should start from. 
+ * if we have a goal(goal >0 ), then start from there, + * no goal(goal = -1), we start from the first block + * of the group. + * + * @sb: the super block + * @group: the group we are trying to allocate in + * @bitmap_bh: the block group block bitmap + * + */ +static int alloc_new_reservation(struct ext2_reserve_window_node *my_rsv, + ext2_grpblk_t grp_goal, struct super_block *sb, + unsigned int group, struct buffer_head *bitmap_bh) +{ + struct ext2_reserve_window_node *search_head; + ext2_fsblk_t group_first_block, group_end_block, start_block; + ext2_grpblk_t first_free_block; + struct rb_root *fs_rsv_root = &EXT2_SB(sb)->s_rsv_window_root; + unsigned long size; + int ret; + spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock; + + group_first_block = ext2_group_first_block_no(sb, group); + group_end_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1); + + if (grp_goal < 0) + start_block = group_first_block; + else + start_block = grp_goal + group_first_block; + + size = my_rsv->rsv_goal_size; - p = map + (goal >> 3); - r = memscan(p, 0, (size - goal + 7) >> 3); - k = (r - map) << 3; - if (k < size) { - /* - * We have succeeded in finding a free byte in the block - * bitmap. Now search backwards to find the start of this - * group of free blocks - won't take more than 7 iterations. + if (!rsv_is_empty(&my_rsv->rsv_window)) { + /* + * if the old reservation is cross group boundary + * and if the goal is inside the old reservation window, + * we will come here when we just failed to allocate from + * the first part of the window. We still have another part + * that belongs to the next group. In this case, there is no + * point to discard our window and try to allocate a new one + * in this group(which will fail). we should + * keep the reservation window, just simply move on. + * + * Maybe we could shift the start block of the reservation + * window to the first block of next group. */ - for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--) - ; - goto got_it; + + if ((my_rsv->rsv_start <= group_end_block) && + (my_rsv->rsv_end > group_end_block) && + (start_block >= my_rsv->rsv_start)) + return -1; + + if ((my_rsv->rsv_alloc_hit > + (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { + /* + * if the previously allocation hit ratio is + * greater than 1/2, then we double the size of + * the reservation window the next time, + * otherwise we keep the same size window + */ + size = size * 2; + if (size > EXT2_MAX_RESERVE_BLOCKS) + size = EXT2_MAX_RESERVE_BLOCKS; + my_rsv->rsv_goal_size= size; + } } - k = ext2_find_next_zero_bit ((u32 *)map, size, goal); - if (k < size) { - goal = k; - goto got_it; + spin_lock(rsv_lock); + /* + * shift the search start to the window near the goal block + */ + search_head = search_reserve_window(fs_rsv_root, start_block); + + /* + * find_next_reservable_window() simply finds a reservable window + * inside the given range(start_block, group_end_block). + * + * To make sure the reservation window has a free bit inside it, we + * need to check the bitmap after we found a reservable window. 
+ */ +retry: + ret = find_next_reservable_window(search_head, my_rsv, sb, + start_block, group_end_block); + + if (ret == -1) { + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; } - return -1; -got_it: - if (ext2_set_bit_atomic(lock, goal, (void *) map)) - goto repeat; - return goal; + + /* + * On success, find_next_reservable_window() returns the + * reservation window where there is a reservable space after it. + * Before we reserve this reservable space, we need + * to make sure there is at least a free block inside this region. + * + * Search the first free bit on the block bitmap. Search starts from + * the start block of the reservable space we just found. + */ + spin_unlock(rsv_lock); + first_free_block = bitmap_search_next_usable_block( + my_rsv->rsv_start - group_first_block, + bitmap_bh, group_end_block - group_first_block + 1); + + if (first_free_block < 0) { + /* + * no free block left on the bitmap, no point + * to reserve the space. return failed. + */ + spin_lock(rsv_lock); + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; /* failed */ + } + + start_block = first_free_block + group_first_block; + /* + * check if the first free block is within the + * free space we just reserved + */ + if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) + return 0; /* success */ + /* + * if the first free bit we found is out of the reservable space + * continue search for next reservable space, + * start from where the free block is, + * we also shift the list head to where we stopped last time + */ + search_head = my_rsv; + spin_lock(rsv_lock); + goto retry; +} + +/** + * try_to_extend_reservation() + * @my_rsv: given reservation window + * @sb: super block + * @size: the delta to extend + * + * Attempt to expand the reservation window large enough to have + * required number of free blocks + * + * Since ext2_try_to_allocate() will always allocate blocks within + * the reservation window range, if the window size is too small, + * multiple blocks allocation has to stop at the end of the reservation + * window. To make this more efficient, given the total number of + * blocks needed and the current size of the window, we try to + * expand the reservation window size if necessary on a best-effort + * basis before ext2_new_blocks() tries to allocate blocks. + */ +static void try_to_extend_reservation(struct ext2_reserve_window_node *my_rsv, + struct super_block *sb, int size) +{ + struct ext2_reserve_window_node *next_rsv; + struct rb_node *next; + spinlock_t *rsv_lock = &EXT2_SB(sb)->s_rsv_window_lock; + + if (!spin_trylock(rsv_lock)) + return; + + next = rb_next(&my_rsv->rsv_node); + + if (!next) + my_rsv->rsv_end += size; + else { + next_rsv = rb_entry(next, struct ext2_reserve_window_node, rsv_node); + + if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) + my_rsv->rsv_end += size; + else + my_rsv->rsv_end = next_rsv->rsv_start - 1; + } + spin_unlock(rsv_lock); +} + +/** + * ext2_try_to_allocate_with_rsv() + * @sb: superblock + * @group: given allocation block group + * @bitmap_bh: bufferhead holds the block bitmap + * @grp_goal: given target block within the group + * @count: target number of blocks to allocate + * @my_rsv: reservation window + * + * This is the main function used to allocate a new block and its reservation + * window. + * + * Each time when a new block allocation is need, first try to allocate from + * its own reservation. 
If it does not have a reservation window, instead of + * looking for a free bit on bitmap first, then look up the reservation list to + * see if it is inside somebody else's reservation window, we try to allocate a + * reservation window for it starting from the goal first. Then do the block + * allocation within the reservation window. + * + * This will avoid keeping on searching the reservation list again and + * again when somebody is looking for a free block (without + * reservation), and there are lots of free blocks, but they are all + * being reserved. + * + * We use a red-black tree for the per-filesystem reservation list. + */ +static ext2_grpblk_t +ext2_try_to_allocate_with_rsv(struct super_block *sb, unsigned int group, + struct buffer_head *bitmap_bh, ext2_grpblk_t grp_goal, + struct ext2_reserve_window_node * my_rsv, + unsigned long *count) +{ + ext2_fsblk_t group_first_block, group_last_block; + ext2_grpblk_t ret = 0; + unsigned long num = *count; + + /* + * we don't deal with reservation when + * filesystem is mounted without reservation + * or the file is not a regular file + * or last attempt to allocate a block with reservation turned on failed + */ + if (my_rsv == NULL) { + return ext2_try_to_allocate(sb, group, bitmap_bh, + grp_goal, count, NULL); + } + /* + * grp_goal is a group relative block number (if there is a goal) + * 0 <= grp_goal < EXT2_BLOCKS_PER_GROUP(sb) + * first block is a filesystem wide block number + * first block is the block number of the first block in this group + */ + group_first_block = ext2_group_first_block_no(sb, group); + group_last_block = group_first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1); + + /* + * Basically we will allocate a new block from inode's reservation + * window. + * + * We need to allocate a new reservation window, if: + * a) inode does not have a reservation window; or + * b) last attempt to allocate a block from existing reservation + * failed; or + * c) we come here with a goal and with a reservation window + * + * We do not need to allocate a new reservation window if we come here + * at the beginning with a goal and the goal is inside the window, or + * we don't have a goal but already have a reservation window. + * then we could go to allocate from the reservation window directly. + */ + while (1) { + if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || + !goal_in_my_reservation(&my_rsv->rsv_window, + grp_goal, group, sb)) { + if (my_rsv->rsv_goal_size < *count) + my_rsv->rsv_goal_size = *count; + ret = alloc_new_reservation(my_rsv, grp_goal, sb, + group, bitmap_bh); + if (ret < 0) + break; /* failed */ + + if (!goal_in_my_reservation(&my_rsv->rsv_window, + grp_goal, group, sb)) + grp_goal = -1; + } else if (grp_goal >= 0) { + int curr = my_rsv->rsv_end - + (grp_goal + group_first_block) + 1; + + if (curr < *count) + try_to_extend_reservation(my_rsv, sb, + *count - curr); + } + + if ((my_rsv->rsv_start > group_last_block) || + (my_rsv->rsv_end < group_first_block)) { + rsv_window_dump(&EXT2_SB(sb)->s_rsv_window_root, 1); + BUG(); + } + ret = ext2_try_to_allocate(sb, group, bitmap_bh, grp_goal, + &num, &my_rsv->rsv_window); + if (ret >= 0) { + my_rsv->rsv_alloc_hit += num; + *count = num; + break; /* succeed */ + } + num = *count; + } + return ret; +} + +/** + * ext2_has_free_blocks() + * @sbi: in-core super block structure. + * + * Check if filesystem has at least 1 free block available for allocation. 
+ */ +static int ext2_has_free_blocks(struct ext2_sb_info *sbi) +{ + ext2_fsblk_t free_blocks, root_blocks; + + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); + if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && + !uid_eq(sbi->s_resuid, current_fsuid()) && + (gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) || + !in_group_p (sbi->s_resgid))) { + return 0; + } + return 1; } /* - * ext2_new_block uses a goal block to assist allocation. If the goal is + * ext2_new_blocks() -- core block(s) allocation function + * @inode: file inode + * @goal: given target block(filesystem wide) + * @count: target number of blocks to allocate + * @errp: error code + * + * ext2_new_blocks uses a goal block to assist allocation. If the goal is * free, or there is a free block within 32 blocks of the goal, that block * is allocated. Otherwise a forward search is made for a free block; within * each block group the search first looks for an entire free byte in the block * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. */ -int ext2_new_block(struct inode *inode, unsigned long goal, - u32 *prealloc_count, u32 *prealloc_block, int *err) +ext2_fsblk_t ext2_new_blocks(struct inode *inode, ext2_fsblk_t goal, + unsigned long *count, int *errp) { struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gdp_bh; /* bh2 */ - struct ext2_group_desc *desc; - int group_no; /* i */ - int ret_block; /* j */ - int group_idx; /* k */ - int target_block; /* tmp */ - int block = 0; - struct super_block *sb = inode->i_sb; - struct ext2_sb_info *sbi = EXT2_SB(sb); - struct ext2_super_block *es = sbi->s_es; - unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb); - unsigned prealloc_goal = es->s_prealloc_blocks; - unsigned group_alloc = 0, es_alloc, dq_alloc; - int nr_scanned_groups; - - if (!prealloc_goal--) - prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1; - if (!prealloc_count || *prealloc_count) - prealloc_goal = 0; - - if (DQUOT_ALLOC_BLOCK(inode, 1)) { - *err = -EDQUOT; - goto out; - } + struct buffer_head *gdp_bh; + int group_no; + int goal_group; + ext2_grpblk_t grp_target_blk; /* blockgroup relative goal block */ + ext2_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ + ext2_fsblk_t ret_block; /* filesyetem-wide allocated block */ + int bgi; /* blockgroup iteration index */ + int performed_allocation = 0; + ext2_grpblk_t free_blocks; /* number of free blocks in a group */ + struct super_block *sb; + struct ext2_group_desc *gdp; + struct ext2_super_block *es; + struct ext2_sb_info *sbi; + struct ext2_reserve_window_node *my_rsv = NULL; + struct ext2_block_alloc_info *block_i; + unsigned short windowsz = 0; + unsigned long ngroups; + unsigned long num = *count; + int ret; - while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal)) - prealloc_goal--; + *errp = -ENOSPC; + sb = inode->i_sb; - dq_alloc = prealloc_goal + 1; - es_alloc = reserve_blocks(sb, dq_alloc); - if (!es_alloc) { - *err = -ENOSPC; - goto out_dquot; + /* + * Check quota for allocation of this block. 
+ */ + ret = dquot_alloc_block(inode, num); + if (ret) { + *errp = ret; + return 0; } - ext2_debug ("goal=%lu.\n", goal); + sbi = EXT2_SB(sb); + es = EXT2_SB(sb)->s_es; + ext2_debug("goal=%lu.\n", goal); + /* + * Allocate a block from reservation only when + * filesystem is mounted with reservation(default,-o reservation), and + * it's a regular file, and + * the desired window size is greater than 0 (One could use ioctl + * command EXT2_IOC_SETRSVSZ to set the window size to 0 to turn off + * reservation on that particular file) + */ + block_i = EXT2_I(inode)->i_block_alloc_info; + if (block_i) { + windowsz = block_i->rsv_window_node.rsv_goal_size; + if (windowsz > 0) + my_rsv = &block_i->rsv_window_node; + } + + if (!ext2_has_free_blocks(sbi)) { + *errp = -ENOSPC; + goto out; + } + /* + * First, test whether the goal block is free. + */ if (goal < le32_to_cpu(es->s_first_data_block) || goal >= le32_to_cpu(es->s_blocks_count)) goal = le32_to_cpu(es->s_first_data_block); - group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size; - desc = ext2_get_group_desc (sb, group_no, &gdp_bh); - if (!desc) { - /* - * gdp_bh may still be uninitialised. But group_release_blocks - * will not touch it because group_alloc is zero. - */ + group_no = (goal - le32_to_cpu(es->s_first_data_block)) / + EXT2_BLOCKS_PER_GROUP(sb); + goal_group = group_no; +retry_alloc: + gdp = ext2_get_group_desc(sb, group_no, &gdp_bh); + if (!gdp) goto io_error; - } - group_alloc = group_reserve_blocks(sbi, group_no, desc, - gdp_bh, es_alloc); - if (group_alloc) { - ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % - group_size); - brelse(bitmap_bh); + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + /* + * if there is not enough free blocks to make a new resevation + * turn off reservation for this allocation + */ + if (my_rsv && (free_blocks < windowsz) + && (free_blocks > 0) + && (rsv_is_empty(&my_rsv->rsv_window))) + my_rsv = NULL; + + if (free_blocks > 0) { + grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % + EXT2_BLOCKS_PER_GROUP(sb)); bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; - - ext2_debug("goal is at %d:%d.\n", group_no, ret_block); - - ret_block = grab_block(sb_bgl_lock(sbi, group_no), - bitmap_bh->b_data, group_size, ret_block); - if (ret_block >= 0) - goto got_block; - group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); - group_alloc = 0; + grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no, + bitmap_bh, grp_target_blk, + my_rsv, &num); + if (grp_alloc_blk >= 0) + goto allocated; } - ext2_debug ("Bit not found in block group %d.\n", group_no); + ngroups = EXT2_SB(sb)->s_groups_count; + smp_rmb(); /* - * Now search the rest of the groups. We assume that - * i and desc correctly point to the last group visited. + * Now search the rest of the groups. We assume that + * group_no and gdp correctly point to the last group visited. 
*/ - nr_scanned_groups = 0; -retry: - for (group_idx = 0; !group_alloc && - group_idx < sbi->s_groups_count; group_idx++) { + for (bgi = 0; bgi < ngroups; bgi++) { group_no++; - if (group_no >= sbi->s_groups_count) + if (group_no >= ngroups) group_no = 0; - desc = ext2_get_group_desc(sb, group_no, &gdp_bh); - if (!desc) + gdp = ext2_get_group_desc(sb, group_no, &gdp_bh); + if (!gdp) goto io_error; - group_alloc = group_reserve_blocks(sbi, group_no, desc, - gdp_bh, es_alloc); - } - if (!group_alloc) { - *err = -ENOSPC; - goto out_release; - } - brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, group_no); - if (!bitmap_bh) - goto io_error; - ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data, - group_size, 0); - if (ret_block < 0) { + free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + /* + * skip this group (and avoid loading bitmap) if there + * are no free blocks + */ + if (!free_blocks) + continue; /* - * If a free block counter is corrupted we can loop inifintely. - * Detect that here. + * skip this group if the number of + * free blocks is less than half of the reservation + * window size. */ - nr_scanned_groups++; - if (nr_scanned_groups > 2 * sbi->s_groups_count) { - ext2_error(sb, "ext2_new_block", - "corrupted free blocks counters"); + if (my_rsv && (free_blocks <= (windowsz/2))) + continue; + + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, group_no); + if (!bitmap_bh) goto io_error; - } /* - * Someone else grabbed the last free block in this blockgroup - * before us. Retry the scan. + * try to allocate block(s) from this group, without a goal(-1). */ - group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); - group_alloc = 0; - goto retry; + grp_alloc_blk = ext2_try_to_allocate_with_rsv(sb, group_no, + bitmap_bh, -1, my_rsv, &num); + if (grp_alloc_blk >= 0) + goto allocated; + } + /* + * We may end up a bogus earlier ENOSPC error due to + * filesystem is "full" of reservations, but + * there maybe indeed free blocks available on disk + * In this case, we just forget about the reservations + * just do block allocation as without reservations. + */ + if (my_rsv) { + my_rsv = NULL; + windowsz = 0; + group_no = goal_group; + goto retry_alloc; } + /* No space left on the device */ + *errp = -ENOSPC; + goto out; + +allocated: -got_block: ext2_debug("using block group %d(%d)\n", - group_no, desc->bg_free_blocks_count); + group_no, gdp->bg_free_blocks_count); - target_block = ret_block + group_no * group_size + - le32_to_cpu(es->s_first_data_block); + ret_block = grp_alloc_blk + ext2_group_first_block_no(sb, group_no); - if (target_block == le32_to_cpu(desc->bg_block_bitmap) || - target_block == le32_to_cpu(desc->bg_inode_bitmap) || - in_range(target_block, le32_to_cpu(desc->bg_inode_table), - sbi->s_itb_per_group)) - ext2_error (sb, "ext2_new_block", + if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || + in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || + in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), + EXT2_SB(sb)->s_itb_per_group) || + in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), + EXT2_SB(sb)->s_itb_per_group)) { + ext2_error(sb, "ext2_new_blocks", "Allocating block in system zone - " - "block = %u", target_block); + "blocks from "E2FSBLK", length %lu", + ret_block, num); + /* + * ext2_try_to_allocate marked the blocks we allocated as in + * use. 
So we may want to selectively mark some of the blocks + * as free + */ + goto retry_alloc; + } - if (target_block >= le32_to_cpu(es->s_blocks_count)) { - ext2_error (sb, "ext2_new_block", - "block(%d) >= blocks count(%d) - " + performed_allocation = 1; + + if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { + ext2_error(sb, "ext2_new_blocks", + "block("E2FSBLK") >= blocks count(%d) - " "block_group = %d, es == %p ", ret_block, le32_to_cpu(es->s_blocks_count), group_no, es); - goto io_error; + goto out; } - block = target_block; - /* OK, we _had_ allocated something */ - ext2_debug("found bit %d\n", ret_block); - - dq_alloc--; - es_alloc--; - group_alloc--; - - /* - * Do block preallocation now if required. - */ - write_lock(&EXT2_I(inode)->i_meta_lock); - if (group_alloc && !*prealloc_count) { - unsigned n; - - for (n = 0; n < group_alloc && ++ret_block < group_size; n++) { - if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no), - ret_block, - (void*) bitmap_bh->b_data)) - break; - } - *prealloc_block = block + 1; - *prealloc_count = n; - es_alloc -= n; - dq_alloc -= n; - group_alloc -= n; - } - write_unlock(&EXT2_I(inode)->i_meta_lock); + group_adjust_blocks(sb, group_no, gdp, gdp_bh, -num); + percpu_counter_sub(&sbi->s_freeblocks_counter, num); mark_buffer_dirty(bitmap_bh); if (sb->s_flags & MS_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); - ext2_debug ("allocating block %d. ", block); + *errp = 0; + brelse(bitmap_bh); + if (num < *count) { + dquot_free_block_nodirty(inode, *count-num); + mark_inode_dirty(inode); + *count = num; + } + return ret_block; - *err = 0; -out_release: - group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc); - release_blocks(sb, es_alloc); -out_dquot: - DQUOT_FREE_BLOCK(inode, dq_alloc); +io_error: + *errp = -EIO; out: + /* + * Undo the block allocation + */ + if (!performed_allocation) { + dquot_free_block_nodirty(inode, *count); + mark_inode_dirty(inode); + } brelse(bitmap_bh); - return block; + return 0; +} -io_error: - *err = -EIO; - goto out_release; +ext2_fsblk_t ext2_new_block(struct inode *inode, unsigned long goal, int *errp) +{ + unsigned long count = 1; + + return ext2_new_blocks(inode, goal, &count, errp); } #ifdef EXT2FS_DEBUG -static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; - -unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars) +unsigned long ext2_count_free(struct buffer_head *map, unsigned int numchars) { - unsigned int i; - unsigned long sum = 0; - - if (!map) - return (0); - for (i = 0; i < numchars; i++) - sum += nibblemap[map->b_data[i] & 0xf] + - nibblemap[(map->b_data[i] >> 4) & 0xf]; - return (sum); + return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars); } #endif /* EXT2FS_DEBUG */ @@ -583,14 +1487,6 @@ unsigned long ext2_count_free_blocks (struct super_block * sb) #endif } -static inline int -block_in_use(unsigned long block, struct super_block *sb, unsigned char *map) -{ - return ext2_test_bit ((block - - le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) % - EXT2_BLOCKS_PER_GROUP(sb), map); -} - static inline int test_root(int a, int b) { int num = b; @@ -635,9 +1531,6 @@ int ext2_bg_has_super(struct super_block *sb, int group) */ unsigned long ext2_bg_num_gdb(struct super_block *sb, int group) { - if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&& - !ext2_group_sparse(group)) - return 0; - return EXT2_SB(sb)->s_gdb_count; + return ext2_bg_has_super(sb, group) ? 
EXT2_SB(sb)->s_gdb_count : 0; } diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 92ea8265d7d..6e1d4ab09d7 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -22,12 +22,40 @@ */ #include "ext2.h" +#include <linux/buffer_head.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> +#include <linux/swap.h> typedef struct ext2_dir_entry_2 ext2_dirent; /* + * Tests against MAX_REC_LEN etc were put in place for 64k block + * sizes; if that is not possible on this arch, we can skip + * those tests and speed things up. + */ +static inline unsigned ext2_rec_len_from_disk(__le16 dlen) +{ + unsigned len = le16_to_cpu(dlen); + +#if (PAGE_CACHE_SIZE >= 65536) + if (len == EXT2_MAX_REC_LEN) + return 1 << 16; +#endif + return len; +} + +static inline __le16 ext2_rec_len_to_disk(unsigned len) +{ +#if (PAGE_CACHE_SIZE >= 65536) + if (len == (1 << 16)) + return cpu_to_le16(EXT2_MAX_REC_LEN); + else + BUG_ON(len > (1 << 16)); +#endif + return cpu_to_le16(len); +} + +/* * ext2 uses block-sized chunks. Arguably, sector-sized ones would be * more robust, but we have what we have */ @@ -62,20 +90,32 @@ ext2_last_byte(struct inode *inode, unsigned long page_nr) return last_byte; } -static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) +static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) { - struct inode *dir = page->mapping->host; + struct address_space *mapping = page->mapping; + struct inode *dir = mapping->host; int err = 0; + dir->i_version++; - page->mapping->a_ops->commit_write(NULL, page, from, to); - if (IS_DIRSYNC(dir)) + block_write_end(NULL, mapping, pos, len, len, page, NULL); + + if (pos+len > dir->i_size) { + i_size_write(dir, pos+len); + mark_inode_dirty(dir); + } + + if (IS_DIRSYNC(dir)) { err = write_one_page(page, 1); - else + if (!err) + err = sync_inode_metadata(dir, 1); + } else { unlock_page(page); + } + return err; } -static void ext2_check_page(struct page *page) +static void ext2_check_page(struct page *page, int quiet) { struct inode *dir = page->mapping->host; struct super_block *sb = dir->i_sb; @@ -96,17 +136,17 @@ static void ext2_check_page(struct page *page) } for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { p = (ext2_dirent *)(kaddr + offs); - rec_len = le16_to_cpu(p->rec_len); + rec_len = ext2_rec_len_from_disk(p->rec_len); - if (rec_len < EXT2_DIR_REC_LEN(1)) + if (unlikely(rec_len < EXT2_DIR_REC_LEN(1))) goto Eshort; - if (rec_len & 3) + if (unlikely(rec_len & 3)) goto Ealign; - if (rec_len < EXT2_DIR_REC_LEN(p->name_len)) + if (unlikely(rec_len < EXT2_DIR_REC_LEN(p->name_len))) goto Enamelen; - if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) + if (unlikely(((offs + rec_len - 1) ^ offs) & ~(chunk_size-1))) goto Espan; - if (le32_to_cpu(p->inode) > max_inumber) + if (unlikely(le32_to_cpu(p->inode) > max_inumber)) goto Einumber; } if (offs != limit) @@ -118,10 +158,10 @@ out: /* Too bad, we had an error */ Ebadsize: - ext2_error(sb, "ext2_check_page", - "size of directory #%lu is not a multiple of chunk size", - dir->i_ino - ); + if (!quiet) + ext2_error(sb, __func__, + "size of directory #%lu is not a multiple " + "of chunk size", dir->i_ino); goto fail; Eshort: error = "rec_len is smaller than minimal"; @@ -138,35 +178,36 @@ Espan: Einumber: error = "inode out of bounds"; bad_entry: - ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - " - "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, - (unsigned long) le32_to_cpu(p->inode), - 
rec_len, p->name_len); + if (!quiet) + ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - " + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, + (unsigned long) le32_to_cpu(p->inode), + rec_len, p->name_len); goto fail; Eend: - p = (ext2_dirent *)(kaddr + offs); - ext2_error (sb, "ext2_check_page", - "entry in directory #%lu spans the page boundary" - "offset=%lu, inode=%lu", - dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, - (unsigned long) le32_to_cpu(p->inode)); + if (!quiet) { + p = (ext2_dirent *)(kaddr + offs); + ext2_error(sb, "ext2_check_page", + "entry in directory #%lu spans the page boundary" + "offset=%lu, inode=%lu", + dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, + (unsigned long) le32_to_cpu(p->inode)); + } fail: SetPageChecked(page); SetPageError(page); } -static struct page * ext2_get_page(struct inode *dir, unsigned long n) +static struct page * ext2_get_page(struct inode *dir, unsigned long n, + int quiet) { struct address_space *mapping = dir->i_mapping; struct page *page = read_mapping_page(mapping, n, NULL); if (!IS_ERR(page)) { - wait_on_page_locked(page); kmap(page); - if (!PageUptodate(page)) - goto fail; if (!PageChecked(page)) - ext2_check_page(page); + ext2_check_page(page, quiet); if (PageError(page)) goto fail; } @@ -197,7 +238,8 @@ static inline int ext2_match (int len, const char * const name, */ static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) { - return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); + return (ext2_dirent *)((char *)p + + ext2_rec_len_from_disk(p->rec_len)); } static inline unsigned @@ -237,7 +279,7 @@ static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = { static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) { - mode_t mode = inode->i_mode; + umode_t mode = inode->i_mode; if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; else @@ -245,17 +287,17 @@ static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) } static int -ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) +ext2_readdir(struct file *file, struct dir_context *ctx) { - loff_t pos = filp->f_pos; - struct inode *inode = filp->f_dentry->d_inode; + loff_t pos = ctx->pos; + struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; unsigned int offset = pos & ~PAGE_CACHE_MASK; unsigned long n = pos >> PAGE_CACHE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); unsigned char *types = NULL; - int need_revalidate = filp->f_version != inode->i_version; + int need_revalidate = file->f_version != inode->i_version; if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) return 0; @@ -266,50 +308,47 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) for ( ; n < npages; n++, offset = 0) { char *kaddr, *limit; ext2_dirent *de; - struct page *page = ext2_get_page(inode, n); + struct page *page = ext2_get_page(inode, n, 0); if (IS_ERR(page)) { - ext2_error(sb, __FUNCTION__, + ext2_error(sb, __func__, "bad page in #%lu", inode->i_ino); - filp->f_pos += PAGE_CACHE_SIZE - offset; - return -EIO; + ctx->pos += PAGE_CACHE_SIZE - offset; + return PTR_ERR(page); } kaddr = page_address(page); if (unlikely(need_revalidate)) { if (offset) { offset = ext2_validate_entry(kaddr, offset, chunk_mask); - filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset; + ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; } - 
filp->f_version = inode->i_version; + file->f_version = inode->i_version; need_revalidate = 0; } de = (ext2_dirent *)(kaddr+offset); limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1); for ( ;(char*)de <= limit; de = ext2_next_entry(de)) { if (de->rec_len == 0) { - ext2_error(sb, __FUNCTION__, + ext2_error(sb, __func__, "zero-length directory entry"); ext2_put_page(page); return -EIO; } if (de->inode) { - int over; unsigned char d_type = DT_UNKNOWN; if (types && de->file_type < EXT2_FT_MAX) d_type = types[de->file_type]; - offset = (char *)de - kaddr; - over = filldir(dirent, de->name, de->name_len, - (n<<PAGE_CACHE_SHIFT) | offset, - le32_to_cpu(de->inode), d_type); - if (over) { + if (!dir_emit(ctx, de->name, de->name_len, + le32_to_cpu(de->inode), + d_type)) { ext2_put_page(page); return 0; } } - filp->f_pos += le16_to_cpu(de->rec_len); + ctx->pos += ext2_rec_len_from_disk(de->rec_len); } ext2_put_page(page); } @@ -320,21 +359,22 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) * ext2_find_entry() * * finds an entry in the specified directory with the wanted name. It - * returns the page in which the entry was found, and the entry itself - * (as a parameter - res_dir). Page is returned mapped and unlocked. + * returns the page in which the entry was found (as a parameter - res_page), + * and the entry itself. Page is returned mapped and unlocked. * Entry is guaranteed to be valid. */ -struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, - struct dentry *dentry, struct page ** res_page) +struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir, + struct qstr *child, struct page ** res_page) { - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; + const char *name = child->name; + int namelen = child->len; unsigned reclen = EXT2_DIR_REC_LEN(namelen); unsigned long start, n; unsigned long npages = dir_pages(dir); struct page *page = NULL; struct ext2_inode_info *ei = EXT2_I(dir); ext2_dirent * de; + int dir_has_error = 0; if (npages == 0) goto out; @@ -348,14 +388,14 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, n = start; do { char *kaddr; - page = ext2_get_page(dir, n); + page = ext2_get_page(dir, n, dir_has_error); if (!IS_ERR(page)) { kaddr = page_address(page); de = (ext2_dirent *) kaddr; kaddr += ext2_last_byte(dir, n) - reclen; while ((char *) de <= kaddr) { if (de->rec_len == 0) { - ext2_error(dir->i_sb, __FUNCTION__, + ext2_error(dir->i_sb, __func__, "zero-length directory entry"); ext2_put_page(page); goto out; @@ -365,9 +405,19 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, de = ext2_next_entry(de); } ext2_put_page(page); - } + } else + dir_has_error = 1; + if (++n >= npages) n = 0; + /* next page is past the blocks we've got */ + if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) { + ext2_error(dir->i_sb, __func__, + "dir %lu size %lld exceeds block count %llu", + dir->i_ino, dir->i_size, + (unsigned long long)dir->i_blocks); + goto out; + } } while (n != start); out: return NULL; @@ -380,7 +430,7 @@ found: struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) { - struct page *page = ext2_get_page(dir, 0); + struct page *page = ext2_get_page(dir, 0, 0); ext2_dirent *de = NULL; if (!IS_ERR(page)) { @@ -390,13 +440,13 @@ struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) return de; } -ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) +ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child) 
{ ino_t res = 0; - struct ext2_dir_entry_2 * de; + struct ext2_dir_entry_2 *de; struct page *page; - de = ext2_find_entry (dir, dentry, &page); + de = ext2_find_entry (dir, child, &page); if (de) { res = le32_to_cpu(de->inode); ext2_put_page(page); @@ -404,22 +454,29 @@ ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) return res; } +static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len) +{ + return __block_write_begin(page, pos, len, ext2_get_block); +} + /* Releases the page */ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, struct inode *inode) + struct page *page, struct inode *inode, int update_times) { - unsigned from = (char *) de - (char *) page_address(page); - unsigned to = from + le16_to_cpu(de->rec_len); + loff_t pos = page_offset(page) + + (char *) de - (char *) page_address(page); + unsigned len = ext2_rec_len_from_disk(de->rec_len); int err; lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + err = ext2_prepare_chunk(page, pos, len); BUG_ON(err); de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type (de, inode); - err = ext2_commit_chunk(page, from, to); + ext2_set_de_type(de, inode); + err = ext2_commit_chunk(page, pos, len); ext2_put_page(page); - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + if (update_times) + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); } @@ -440,7 +497,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) unsigned long npages = dir_pages(dir); unsigned long n; char *kaddr; - unsigned from, to; + loff_t pos; int err; /* @@ -451,7 +508,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) for (n = 0; n <= npages; n++) { char *dir_end; - page = ext2_get_page(dir, n); + page = ext2_get_page(dir, n, 0); err = PTR_ERR(page); if (IS_ERR(page)) goto out; @@ -465,12 +522,12 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) /* We hit i_size */ name_len = 0; rec_len = chunk_size; - de->rec_len = cpu_to_le16(chunk_size); + de->rec_len = ext2_rec_len_to_disk(chunk_size); de->inode = 0; goto got_it; } if (de->rec_len == 0) { - ext2_error(dir->i_sb, __FUNCTION__, + ext2_error(dir->i_sb, __func__, "zero-length directory entry"); err = -EIO; goto out_unlock; @@ -479,7 +536,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) if (ext2_match (namelen, name, de)) goto out_unlock; name_len = EXT2_DIR_REC_LEN(de->name_len); - rec_len = le16_to_cpu(de->rec_len); + rec_len = ext2_rec_len_from_disk(de->rec_len); if (!de->inode && rec_len >= reclen) goto got_it; if (rec_len >= name_len + reclen) @@ -493,22 +550,22 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) return -EINVAL; got_it: - from = (char*)de - (char*)page_address(page); - to = from + rec_len; - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); + pos = page_offset(page) + + (char*)de - (char*)page_address(page); + err = ext2_prepare_chunk(page, pos, rec_len); if (err) goto out_unlock; if (de->inode) { ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); - de1->rec_len = cpu_to_le16(rec_len - name_len); - de->rec_len = cpu_to_le16(name_len); + de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len); + de->rec_len = ext2_rec_len_to_disk(name_len); de = de1; } de->name_len = namelen; - memcpy (de->name, name, namelen); + memcpy(de->name, name, namelen); de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type (de, inode); - err = 
ext2_commit_chunk(page, from, to); + err = ext2_commit_chunk(page, pos, rec_len); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); @@ -528,18 +585,19 @@ out_unlock: */ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; + struct inode *inode = page->mapping->host; char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); - unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); + unsigned to = ((char *)dir - kaddr) + + ext2_rec_len_from_disk(dir->rec_len); + loff_t pos; ext2_dirent * pde = NULL; ext2_dirent * de = (ext2_dirent *) (kaddr + from); int err; while ((char*)de < (char*)dir) { if (de->rec_len == 0) { - ext2_error(inode->i_sb, __FUNCTION__, + ext2_error(inode->i_sb, __func__, "zero-length directory entry"); err = -EIO; goto out; @@ -549,13 +607,14 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) } if (pde) from = (char*)pde - (char*)page_address(page); + pos = page_offset(page) + from; lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); + err = ext2_prepare_chunk(page, pos, to - from); BUG_ON(err); if (pde) - pde->rec_len = cpu_to_le16(to-from); + pde->rec_len = ext2_rec_len_to_disk(to - from); dir->inode = 0; - err = ext2_commit_chunk(page, from, to); + err = ext2_commit_chunk(page, pos, to - from); inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(inode); @@ -569,8 +628,7 @@ out: */ int ext2_make_empty(struct inode *inode, struct inode *parent) { - struct address_space *mapping = inode->i_mapping; - struct page *page = grab_cache_page(mapping, 0); + struct page *page = grab_cache_page(inode->i_mapping, 0); unsigned chunk_size = ext2_chunk_size(inode); struct ext2_dir_entry_2 * de; int err; @@ -578,27 +636,28 @@ int ext2_make_empty(struct inode *inode, struct inode *parent) if (!page) return -ENOMEM; - err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); + + err = ext2_prepare_chunk(page, 0, chunk_size); if (err) { unlock_page(page); goto fail; } - kaddr = kmap_atomic(page, KM_USER0); + kaddr = kmap_atomic(page); memset(kaddr, 0, chunk_size); de = (struct ext2_dir_entry_2 *)kaddr; de->name_len = 1; - de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); + de->rec_len = ext2_rec_len_to_disk(EXT2_DIR_REC_LEN(1)); memcpy (de->name, ".\0\0", 4); de->inode = cpu_to_le32(inode->i_ino); ext2_set_de_type (de, inode); de = (struct ext2_dir_entry_2 *)(kaddr + EXT2_DIR_REC_LEN(1)); de->name_len = 2; - de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); + de->rec_len = ext2_rec_len_to_disk(chunk_size - EXT2_DIR_REC_LEN(1)); de->inode = cpu_to_le32(parent->i_ino); memcpy (de->name, "..\0", 4); ext2_set_de_type (de, inode); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(kaddr); err = ext2_commit_chunk(page, 0, chunk_size); fail: page_cache_release(page); @@ -612,14 +671,17 @@ int ext2_empty_dir (struct inode * inode) { struct page *page = NULL; unsigned long i, npages = dir_pages(inode); + int dir_has_error = 0; for (i = 0; i < npages; i++) { char *kaddr; ext2_dirent * de; - page = ext2_get_page(inode, i); + page = ext2_get_page(inode, i, dir_has_error); - if (IS_ERR(page)) + if (IS_ERR(page)) { + dir_has_error = 1; continue; + } kaddr = page_address(page); de = (ext2_dirent *)kaddr; @@ -627,7 +689,7 @@ int ext2_empty_dir (struct inode * 
inode) while ((char *)de <= kaddr) { if (de->rec_len == 0) { - ext2_error(inode->i_sb, __FUNCTION__, + ext2_error(inode->i_sb, __func__, "zero-length directory entry"); printk("kaddr=%p, de=%p\n", kaddr, de); goto not_empty; @@ -659,7 +721,10 @@ not_empty: const struct file_operations ext2_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = ext2_readdir, - .ioctl = ext2_ioctl, - .fsync = ext2_sync_file, + .iterate = ext2_readdir, + .unlocked_ioctl = ext2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext2_compat_ioctl, +#endif + .fsync = ext2_fsync, }; diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index e65a019fc7a..d9a17d0b124 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -1,13 +1,644 @@ +/* + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ #include <linux/fs.h> #include <linux/ext2_fs.h> +#include <linux/blockgroup_lock.h> +#include <linux/percpu_counter.h> +#include <linux/rbtree.h> + +/* XXX Here for now... not interested in restructing headers JUST now */ + +/* data type for block offset of block group */ +typedef int ext2_grpblk_t; + +/* data type for filesystem-wide blocks number */ +typedef unsigned long ext2_fsblk_t; + +#define E2FSBLK "%lu" + +struct ext2_reserve_window { + ext2_fsblk_t _rsv_start; /* First byte reserved */ + ext2_fsblk_t _rsv_end; /* Last byte reserved or 0 */ +}; + +struct ext2_reserve_window_node { + struct rb_node rsv_node; + __u32 rsv_goal_size; + __u32 rsv_alloc_hit; + struct ext2_reserve_window rsv_window; +}; + +struct ext2_block_alloc_info { + /* information about reservation window */ + struct ext2_reserve_window_node rsv_window_node; + /* + * was i_next_alloc_block in ext2_inode_info + * is the logical (file-relative) number of the + * most-recently-allocated block in this file. + * We use this for detecting linearly ascending allocation requests. + */ + __u32 last_alloc_logical_block; + /* + * Was i_next_alloc_goal in ext2_inode_info + * is the *physical* companion to i_next_alloc_block. + * it the the physical block number of the block which was most-recentl + * allocated to this file. This give us the goal (target) for the next + * allocation when we detect linearly ascending requests. 
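+	 * For example, if logical block 100 of a file has just been placed
+	 * at physical block 5000, a following request for logical block 101
+	 * will use physical block 5001 as its allocation goal.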
+ */ + ext2_fsblk_t last_alloc_physical_block; +}; + +#define rsv_start rsv_window._rsv_start +#define rsv_end rsv_window._rsv_end + +/* + * second extended-fs super-block data in memory + */ +struct ext2_sb_info { + unsigned long s_frag_size; /* Size of a fragment in bytes */ + unsigned long s_frags_per_block;/* Number of fragments per block */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_frags_per_group;/* Number of fragments in a group */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + unsigned long s_groups_count; /* Number of groups in the fs */ + unsigned long s_overhead_last; /* Last calculated overhead */ + unsigned long s_blocks_last; /* Last seen block count */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext2_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; + unsigned long s_mount_opt; + unsigned long s_sb_block; + kuid_t s_resuid; + kgid_t s_resgid; + unsigned short s_mount_state; + unsigned short s_pad; + int s_addr_per_block_bits; + int s_desc_per_block_bits; + int s_inode_size; + int s_first_ino; + spinlock_t s_next_gen_lock; + u32 s_next_generation; + unsigned long s_dir_count; + u8 *s_debts; + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; + struct blockgroup_lock *s_blockgroup_lock; + /* root of the per fs reservation window tree */ + spinlock_t s_rsv_window_lock; + struct rb_root s_rsv_window_root; + struct ext2_reserve_window_node s_rsv_window_head; + /* + * s_lock protects against concurrent modifications of s_mount_state, + * s_blocks_last, s_overhead_last and the content of superblock's + * buffer pointed to by sbi->s_es. + * + * Note: It is used in ext2_show_options() to provide a consistent view + * of the mount options. + */ + spinlock_t s_lock; +}; + +static inline spinlock_t * +sb_bgl_lock(struct ext2_sb_info *sbi, unsigned int block_group) +{ + return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); +} + +/* + * Define EXT2FS_DEBUG to produce debug messages + */ +#undef EXT2FS_DEBUG + +/* + * Define EXT2_RESERVATION to reserve data blocks for expanding files + */ +#define EXT2_DEFAULT_RESERVE_BLOCKS 8 +/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ +#define EXT2_MAX_RESERVE_BLOCKS 1027 +#define EXT2_RESERVE_WINDOW_NOT_ALLOCATED 0 +/* + * The second extended file system version + */ +#define EXT2FS_DATE "95/08/09" +#define EXT2FS_VERSION "0.5b" + +/* + * Debug code + */ +#ifdef EXT2FS_DEBUG +# define ext2_debug(f, a...) { \ + printk ("EXT2-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __func__); \ + printk (f, ## a); \ + } +#else +# define ext2_debug(f, a...) 
/**/ +#endif + +/* + * Special inode numbers + */ +#define EXT2_BAD_INO 1 /* Bad blocks inode */ +#define EXT2_ROOT_INO 2 /* Root inode */ +#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ + +/* First non-reserved inode for old ext2 filesystems */ +#define EXT2_GOOD_OLD_FIRST_INO 11 + +static inline struct ext2_sb_info *EXT2_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT2_MIN_BLOCK_SIZE 1024 +#define EXT2_MAX_BLOCK_SIZE 4096 +#define EXT2_MIN_BLOCK_LOG_SIZE 10 +#define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize) +#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) +#define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +#define EXT2_ADDR_PER_BLOCK_BITS(s) (EXT2_SB(s)->s_addr_per_block_bits) +#define EXT2_INODE_SIZE(s) (EXT2_SB(s)->s_inode_size) +#define EXT2_FIRST_INO(s) (EXT2_SB(s)->s_first_ino) + +/* + * Macro-instructions used to manage fragments + */ +#define EXT2_MIN_FRAG_SIZE 1024 +#define EXT2_MAX_FRAG_SIZE 4096 +#define EXT2_MIN_FRAG_LOG_SIZE 10 +#define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->s_frag_size) +#define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->s_frags_per_block) + +/* + * Structure of a blocks group descriptor + */ +struct ext2_group_desc +{ + __le32 bg_block_bitmap; /* Blocks bitmap block */ + __le32 bg_inode_bitmap; /* Inodes bitmap block */ + __le32 bg_inode_table; /* Inodes table block */ + __le16 bg_free_blocks_count; /* Free blocks count */ + __le16 bg_free_inodes_count; /* Free inodes count */ + __le16 bg_used_dirs_count; /* Directories count */ + __le16 bg_pad; + __le32 bg_reserved[3]; +}; + +/* + * Macro-instructions used to manage group descriptors + */ +#define EXT2_BLOCKS_PER_GROUP(s) (EXT2_SB(s)->s_blocks_per_group) +#define EXT2_DESC_PER_BLOCK(s) (EXT2_SB(s)->s_desc_per_block) +#define EXT2_INODES_PER_GROUP(s) (EXT2_SB(s)->s_inodes_per_group) +#define EXT2_DESC_PER_BLOCK_BITS(s) (EXT2_SB(s)->s_desc_per_block_bits) + +/* + * Constants relative to the data blocks + */ +#define EXT2_NDIR_BLOCKS 12 +#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS +#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1) +#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1) +#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) + +/* + * Inode flags (GETFLAGS/SETFLAGS) + */ +#define EXT2_SECRM_FL FS_SECRM_FL /* Secure deletion */ +#define EXT2_UNRM_FL FS_UNRM_FL /* Undelete */ +#define EXT2_COMPR_FL FS_COMPR_FL /* Compress file */ +#define EXT2_SYNC_FL FS_SYNC_FL /* Synchronous updates */ +#define EXT2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */ +#define EXT2_APPEND_FL FS_APPEND_FL /* writes to file may only append */ +#define EXT2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ +#define EXT2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ +/* Reserved for compression usage... 
*/ +#define EXT2_DIRTY_FL FS_DIRTY_FL +#define EXT2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ +#define EXT2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ +#define EXT2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT2_BTREE_FL FS_BTREE_FL /* btree format dir */ +#define EXT2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */ +#define EXT2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */ +#define EXT2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */ +#define EXT2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ +#define EXT2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ +#define EXT2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ +#define EXT2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ + +#define EXT2_FL_USER_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ + +/* Flags that should be inherited by new inodes from their parent. */ +#define EXT2_FL_INHERITED (EXT2_SECRM_FL | EXT2_UNRM_FL | EXT2_COMPR_FL |\ + EXT2_SYNC_FL | EXT2_NODUMP_FL |\ + EXT2_NOATIME_FL | EXT2_COMPRBLK_FL |\ + EXT2_NOCOMP_FL | EXT2_JOURNAL_DATA_FL |\ + EXT2_NOTAIL_FL | EXT2_DIRSYNC_FL) + +/* Flags that are appropriate for regular files (all but dir-specific ones). */ +#define EXT2_REG_FLMASK (~(EXT2_DIRSYNC_FL | EXT2_TOPDIR_FL)) + +/* Flags that are appropriate for non-directories/regular files. */ +#define EXT2_OTHER_FLMASK (EXT2_NODUMP_FL | EXT2_NOATIME_FL) + +/* Mask out flags that are inappropriate for the given type of inode. */ +static inline __u32 ext2_mask_flags(umode_t mode, __u32 flags) +{ + if (S_ISDIR(mode)) + return flags; + else if (S_ISREG(mode)) + return flags & EXT2_REG_FLMASK; + else + return flags & EXT2_OTHER_FLMASK; +} + +/* + * ioctl commands + */ +#define EXT2_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT2_IOC_SETFLAGS FS_IOC_SETFLAGS +#define EXT2_IOC_GETVERSION FS_IOC_GETVERSION +#define EXT2_IOC_SETVERSION FS_IOC_SETVERSION +#define EXT2_IOC_GETRSVSZ _IOR('f', 5, long) +#define EXT2_IOC_SETRSVSZ _IOW('f', 6, long) + +/* + * ioctl commands in 32 bit emulation + */ +#define EXT2_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT2_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT2_IOC32_GETVERSION FS_IOC32_GETVERSION +#define EXT2_IOC32_SETVERSION FS_IOC32_SETVERSION + +/* + * Structure of an inode on the disk + */ +struct ext2_inode { + __le16 i_mode; /* File mode */ + __le16 i_uid; /* Low 16 bits of Owner Uid */ + __le32 i_size; /* Size in bytes */ + __le32 i_atime; /* Access time */ + __le32 i_ctime; /* Creation time */ + __le32 i_mtime; /* Modification time */ + __le32 i_dtime; /* Deletion Time */ + __le16 i_gid; /* Low 16 bits of Group Id */ + __le16 i_links_count; /* Links count */ + __le32 i_blocks; /* Blocks count */ + __le32 i_flags; /* File flags */ + union { + struct { + __le32 l_i_reserved1; + } linux1; + struct { + __le32 h_i_translator; + } hurd1; + struct { + __le32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __le32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */ + __le32 i_generation; /* File version (for NFS) */ + __le32 i_file_acl; /* File ACL */ + __le32 i_dir_acl; /* Directory ACL */ + __le32 i_faddr; /* Fragment address */ + union { + struct { + __u8 l_i_frag; /* Fragment number */ + __u8 l_i_fsize; /* Fragment size */ + __u16 i_pad1; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } 
linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ + __le16 h_i_mode_high; + __le16 h_i_uid_high; + __le16 h_i_gid_high; + __le32 h_i_author; + } hurd2; + struct { + __u8 m_i_frag; /* Fragment number */ + __u8 m_i_fsize; /* Fragment size */ + __u16 m_pad1; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ +}; + +#define i_size_high i_dir_acl + +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_frag osd2.linux2.l_i_frag +#define i_fsize osd2.linux2.l_i_fsize +#define i_uid_low i_uid +#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 + +/* + * File system states + */ +#define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT2_ERROR_FS 0x0002 /* Errors detected */ + +/* + * Mount flags + */ +#define EXT2_MOUNT_CHECK 0x000001 /* Do mount-time checks */ +#define EXT2_MOUNT_OLDALLOC 0x000002 /* Don't use the new Orlov allocator */ +#define EXT2_MOUNT_GRPID 0x000004 /* Create files with directory's group */ +#define EXT2_MOUNT_DEBUG 0x000008 /* Some debugging messages */ +#define EXT2_MOUNT_ERRORS_CONT 0x000010 /* Continue on errors */ +#define EXT2_MOUNT_ERRORS_RO 0x000020 /* Remount fs ro on errors */ +#define EXT2_MOUNT_ERRORS_PANIC 0x000040 /* Panic on errors */ +#define EXT2_MOUNT_MINIX_DF 0x000080 /* Mimics the Minix statfs */ +#define EXT2_MOUNT_NOBH 0x000100 /* No buffer_heads */ +#define EXT2_MOUNT_NO_UID32 0x000200 /* Disable 32-bit UIDs */ +#define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ +#define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ +#define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ +#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ +#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ +#define EXT2_MOUNT_RESERVATION 0x080000 /* Preallocation */ + + +#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt +#define set_opt(o, opt) o |= EXT2_MOUNT_##opt +#define test_opt(sb, opt) (EXT2_SB(sb)->s_mount_opt & \ + EXT2_MOUNT_##opt) +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT2_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT2_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * Behaviour when detecting errors + */ +#define EXT2_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT2_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT2_ERRORS_PANIC 3 /* Panic */ +#define EXT2_ERRORS_DEFAULT EXT2_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext2_super_block { + __le32 s_inodes_count; /* Inodes count */ + __le32 s_blocks_count; /* Blocks count */ + __le32 s_r_blocks_count; /* Reserved blocks count */ + __le32 s_free_blocks_count; /* Free blocks count */ + __le32 s_free_inodes_count; /* Free inodes count */ + __le32 s_first_data_block; /* First Data Block */ + __le32 s_log_block_size; /* Block size */ + __le32 s_log_frag_size; /* Fragment size */ + __le32 s_blocks_per_group; /* # Blocks per group */ + __le32 s_frags_per_group; /* # Fragments per group */ + __le32 s_inodes_per_group; /* # Inodes per group */ + __le32 s_mtime; /* Mount time */ + __le32 s_wtime; /* Write time */ + __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_magic; /* Magic signature */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le16 s_minor_rev_level; /* minor revision level */ + __le32 s_lastcheck; /* time 
of last check */ + __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le32 s_rev_level; /* Revision level */ + __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT2_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __le32 s_first_ino; /* First non-reserved inode */ + __le16 s_inode_size; /* size of inode structure */ + __le16 s_block_group_nr; /* block group # of this superblock */ + __le32 s_feature_compat; /* compatible feature set */ + __le32 s_feature_incompat; /* incompatible feature set */ + __le32 s_feature_ro_compat; /* readonly-compatible feature set */ + __u8 s_uuid[16]; /* 128-bit uuid for volume */ + char s_volume_name[16]; /* volume name */ + char s_last_mounted[64]; /* directory where last mounted */ + __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT2_COMPAT_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_padding1; + /* + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. + */ + __u8 s_journal_uuid[16]; /* uuid of journal superblock */ + __u32 s_journal_inum; /* inode number of journal file */ + __u32 s_journal_dev; /* device number of journal file */ + __u32 s_last_orphan; /* start of list of inodes to delete */ + __u32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __u16 s_reserved_word_pad; + __le32 s_default_mount_opts; + __le32 s_first_meta_bg; /* First metablock block group */ + __u32 s_reserved[190]; /* Padding to the end of the block */ +}; + +/* + * Codes for operating systems + */ +#define EXT2_OS_LINUX 0 +#define EXT2_OS_HURD 1 +#define EXT2_OS_MASIX 2 +#define EXT2_OS_FREEBSD 3 +#define EXT2_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT2_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT2_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT2_CURRENT_REV EXT2_GOOD_OLD_REV +#define EXT2_MAX_SUPP_REV EXT2_DYNAMIC_REV + +#define EXT2_GOOD_OLD_INODE_SIZE 128 + +/* + * Feature set definitions + */ + +#define EXT2_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) +#define EXT2_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) +#define EXT2_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) +#define EXT2_SET_COMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) +#define EXT2_SET_RO_COMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) +#define EXT2_SET_INCOMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) +#define EXT2_CLEAR_COMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) 
+#define EXT2_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) +#define EXT2_CLEAR_INCOMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) + +#define EXT2_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT2_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT2_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT2_FEATURE_COMPAT_RESIZE_INO 0x0010 +#define EXT2_FEATURE_COMPAT_DIR_INDEX 0x0020 +#define EXT2_FEATURE_COMPAT_ANY 0xffffffff + +#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT2_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT2_FEATURE_RO_COMPAT_ANY 0xffffffff + +#define EXT2_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT2_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 +#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 +#define EXT2_FEATURE_INCOMPAT_META_BG 0x0010 +#define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff + +#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR +#define EXT2_FEATURE_INCOMPAT_SUPP (EXT2_FEATURE_INCOMPAT_FILETYPE| \ + EXT2_FEATURE_INCOMPAT_META_BG) +#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT2_FEATURE_RO_COMPAT_BTREE_DIR) +#define EXT2_FEATURE_RO_COMPAT_UNSUPPORTED ~EXT2_FEATURE_RO_COMPAT_SUPP +#define EXT2_FEATURE_INCOMPAT_UNSUPPORTED ~EXT2_FEATURE_INCOMPAT_SUPP + +/* + * Default values for user and/or group using reserved blocks + */ +#define EXT2_DEF_RESUID 0 +#define EXT2_DEF_RESGID 0 + +/* + * Default mount options + */ +#define EXT2_DEFM_DEBUG 0x0001 +#define EXT2_DEFM_BSDGROUPS 0x0002 +#define EXT2_DEFM_XATTR_USER 0x0004 +#define EXT2_DEFM_ACL 0x0008 +#define EXT2_DEFM_UID16 0x0010 + /* Not used by ext2, but reserved for use by ext3 */ +#define EXT3_DEFM_JMODE 0x0060 +#define EXT3_DEFM_JMODE_DATA 0x0020 +#define EXT3_DEFM_JMODE_ORDERED 0x0040 +#define EXT3_DEFM_JMODE_WBACK 0x0060 + +/* + * Structure of a directory entry + */ + +struct ext2_dir_entry { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __le16 name_len; /* Name length */ + char name[]; /* File name, up to EXT2_NAME_LEN */ +}; + +/* + * The new version of the directory entry. Since EXT2 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct ext2_dir_entry_2 { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[]; /* File name, up to EXT2_NAME_LEN */ +}; + +/* + * Ext2 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. 
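+ * When the FILETYPE incompatible feature is set, ext2_set_de_type()
+ * derives one of these values from the inode mode (S_IFREG ->
+ * EXT2_FT_REG_FILE, S_IFDIR -> EXT2_FT_DIR, and so on), and
+ * ext2_readdir() translates it back into the DT_* value handed to
+ * dir_emit().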
+ */ +enum { + EXT2_FT_UNKNOWN = 0, + EXT2_FT_REG_FILE = 1, + EXT2_FT_DIR = 2, + EXT2_FT_CHRDEV = 3, + EXT2_FT_BLKDEV = 4, + EXT2_FT_FIFO = 5, + EXT2_FT_SOCK = 6, + EXT2_FT_SYMLINK = 7, + EXT2_FT_MAX +}; + +/* + * EXT2_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT2_DIR_PAD 4 +#define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) +#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ + ~EXT2_DIR_ROUND) +#define EXT2_MAX_REC_LEN ((1<<16)-1) + +static inline void verify_offsets(void) +{ +#define A(x,y) BUILD_BUG_ON(x != offsetof(struct ext2_super_block, y)); + A(EXT2_SB_MAGIC_OFFSET, s_magic); + A(EXT2_SB_BLOCKS_OFFSET, s_blocks_count); + A(EXT2_SB_BSIZE_OFFSET, s_log_block_size); +#undef A +} /* * ext2 mount options */ struct ext2_mount_options { unsigned long s_mount_opt; - uid_t s_resuid; - gid_t s_resgid; + kuid_t s_resuid; + kgid_t s_resgid; }; /* @@ -27,28 +658,15 @@ struct ext2_inode_info { /* * i_block_group is the number of the block group which contains * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to + * it is used for making block allocation decisions - we try to * place a file's data blocks near its inode block, and new inodes * near to their parent directory's inode. */ __u32 i_block_group; - /* - * i_next_alloc_block is the logical (file-relative) number of the - * most-recently-allocated block in this file. Yes, it is misnamed. - * We use this for detecting linearly ascending allocation requests. - */ - __u32 i_next_alloc_block; + /* block reservation info */ + struct ext2_block_alloc_info *i_block_alloc_info; - /* - * i_next_alloc_goal is the *physical* companion to i_next_alloc_block. - * it the the physical block number of the block which was most-recently - * allocated to this file. This give us the goal (target) for the next - * allocation when we detect linearly ascending requests. - */ - __u32 i_next_alloc_goal; - __u32 i_prealloc_block; - __u32 i_prealloc_count; __u32 i_dir_start_lookup; #ifdef CONFIG_EXT2_FS_XATTR /* @@ -60,12 +678,17 @@ struct ext2_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_EXT2_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif rwlock_t i_meta_lock; + + /* + * truncate_mutex is for serialising ext2_truncate() against + * ext2_getblock(). It also protects the internals of the inode's + * reservation data structures: ext2_reserve_window and + * ext2_reserve_window_node. 
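+	 * Paths that drop the reservation window, such as
+	 * ext2_release_file() when a descriptor opened for writing is
+	 * released, take this mutex around ext2_discard_reservation().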
+ */ + struct mutex truncate_mutex; struct inode vfs_inode; + struct list_head i_orphan; /* unlinked but open inodes */ }; /* @@ -91,8 +714,9 @@ static inline struct ext2_inode_info *EXT2_I(struct inode *inode) /* balloc.c */ extern int ext2_bg_has_super(struct super_block *sb, int group); extern unsigned long ext2_bg_num_gdb(struct super_block *sb, int group); -extern int ext2_new_block (struct inode *, unsigned long, - __u32 *, __u32 *, int *); +extern ext2_fsblk_t ext2_new_block(struct inode *, unsigned long, int *); +extern ext2_fsblk_t ext2_new_blocks(struct inode *, unsigned long, + unsigned long *, int *); extern void ext2_free_blocks (struct inode *, unsigned long, unsigned long); extern unsigned long ext2_count_free_blocks (struct super_block *); @@ -101,51 +725,51 @@ extern void ext2_check_blocks_bitmap (struct super_block *); extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, unsigned int block_group, struct buffer_head ** bh); +extern void ext2_discard_reservation (struct inode *); +extern int ext2_should_retry_alloc(struct super_block *sb, int *retries); +extern void ext2_init_block_alloc_info(struct inode *); +extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_window_node *rsv); /* dir.c */ extern int ext2_add_link (struct dentry *, struct inode *); -extern ino_t ext2_inode_by_name(struct inode *, struct dentry *); +extern ino_t ext2_inode_by_name(struct inode *, struct qstr *); extern int ext2_make_empty(struct inode *, struct inode *); -extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **); +extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, struct page **); extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); extern int ext2_empty_dir (struct inode *); extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); -extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); - -/* fsync.c */ -extern int ext2_sync_file (struct file *, struct dentry *, int); +extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int); /* ialloc.c */ -extern struct inode * ext2_new_inode (struct inode *, int); +extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *); extern void ext2_free_inode (struct inode *); extern unsigned long ext2_count_free_inodes (struct super_block *); extern void ext2_check_inodes_bitmap (struct super_block *); extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ -extern void ext2_read_inode (struct inode *); -extern int ext2_write_inode (struct inode *, int); -extern void ext2_put_inode (struct inode *); -extern void ext2_delete_inode (struct inode *); -extern int ext2_sync_inode (struct inode *); -extern void ext2_discard_prealloc (struct inode *); +extern struct inode *ext2_iget (struct super_block *, unsigned long); +extern int ext2_write_inode (struct inode *, struct writeback_control *); +extern void ext2_evict_inode(struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); -extern void ext2_truncate (struct inode *); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); +extern void ext2_get_inode_flags(struct ext2_inode_info *); +extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); /* ioctl.c */ -extern int 
ext2_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); +extern long ext2_ioctl(struct file *, unsigned int, unsigned long); +extern long ext2_compat_ioctl(struct file *, unsigned int, unsigned long); /* namei.c */ struct dentry *ext2_get_parent(struct dentry *child); /* super.c */ -extern void ext2_error (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); -extern void ext2_warning (struct super_block *, const char *, const char *, ...) - __attribute__ ((format (printf, 3, 4))); +extern __printf(3, 4) +void ext2_error(struct super_block *, const char *, const char *, ...); +extern __printf(3, 4) +void ext2_msg(struct super_block *, const char *, const char *, ...); extern void ext2_update_dynamic_rev (struct super_block *sb); extern void ext2_write_super (struct super_block *); @@ -157,7 +781,9 @@ extern void ext2_write_super (struct super_block *); extern const struct file_operations ext2_dir_operations; /* file.c */ -extern struct inode_operations ext2_file_inode_operations; +extern int ext2_fsync(struct file *file, loff_t start, loff_t end, + int datasync); +extern const struct inode_operations ext2_file_inode_operations; extern const struct file_operations ext2_file_operations; extern const struct file_operations ext2_xip_file_operations; @@ -167,9 +793,22 @@ extern const struct address_space_operations ext2_aops_xip; extern const struct address_space_operations ext2_nobh_aops; /* namei.c */ -extern struct inode_operations ext2_dir_inode_operations; -extern struct inode_operations ext2_special_inode_operations; +extern const struct inode_operations ext2_dir_inode_operations; +extern const struct inode_operations ext2_special_inode_operations; /* symlink.c */ -extern struct inode_operations ext2_fast_symlink_inode_operations; -extern struct inode_operations ext2_symlink_inode_operations; +extern const struct inode_operations ext2_fast_symlink_inode_operations; +extern const struct inode_operations ext2_symlink_inode_operations; + +static inline ext2_fsblk_t +ext2_group_first_block_no(struct super_block *sb, unsigned long group_no) +{ + return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block); +} + +#define ext2_set_bit __test_and_set_bit_le +#define ext2_clear_bit __test_and_clear_bit_le +#define ext2_test_bit test_bit_le +#define ext2_find_first_zero_bit find_first_zero_bit_le +#define ext2_find_next_zero_bit find_next_zero_bit_le diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 23e2c7ccec1..7c87b22a722 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -19,42 +19,63 @@ */ #include <linux/time.h> +#include <linux/pagemap.h> +#include <linux/quotaops.h> #include "ext2.h" #include "xattr.h" #include "acl.h" /* - * Called when an inode is released. Note that this is different - * from ext2_open_file: open gets called at every open, but release - * gets called only when /all/ the files are closed. + * Called when filp is released. This happens when all file descriptors + * for a single struct file are closed. Note that different open() calls + * for the same file yield different struct file structures. 
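+ * Only a struct file that was opened for writing discards the inode's
+ * block reservation here; a read-only release leaves any reservation
+ * set up by a writer in place.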
*/ static int ext2_release_file (struct inode * inode, struct file * filp) { - if (filp->f_mode & FMODE_WRITE) - ext2_discard_prealloc (inode); + if (filp->f_mode & FMODE_WRITE) { + mutex_lock(&EXT2_I(inode)->truncate_mutex); + ext2_discard_reservation(inode); + mutex_unlock(&EXT2_I(inode)->truncate_mutex); + } return 0; } +int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + int ret; + struct super_block *sb = file->f_mapping->host->i_sb; + struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; + + ret = generic_file_fsync(file, start, end, datasync); + if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) { + /* We don't really know where the IO error happened... */ + ext2_error(sb, __func__, + "detected IO error when writing metadata buffers"); + ret = -EIO; + } + return ret; +} + /* * We have mostly NULL's here: the current defaults are ok for * the ext2 filesystem. */ const struct file_operations ext2_file_operations = { .llseek = generic_file_llseek, - .read = generic_file_read, - .write = generic_file_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, - .ioctl = ext2_ioctl, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .unlocked_ioctl = ext2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext2_compat_ioctl, +#endif .mmap = generic_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = ext2_release_file, - .fsync = ext2_sync_file, - .readv = generic_file_readv, - .writev = generic_file_writev, - .sendfile = generic_file_sendfile, + .fsync = ext2_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; #ifdef CONFIG_EXT2_FS_XIP @@ -62,17 +83,18 @@ const struct file_operations ext2_xip_file_operations = { .llseek = generic_file_llseek, .read = xip_file_read, .write = xip_file_write, - .ioctl = ext2_ioctl, + .unlocked_ioctl = ext2_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext2_compat_ioctl, +#endif .mmap = xip_file_mmap, - .open = generic_file_open, + .open = dquot_file_open, .release = ext2_release_file, - .fsync = ext2_sync_file, - .sendfile = xip_file_sendfile, + .fsync = ext2_fsync, }; #endif -struct inode_operations ext2_file_inode_operations = { - .truncate = ext2_truncate, +const struct inode_operations ext2_file_inode_operations = { #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -80,5 +102,7 @@ struct inode_operations ext2_file_inode_operations = { .removexattr = generic_removexattr, #endif .setattr = ext2_setattr, - .permission = ext2_permission, + .get_acl = ext2_get_acl, + .set_acl = ext2_set_acl, + .fiemap = ext2_fiemap, }; diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c deleted file mode 100644 index 7806b9e8155..00000000000 --- a/fs/ext2/fsync.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * linux/fs/ext2/fsync.c - * - * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk) - * from - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * from - * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds - * - * ext2fs fsync primitive - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * - * Removed unnecessary code duplication for little endian machines - * and excessive __inline__s. 
- * Andi Kleen, 1997 - * - * Major simplications and cleanup - we only need to do the metadata, because - * we can depend on generic_block_fdatasync() to sync the data blocks. - */ - -#include "ext2.h" -#include <linux/smp_lock.h> -#include <linux/buffer_head.h> /* for sync_mapping_buffers() */ - - -/* - * File may be NULL when we are called. Perhaps we shouldn't - * even pass file to fsync ? - */ - -int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync) -{ - struct inode *inode = dentry->d_inode; - int err; - int ret; - - ret = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) - return ret; - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) - return ret; - - err = ext2_sync_inode(inode); - if (ret == 0) - ret = err; - return ret; -} diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 2cb545bf0f3..7d66fb0e4cc 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -75,15 +75,12 @@ static void ext2_release_inode(struct super_block *sb, int group, int dir) } spin_lock(sb_bgl_lock(EXT2_SB(sb), group)); - desc->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1); + le16_add_cpu(&desc->bg_free_inodes_count, 1); if (dir) - desc->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1); + le16_add_cpu(&desc->bg_used_dirs_count, -1); spin_unlock(sb_bgl_lock(EXT2_SB(sb), group)); if (dir) percpu_counter_dec(&EXT2_SB(sb)->s_dirs_counter); - sb->s_dirt = 1; mark_buffer_dirty(bh); } @@ -108,7 +105,7 @@ void ext2_free_inode (struct inode * inode) struct super_block * sb = inode->i_sb; int is_directory; unsigned long ino; - struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bitmap_bh; unsigned long block_group; unsigned long bit; struct ext2_super_block * es; @@ -120,31 +117,24 @@ void ext2_free_inode (struct inode * inode) * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. */ - if (!is_bad_inode(inode)) { - /* Quota is already initialized in iput() */ - ext2_xattr_delete_inode(inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - } + /* Quota is already initialized in iput() */ + dquot_free_inode(inode); + dquot_drop(inode); es = EXT2_SB(sb)->s_es; is_directory = S_ISDIR(inode->i_mode); - /* Do this BEFORE marking the inode not in use or returning an error */ - clear_inode (inode); - if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { ext2_error (sb, "ext2_free_inode", "reserved or nonexistent inode %lu", ino); - goto error_return; + return; } block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb); - brelse(bitmap_bh); bitmap_bh = read_inode_bitmap(sb, block_group); if (!bitmap_bh) - goto error_return; + return; /* Ok, now we can actually update the inode bitmaps.. 
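
Several ialloc.c hunks above collapse the cpu_to_le16(le16_to_cpu(x) ± 1) read-modify-write into le16_add_cpu(). The equivalent operation in plain userspace C, assuming glibc's <endian.h> conversion helpers (the kernel's helpers are named differently but do the same conversion):

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* add n to a little-endian 16-bit counter as stored on disk */
static void le16_add(uint16_t *le_val, int n)
{
        *le_val = htole16((uint16_t)(le16toh(*le_val) + n));
}

int main(void)
{
        uint16_t free_inodes = htole16(100);

        le16_add(&free_inodes, 1);      /* one inode released   */
        le16_add(&free_inodes, -2);     /* two inodes allocated */
        printf("free inodes on disk: %u\n", (unsigned)le16toh(free_inodes));
        return 0;
}
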
*/ if (!ext2_clear_bit_atomic(sb_bgl_lock(EXT2_SB(sb), block_group), @@ -156,7 +146,7 @@ void ext2_free_inode (struct inode * inode) mark_buffer_dirty(bitmap_bh); if (sb->s_flags & MS_SYNCHRONOUS) sync_dirty_buffer(bitmap_bh); -error_return: + brelse(bitmap_bh); } @@ -177,7 +167,6 @@ static void ext2_preread_inode(struct inode *inode) unsigned long block_group; unsigned long offset; unsigned long block; - struct buffer_head *bh; struct ext2_group_desc * gdp; struct backing_dev_info *bdi; @@ -188,7 +177,7 @@ static void ext2_preread_inode(struct inode *inode) return; block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); - gdp = ext2_get_group_desc(inode->i_sb, block_group, &bh); + gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL); if (gdp == NULL) return; @@ -217,11 +206,10 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) int ngroups = EXT2_SB(sb)->s_groups_count; int avefreei = ext2_count_free_inodes(sb) / ngroups; struct ext2_group_desc *desc, *best_desc = NULL; - struct buffer_head *bh, *best_bh = NULL; int group, best_group = -1; for (group = 0; group < ngroups; group++) { - desc = ext2_get_group_desc (sb, group, &bh); + desc = ext2_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) @@ -231,7 +219,6 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) le16_to_cpu(best_desc->bg_free_blocks_count))) { best_group = group; best_desc = desc; - best_bh = bh; } } if (!best_desc) @@ -256,7 +243,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) * it has too few free inodes left (min_inodes) or * it has too few free blocks left (min_blocks) or * it's already running too large debt (max_debt). - * Parent's group is prefered, if it doesn't satisfy these + * Parent's group is preferred, if it doesn't satisfy these * conditions we search cyclically through the rest. If none * of the groups look good we just look for a group with more * free inodes than average (starting at parent's group). 
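
A simplified sketch of the Orlov-style directory placement just described: starting at the parent's group, scan cyclically for a group whose free-inode count, free-block count and accumulated debt all pass the thresholds, and fall back to "anything above average" if none does. The group table and thresholds below are fabricated for the example.

#include <stdio.h>

struct group {
        int free_inodes;
        int free_blocks;
        int debt;
};

static int find_group(const struct group *g, int ngroups, int parent,
                      int min_inodes, int min_blocks, int max_debt)
{
        int i, grp;

        for (i = 0; i < ngroups; i++) {
                grp = (parent + i) % ngroups;   /* cyclic scan from parent */
                if (g[grp].debt >= max_debt)
                        continue;
                if (g[grp].free_inodes < min_inodes)
                        continue;
                if (g[grp].free_blocks < min_blocks)
                        continue;
                return grp;
        }
        return -1;      /* caller falls back to "above average" search */
}

int main(void)
{
        struct group groups[4] = {
                { 10, 50, 5 }, { 200, 900, 1 }, { 150, 40, 0 }, { 300, 800, 2 },
        };

        printf("chosen group: %d\n", find_group(groups, 4, 2, 100, 100, 4));
        return 0;
}
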
@@ -284,7 +271,6 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) int max_debt, max_dirs, min_blocks, min_inodes; int group = -1, i; struct ext2_group_desc *desc; - struct buffer_head *bh; freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); avefreei = freei / ngroups; @@ -295,15 +281,14 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) if ((parent == sb->s_root->d_inode) || (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) { struct ext2_group_desc *best_desc = NULL; - struct buffer_head *best_bh = NULL; int best_ndir = inodes_per_group; int best_group = -1; - get_random_bytes(&group, sizeof(group)); + group = prandom_u32(); parent_group = (unsigned)group % ngroups; for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; - desc = ext2_get_group_desc (sb, group, &bh); + desc = ext2_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) @@ -315,11 +300,9 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) best_group = group; best_ndir = le16_to_cpu(desc->bg_used_dirs_count); best_desc = desc; - best_bh = bh; } if (best_group >= 0) { desc = best_desc; - bh = best_bh; group = best_group; goto found; } @@ -345,7 +328,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; - desc = ext2_get_group_desc (sb, group, &bh); + desc = ext2_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (sbi->s_debts[group] >= max_debt) @@ -362,7 +345,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) fallback: for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; - desc = ext2_get_group_desc (sb, group, &bh); + desc = ext2_get_group_desc (sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) @@ -389,14 +372,13 @@ static int find_group_other(struct super_block *sb, struct inode *parent) int parent_group = EXT2_I(parent)->i_block_group; int ngroups = EXT2_SB(sb)->s_groups_count; struct ext2_group_desc *desc; - struct buffer_head *bh; int group, i; /* * Try to place the inode in its parent directory */ group = parent_group; - desc = ext2_get_group_desc (sb, group, &bh); + desc = ext2_get_group_desc (sb, group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count) && le16_to_cpu(desc->bg_free_blocks_count)) goto found; @@ -420,7 +402,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent) group += i; if (group >= ngroups) group -= ngroups; - desc = ext2_get_group_desc (sb, group, &bh); + desc = ext2_get_group_desc (sb, group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count) && le16_to_cpu(desc->bg_free_blocks_count)) goto found; @@ -434,7 +416,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent) for (i = 0; i < ngroups; i++) { if (++group >= ngroups) group = 0; - desc = ext2_get_group_desc (sb, group, &bh); + desc = ext2_get_group_desc (sb, group, NULL); if (desc && le16_to_cpu(desc->bg_free_inodes_count)) goto found; } @@ -445,7 +427,8 @@ found: return group; } -struct inode *ext2_new_inode(struct inode *dir, int mode) +struct inode *ext2_new_inode(struct inode *dir, umode_t mode, + const struct qstr *qstr) { struct super_block *sb; struct buffer_head *bitmap_bh = NULL; @@ -542,76 +525,68 @@ got: goto fail; } - 
percpu_counter_mod(&sbi->s_freeinodes_counter, -1); + percpu_counter_add(&sbi->s_freeinodes_counter, -1); if (S_ISDIR(mode)) percpu_counter_inc(&sbi->s_dirs_counter); spin_lock(sb_bgl_lock(sbi, group)); - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + le16_add_cpu(&gdp->bg_free_inodes_count, -1); if (S_ISDIR(mode)) { if (sbi->s_debts[group] < 255) sbi->s_debts[group]++; - gdp->bg_used_dirs_count = - cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + le16_add_cpu(&gdp->bg_used_dirs_count, 1); } else { if (sbi->s_debts[group]) sbi->s_debts[group]--; } spin_unlock(sb_bgl_lock(sbi, group)); - sb->s_dirt = 1; mark_buffer_dirty(bh2); - inode->i_uid = current->fsuid; - if (test_opt (sb, GRPID)) + if (test_opt(sb, GRPID)) { + inode->i_mode = mode; + inode->i_uid = current_fsuid(); inode->i_gid = dir->i_gid; - else if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; } else - inode->i_gid = current->fsgid; - inode->i_mode = mode; + inode_init_owner(inode, dir, mode); inode->i_ino = ino; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL; - if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL); - /* dirsync is only applied to directories */ - if (!S_ISDIR(mode)) - ei->i_flags &= ~EXT2_DIRSYNC_FL; + ei->i_flags = + ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED); ei->i_faddr = 0; ei->i_frag_no = 0; ei->i_frag_size = 0; ei->i_file_acl = 0; ei->i_dir_acl = 0; ei->i_dtime = 0; + ei->i_block_alloc_info = NULL; ei->i_block_group = group; - ei->i_next_alloc_block = 0; - ei->i_next_alloc_goal = 0; - ei->i_prealloc_block = 0; - ei->i_prealloc_count = 0; ei->i_dir_start_lookup = 0; ei->i_state = EXT2_STATE_NEW; ext2_set_inode_flags(inode); spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - insert_inode_hash(inode); + if (insert_inode_locked(inode) < 0) { + ext2_error(sb, "ext2_new_inode", + "inode number already in use - inode=%lu", + (unsigned long) ino); + err = -EIO; + goto fail; + } - if (DQUOT_ALLOC_INODE(inode)) { - err = -EDQUOT; + dquot_initialize(inode); + err = dquot_alloc_inode(inode); + if (err) goto fail_drop; - } err = ext2_init_acl(inode, dir); if (err) goto fail_free_drop; - err = ext2_init_security(inode,dir); + err = ext2_init_security(inode, dir, qstr); if (err) goto fail_free_drop; @@ -621,12 +596,13 @@ got: return inode; fail_free_drop: - DQUOT_FREE_INODE(inode); + dquot_free_inode(inode); fail_drop: - DQUOT_DROP(inode); + dquot_drop(inode); inode->i_flags |= S_NOQUOTA; - inode->i_nlink = 0; + clear_nlink(inode); + unlock_new_inode(inode); iput(inode); return ERR_PTR(err); @@ -667,6 +643,7 @@ unsigned long ext2_count_free_inodes (struct super_block * sb) } brelse(bitmap_bh); printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", + (unsigned long) percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter), desc_count, bitmap_count); return desc_count; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index dd4e14c221e..36d35c36311 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -22,24 +22,22 @@ * Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000 */ -#include <linux/smp_lock.h> #include <linux/time.h> #include <linux/highuid.h> #include <linux/pagemap.h> #include <linux/quotaops.h> -#include <linux/module.h> #include 
<linux/writeback.h> #include <linux/buffer_head.h> #include <linux/mpage.h> +#include <linux/fiemap.h> +#include <linux/namei.h> +#include <linux/aio.h> #include "ext2.h" #include "acl.h" #include "xip.h" +#include "xattr.h" -MODULE_AUTHOR("Remy Card and others"); -MODULE_DESCRIPTION("Second Extended Filesystem"); -MODULE_LICENSE("GPL"); - -static int ext2_update_inode(struct inode * inode, int do_sync); +static int __ext2_write_inode(struct inode *inode, int do_sync); /* * Test whether an inode is a fast symlink. @@ -53,95 +51,61 @@ static inline int ext2_inode_is_fast_symlink(struct inode *inode) inode->i_blocks - ea_blocks == 0); } -/* - * Called at each iput(). - * - * The inode may be "bad" if ext2_read_inode() saw an error from - * ext2_get_inode(), so we need to check that to avoid freeing random disk - * blocks. - */ -void ext2_put_inode(struct inode *inode) +static void ext2_truncate_blocks(struct inode *inode, loff_t offset); + +static void ext2_write_failed(struct address_space *mapping, loff_t to) { - if (!is_bad_inode(inode)) - ext2_discard_prealloc(inode); + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, inode->i_size); + ext2_truncate_blocks(inode, inode->i_size); + } } /* * Called at the last iput() if i_nlink is zero. */ -void ext2_delete_inode (struct inode * inode) +void ext2_evict_inode(struct inode * inode) { - truncate_inode_pages(&inode->i_data, 0); + struct ext2_block_alloc_info *rsv; + int want_delete = 0; - if (is_bad_inode(inode)) - goto no_delete; - EXT2_I(inode)->i_dtime = get_seconds(); - mark_inode_dirty(inode); - ext2_update_inode(inode, inode_needs_sync(inode)); - - inode->i_size = 0; - if (inode->i_blocks) - ext2_truncate (inode); - ext2_free_inode (inode); + if (!inode->i_nlink && !is_bad_inode(inode)) { + want_delete = 1; + dquot_initialize(inode); + } else { + dquot_drop(inode); + } - return; -no_delete: - clear_inode(inode); /* We must guarantee clearing of inode... 
*/ -} + truncate_inode_pages_final(&inode->i_data); -void ext2_discard_prealloc (struct inode * inode) -{ -#ifdef EXT2_PREALLOCATE - struct ext2_inode_info *ei = EXT2_I(inode); - write_lock(&ei->i_meta_lock); - if (ei->i_prealloc_count) { - unsigned short total = ei->i_prealloc_count; - unsigned long block = ei->i_prealloc_block; - ei->i_prealloc_count = 0; - ei->i_prealloc_block = 0; - write_unlock(&ei->i_meta_lock); - ext2_free_blocks (inode, block, total); - return; - } else - write_unlock(&ei->i_meta_lock); -#endif -} + if (want_delete) { + sb_start_intwrite(inode->i_sb); + /* set dtime */ + EXT2_I(inode)->i_dtime = get_seconds(); + mark_inode_dirty(inode); + __ext2_write_inode(inode, inode_needs_sync(inode)); + /* truncate to 0 */ + inode->i_size = 0; + if (inode->i_blocks) + ext2_truncate_blocks(inode, 0); + ext2_xattr_delete_inode(inode); + } -static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) -{ -#ifdef EXT2FS_DEBUG - static unsigned long alloc_hits, alloc_attempts; -#endif - unsigned long result; + invalidate_inode_buffers(inode); + clear_inode(inode); + ext2_discard_reservation(inode); + rsv = EXT2_I(inode)->i_block_alloc_info; + EXT2_I(inode)->i_block_alloc_info = NULL; + if (unlikely(rsv)) + kfree(rsv); -#ifdef EXT2_PREALLOCATE - struct ext2_inode_info *ei = EXT2_I(inode); - write_lock(&ei->i_meta_lock); - if (ei->i_prealloc_count && - (goal == ei->i_prealloc_block || goal + 1 == ei->i_prealloc_block)) - { - result = ei->i_prealloc_block++; - ei->i_prealloc_count--; - write_unlock(&ei->i_meta_lock); - ext2_debug ("preallocation hit (%lu/%lu).\n", - ++alloc_hits, ++alloc_attempts); - } else { - write_unlock(&ei->i_meta_lock); - ext2_discard_prealloc (inode); - ext2_debug ("preallocation miss (%lu/%lu).\n", - alloc_hits, ++alloc_attempts); - if (S_ISREG(inode->i_mode)) - result = ext2_new_block (inode, goal, - &ei->i_prealloc_count, - &ei->i_prealloc_block, err); - else - result = ext2_new_block(inode, goal, NULL, NULL, err); + if (want_delete) { + ext2_free_inode(inode); + sb_end_intwrite(inode->i_sb); } -#else - result = ext2_new_block (inode, goal, 0, 0, err); -#endif - return result; } typedef struct { @@ -205,7 +169,8 @@ static int ext2_block_to_path(struct inode *inode, int final = 0; if (i_block < 0) { - ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0"); + ext2_msg(inode->i_sb, KERN_WARNING, + "warning: %s: block < 0", __func__); } else if (i_block < direct_blocks) { offsets[n++] = i_block; final = direct_blocks; @@ -225,10 +190,12 @@ static int ext2_block_to_path(struct inode *inode, offsets[n++] = i_block & (ptrs - 1); final = ptrs; } else { - ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big"); + ext2_msg(inode->i_sb, KERN_WARNING, + "warning: %s: block is too big", __func__); } if (boundary) - *boundary = (i_block & (ptrs - 1)) == (final - 1); + *boundary = final - 1 - (i_block & (ptrs - 1)); + return n; } @@ -306,7 +273,7 @@ no_block: * @inode: owner * @ind: descriptor of indirect block. * - * This function returns the prefered place for block allocation. + * This function returns the preferred place for block allocation. * It is used when heuristic for sequential allocation fails. * Rules are: * + if there is a block to the left of our position - allocate near it. @@ -321,13 +288,13 @@ no_block: * Caller must make sure that @ind is valid and will stay that way. 
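
The placement rules listed above for ext2_find_near() reduce to: prefer a block near the last allocated pointer to our left, then the indirect block holding the pointers, then the inode's own block group offset by a per-process colour. A userspace rendering of that decision, with all numbers invented:

#include <stdint.h>
#include <stdio.h>

static uint32_t find_goal(const uint32_t *ptrs, int idx,
                          uint32_t indirect_block,
                          uint32_t group_start, unsigned int pid,
                          uint32_t blocks_per_group)
{
        int i;

        /* previous allocated pointer to our left: allocate near it */
        for (i = idx - 1; i >= 0; i--)
                if (ptrs[i])
                        return ptrs[i];
        /* no data blocks yet: aim at the indirect block itself */
        if (indirect_block)
                return indirect_block;
        /* fall back to the inode's group, spread by a pid "colour" */
        return group_start + (pid % 16) * (blocks_per_group / 16);
}

int main(void)
{
        uint32_t ptrs[4] = { 0, 5000, 0, 0 };

        printf("goal: %u\n",
               (unsigned)find_goal(ptrs, 3, 9000, 32768, 1234, 8192));
        return 0;
}
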
*/ -static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) +static ext2_fsblk_t ext2_find_near(struct inode *inode, Indirect *ind) { struct ext2_inode_info *ei = EXT2_I(inode); __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; __le32 *p; - unsigned long bg_start; - unsigned long colour; + ext2_fsblk_t bg_start; + ext2_fsblk_t colour; /* Try to find previous block */ for (p = ind->p - 1; p >= start; p--) @@ -339,55 +306,143 @@ static unsigned long ext2_find_near(struct inode *inode, Indirect *ind) return ind->bh->b_blocknr; /* - * It is going to be refered from inode itself? OK, just put it into + * It is going to be referred from inode itself? OK, just put it into * the same cylinder group then. */ - bg_start = (ei->i_block_group * EXT2_BLOCKS_PER_GROUP(inode->i_sb)) + - le32_to_cpu(EXT2_SB(inode->i_sb)->s_es->s_first_data_block); + bg_start = ext2_group_first_block_no(inode->i_sb, ei->i_block_group); colour = (current->pid % 16) * (EXT2_BLOCKS_PER_GROUP(inode->i_sb) / 16); return bg_start + colour; } /** - * ext2_find_goal - find a prefered place for allocation. + * ext2_find_goal - find a preferred place for allocation. * @inode: owner * @block: block we want - * @chain: chain of indirect blocks * @partial: pointer to the last triple within a chain - * @goal: place to store the result. * - * Normally this function find the prefered place for block allocation, - * stores it in *@goal and returns zero. If the branch had been changed - * under us we return -EAGAIN. + * Returns preferred place for a block (the goal). */ -static inline int ext2_find_goal(struct inode *inode, - long block, - Indirect chain[4], - Indirect *partial, - unsigned long *goal) +static inline ext2_fsblk_t ext2_find_goal(struct inode *inode, long block, + Indirect *partial) { - struct ext2_inode_info *ei = EXT2_I(inode); - write_lock(&ei->i_meta_lock); - if ((block == ei->i_next_alloc_block + 1) && ei->i_next_alloc_goal) { - ei->i_next_alloc_block++; - ei->i_next_alloc_goal++; - } - if (verify_chain(chain, partial)) { - /* - * try the heuristic for sequential allocation, - * failing that at least try to get decent locality. - */ - if (block == ei->i_next_alloc_block) - *goal = ei->i_next_alloc_goal; - if (!*goal) - *goal = ext2_find_near(inode, partial); - write_unlock(&ei->i_meta_lock); - return 0; + struct ext2_block_alloc_info *block_i; + + block_i = EXT2_I(inode)->i_block_alloc_info; + + /* + * try the heuristic for sequential allocation, + * failing that at least try to get decent locality. + */ + if (block_i && (block == block_i->last_alloc_logical_block + 1) + && (block_i->last_alloc_physical_block != 0)) { + return block_i->last_alloc_physical_block + 1; + } + + return ext2_find_near(inode, partial); +} + +/** + * ext2_blks_to_allocate: Look up the block map and count the number + * of direct blocks need to be allocated for the given branch. + * + * @branch: chain of indirect blocks + * @k: number of blocks need for indirect blocks + * @blks: number of data blocks to be mapped. + * @blocks_to_boundary: the offset in the indirect block + * + * return the total number of blocks to be allocate, including the + * direct and indirect blocks. 
+ */ +static int +ext2_blks_to_allocate(Indirect * branch, int k, unsigned long blks, + int blocks_to_boundary) +{ + unsigned long count = 0; + + /* + * Simple case, [t,d]Indirect block(s) has not allocated yet + * then it's clear blocks on that path have not allocated + */ + if (k > 0) { + /* right now don't hanel cross boundary allocation */ + if (blks < blocks_to_boundary + 1) + count += blks; + else + count += blocks_to_boundary + 1; + return count; + } + + count++; + while (count < blks && count <= blocks_to_boundary + && le32_to_cpu(*(branch[0].p + count)) == 0) { + count++; + } + return count; +} + +/** + * ext2_alloc_blocks: multiple allocate blocks needed for a branch + * @indirect_blks: the number of blocks need to allocate for indirect + * blocks + * + * @new_blocks: on return it will store the new block numbers for + * the indirect blocks(if needed) and the first direct block, + * @blks: on return it will store the total number of allocated + * direct blocks + */ +static int ext2_alloc_blocks(struct inode *inode, + ext2_fsblk_t goal, int indirect_blks, int blks, + ext2_fsblk_t new_blocks[4], int *err) +{ + int target, i; + unsigned long count = 0; + int index = 0; + ext2_fsblk_t current_block = 0; + int ret = 0; + + /* + * Here we try to allocate the requested multiple blocks at once, + * on a best-effort basis. + * To build a branch, we should allocate blocks for + * the indirect blocks(if not allocated yet), and at least + * the first direct block of this branch. That's the + * minimum number of blocks need to allocate(required) + */ + target = blks + indirect_blks; + + while (1) { + count = target; + /* allocating blocks for indirect blocks and direct blocks */ + current_block = ext2_new_blocks(inode,goal,&count,err); + if (*err) + goto failed_out; + + target -= count; + /* allocate blocks for indirect blocks */ + while (index < indirect_blks && count) { + new_blocks[index++] = current_block++; + count--; + } + + if (count > 0) + break; } - write_unlock(&ei->i_meta_lock); - return -EAGAIN; + + /* save the new block number for the first direct block */ + new_blocks[index] = current_block; + + /* total number of blocks allocated for direct blocks */ + ret = count; + *err = 0; + return ret; +failed_out: + for (i = 0; i <index; i++) + ext2_free_blocks(inode, new_blocks[i], 1); + if (index) + mark_inode_dirty(inode); + return ret; } /** @@ -404,7 +459,7 @@ static inline int ext2_find_goal(struct inode *inode, * the same format as ext2_get_branch() would do. We are calling it after * we had read the existing part of chain and partial points to the last * triple of that (one with zero ->key). Upon the exit we have the same - * picture as after the successful ext2_get_block(), excpet that in one + * picture as after the successful ext2_get_block(), except that in one * place chain is disconnected - *branch->p is still zero (we did not * set the last link), but branch->key contains the number that should * be placed into *branch->p to fill that gap. 
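
The counting step performed by ext2_blks_to_allocate() can be shown in isolation: when the tail of the indirect chain is still missing, everything up to the boundary is unallocated by definition; otherwise the existing pointer block is scanned for consecutive zero slots. A sketch with a fabricated pointer array:

#include <stdint.h>
#include <stdio.h>

static unsigned count_blocks_to_allocate(const uint32_t *p, int missing_indirect,
                                         unsigned wanted, unsigned to_boundary)
{
        unsigned count;

        if (missing_indirect)   /* whole branch absent: cap at the boundary */
                return wanted < to_boundary + 1 ? wanted : to_boundary + 1;

        count = 1;              /* the first direct block is always needed */
        while (count < wanted && count <= to_boundary && p[count] == 0)
                count++;
        return count;
}

int main(void)
{
        uint32_t slots[8] = { 0, 0, 0, 700, 0, 0, 0, 0 };

        printf("allocate %u blocks\n",
               count_blocks_to_allocate(slots, 0, 6, 7));
        return 0;
}
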
@@ -416,119 +471,130 @@ static inline int ext2_find_goal(struct inode *inode, */ static int ext2_alloc_branch(struct inode *inode, - int num, - unsigned long goal, - int *offsets, - Indirect *branch) + int indirect_blks, int *blks, ext2_fsblk_t goal, + int *offsets, Indirect *branch) { int blocksize = inode->i_sb->s_blocksize; - int n = 0; - int err; - int i; - int parent = ext2_alloc_block(inode, goal, &err); - - branch[0].key = cpu_to_le32(parent); - if (parent) for (n = 1; n < num; n++) { - struct buffer_head *bh; - /* Allocate the next block */ - int nr = ext2_alloc_block(inode, parent, &err); - if (!nr) - break; - branch[n].key = cpu_to_le32(nr); + int i, n = 0; + int err = 0; + struct buffer_head *bh; + int num; + ext2_fsblk_t new_blocks[4]; + ext2_fsblk_t current_block; + + num = ext2_alloc_blocks(inode, goal, indirect_blks, + *blks, new_blocks, &err); + if (err) + return err; + + branch[0].key = cpu_to_le32(new_blocks[0]); + /* + * metadata blocks and data blocks are allocated. + */ + for (n = 1; n <= indirect_blks; n++) { /* - * Get buffer_head for parent block, zero it out and set - * the pointer to new one, then send parent to disk. + * Get buffer_head for parent block, zero it out + * and set the pointer to new one, then send + * parent to disk. */ - bh = sb_getblk(inode->i_sb, parent); - if (!bh) { - err = -EIO; - break; + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); + if (unlikely(!bh)) { + err = -ENOMEM; + goto failed; } + branch[n].bh = bh; lock_buffer(bh); memset(bh->b_data, 0, blocksize); - branch[n].bh = bh; branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); *branch[n].p = branch[n].key; + if ( n == indirect_blks) { + current_block = new_blocks[n]; + /* + * End of chain, update the last new metablock of + * the chain to point to the new allocated + * data blocks numbers + */ + for (i=1; i < num; i++) + *(branch[n].p + i) = cpu_to_le32(++current_block); + } set_buffer_uptodate(bh); unlock_buffer(bh); mark_buffer_dirty_inode(bh, inode); /* We used to sync bh here if IS_SYNC(inode). - * But we now rely upon generic_osync_inode() + * But we now rely upon generic_write_sync() * and b_inode_buffers. But not for directories. */ if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) sync_dirty_buffer(bh); - parent = nr; } - if (n == num) - return 0; + *blks = num; + return err; - /* Allocation failed, free what we already allocated */ +failed: for (i = 1; i < n; i++) bforget(branch[i].bh); - for (i = 0; i < n; i++) - ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1); + for (i = 0; i < indirect_blks; i++) + ext2_free_blocks(inode, new_blocks[i], 1); + ext2_free_blocks(inode, new_blocks[i], num); return err; } /** - * ext2_splice_branch - splice the allocated branch onto inode. - * @inode: owner - * @block: (logical) number of block we are adding - * @chain: chain of indirect blocks (with a missing link - see - * ext2_alloc_branch) - * @where: location of missing link - * @num: number of blocks we are adding + * ext2_splice_branch - splice the allocated branch onto inode. + * @inode: owner + * @block: (logical) number of block we are adding + * @where: location of missing link + * @num: number of indirect blocks we are adding + * @blks: number of direct blocks we are adding * - * This function verifies that chain (up to the missing link) had not - * changed, fills the missing link and does all housekeeping needed in - * inode (->i_blocks, etc.). In case of success we end up with the full - * chain to new block and return 0. 
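
Both ext2_alloc_branch() above and ext2_splice_branch() finish by writing a run of consecutive block numbers into adjacent slots of one indirect block. The fill itself is simple; a plain-C illustration with uint32_t standing in for the on-disk __le32:

#include <stdint.h>
#include <stdio.h>

/* point blks consecutive slots at first_block, first_block+1, ... */
static void splice_run(uint32_t *slot, uint32_t first_block, int blks)
{
        int i;

        slot[0] = first_block;
        for (i = 1; i < blks; i++)
                slot[i] = first_block + i;
}

int main(void)
{
        uint32_t map[6] = { 100, 101, 0, 0, 0, 0 };
        int i;

        splice_run(&map[2], 2048, 3);   /* map logical blocks 2..4 */
        for (i = 0; i < 6; i++)
                printf("slot %d -> %u\n", i, (unsigned)map[i]);
        return 0;
}
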
Otherwise (== chain had been changed) - * we free the new blocks (forgetting their buffer_heads, indeed) and - * return -EAGAIN. + * This function fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. */ - -static inline int ext2_splice_branch(struct inode *inode, - long block, - Indirect chain[4], - Indirect *where, - int num) +static void ext2_splice_branch(struct inode *inode, + long block, Indirect *where, int num, int blks) { - struct ext2_inode_info *ei = EXT2_I(inode); int i; + struct ext2_block_alloc_info *block_i; + ext2_fsblk_t current_block; - /* Verify that place we are splicing to is still there and vacant */ - - write_lock(&ei->i_meta_lock); - if (!verify_chain(chain, where-1) || *where->p) - goto changed; + block_i = EXT2_I(inode)->i_block_alloc_info; + /* XXX LOCKING probably should have i_meta_lock ?*/ /* That's it */ *where->p = where->key; - ei->i_next_alloc_block = block; - ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key); - write_unlock(&ei->i_meta_lock); + /* + * Update the host buffer_head or inode to point to more just allocated + * direct blocks blocks + */ + if (num == 0 && blks > 1) { + current_block = le32_to_cpu(where->key) + 1; + for (i = 1; i < blks; i++) + *(where->p + i ) = cpu_to_le32(current_block++); + } - /* We are done with atomic stuff, now do the rest of housekeeping */ + /* + * update the most recently allocated logical & physical block + * in i_block_alloc_info, to assist find the proper goal block for next + * allocation + */ + if (block_i) { + block_i->last_alloc_logical_block = block + blks - 1; + block_i->last_alloc_physical_block = + le32_to_cpu(where[num].key) + blks - 1; + } - inode->i_ctime = CURRENT_TIME_SEC; + /* We are done with atomic stuff, now do the rest of housekeeping */ /* had we spliced it onto indirect block? */ if (where->bh) mark_buffer_dirty_inode(where->bh, inode); + inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - return 0; - -changed: - write_unlock(&ei->i_meta_lock); - for (i = 1; i < num; i++) - bforget(where[i].bh); - for (i = 0; i < num; i++) - ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1); - return -EAGAIN; } /* @@ -542,64 +608,128 @@ changed: * That has a nice additional property: no special recovery from the failed * allocations is needed - we simply release blocks and do not touch anything * reachable from inode. + * + * `handle' can be NULL if create == 0. + * + * return > 0, # of blocks mapped or allocated. + * return = 0, if plain lookup failed. + * return < 0, error case. 
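
The return convention spelled out above (positive count of mapped blocks, zero for a hole, negative errno on error) is what ext2_get_block() later folds back into a buffer_head-style result. A self-contained sketch of that translation; get_blocks_stub(), struct result and the constants are stand-ins, not kernel types:

#include <stdio.h>

struct result {
        long block;             /* first physical block */
        unsigned long size;     /* bytes mapped         */
        int mapped;             /* buffer "mapped" flag */
};

static int get_blocks_stub(unsigned long iblock, unsigned long max, long *first)
{
        /* pretend logical blocks 0..9 are mapped contiguously at 5000 */
        if (iblock >= 10)
                return 0;                       /* hole */
        *first = 5000 + (long)iblock;
        return (10 - iblock) < max ? (int)(10 - iblock) : (int)max;
}

static int get_block(unsigned long iblock, unsigned blkbits, struct result *res)
{
        long first = 0;
        int n = get_blocks_stub(iblock, res->size >> blkbits, &first);

        if (n > 0) {
                res->block = first;
                res->mapped = 1;
                res->size = (unsigned long)n << blkbits;
                return 0;
        }
        return n;       /* 0 for a hole, negative errno otherwise */
}

int main(void)
{
        struct result res = { .block = 0, .size = 8 * 1024, .mapped = 0 };
        int ret = get_block(7, 10, &res);       /* 1 KiB blocks */

        printf("ret=%d block=%ld mapped=%d size=%lu\n",
               ret, res.block, res.mapped, res.size);
        return 0;
}
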
*/ - -int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) +static int ext2_get_blocks(struct inode *inode, + sector_t iblock, unsigned long maxblocks, + struct buffer_head *bh_result, + int create) { int err = -EIO; int offsets[4]; Indirect chain[4]; Indirect *partial; - unsigned long goal; - int left; - int boundary = 0; - int depth = ext2_block_to_path(inode, iblock, offsets, &boundary); + ext2_fsblk_t goal; + int indirect_blks; + int blocks_to_boundary = 0; + int depth; + struct ext2_inode_info *ei = EXT2_I(inode); + int count = 0; + ext2_fsblk_t first_block = 0; + + BUG_ON(maxblocks == 0); + + depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); if (depth == 0) - goto out; + return (err); -reread: partial = ext2_get_branch(inode, depth, offsets, chain, &err); - /* Simplest case - block found, no allocation needed */ if (!partial) { -got_it: - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); - if (boundary) - set_buffer_boundary(bh_result); - /* Clean up and exit */ - partial = chain+depth-1; /* the whole chain */ - goto cleanup; + first_block = le32_to_cpu(chain[depth - 1].key); + clear_buffer_new(bh_result); /* What's this do? */ + count++; + /*map more blocks*/ + while (count < maxblocks && count <= blocks_to_boundary) { + ext2_fsblk_t blk; + + if (!verify_chain(chain, chain + depth - 1)) { + /* + * Indirect block might be removed by + * truncate while we were reading it. + * Handling of that case: forget what we've + * got now, go to reread. + */ + err = -EAGAIN; + count = 0; + break; + } + blk = le32_to_cpu(*(chain[depth-1].p + count)); + if (blk == first_block + count) + count++; + else + break; + } + if (err != -EAGAIN) + goto got_it; } /* Next simple case - plain lookup or failed read of indirect block */ - if (!create || err == -EIO) { -cleanup: + if (!create || err == -EIO) + goto cleanup; + + mutex_lock(&ei->truncate_mutex); + /* + * If the indirect block is missing while we are reading + * the chain(ext2_get_branch() returns -EAGAIN err), or + * if the chain has been changed after we grab the semaphore, + * (either because another process truncated this branch, or + * another get_block allocated this branch) re-grab the chain to see if + * the request block has been allocated or not. + * + * Since we already block the truncate/other get_block + * at this point, we will have the current copy of the chain when we + * splice the branch into the tree. + */ + if (err == -EAGAIN || !verify_chain(chain, partial)) { while (partial > chain) { brelse(partial->bh); partial--; } -out: - return err; + partial = ext2_get_branch(inode, depth, offsets, chain, &err); + if (!partial) { + count++; + mutex_unlock(&ei->truncate_mutex); + if (err) + goto cleanup; + clear_buffer_new(bh_result); + goto got_it; + } } /* - * Indirect block might be removed by truncate while we were - * reading it. Handling of that case (forget what we've got and - * reread) is taken out of the main path. - */ - if (err == -EAGAIN) - goto changed; + * Okay, we need to do block allocation. 
Lazily initialize the block + * allocation info here if necessary + */ + if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) + ext2_init_block_alloc_info(inode); - goal = 0; - if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0) - goto changed; + goal = ext2_find_goal(inode, iblock, partial); - left = (chain + depth) - partial; - err = ext2_alloc_branch(inode, left, goal, - offsets+(partial-chain), partial); - if (err) + /* the number of blocks need to allocate for [d,t]indirect blocks */ + indirect_blks = (chain + depth) - partial - 1; + /* + * Next look up the indirect map to count the totoal number of + * direct blocks to allocate for this branch. + */ + count = ext2_blks_to_allocate(partial, indirect_blks, + maxblocks, blocks_to_boundary); + /* + * XXX ???? Block out ext2_truncate while we alter the tree + */ + err = ext2_alloc_branch(inode, indirect_blks, &count, goal, + offsets + (partial - chain), partial); + + if (err) { + mutex_unlock(&ei->truncate_mutex); goto cleanup; + } if (ext2_use_xip(inode->i_sb)) { /* @@ -607,22 +737,48 @@ out: */ err = ext2_clear_xip_target (inode, le32_to_cpu(chain[depth-1].key)); - if (err) + if (err) { + mutex_unlock(&ei->truncate_mutex); goto cleanup; + } } - if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0) - goto changed; - + ext2_splice_branch(inode, iblock, partial, indirect_blks, count); + mutex_unlock(&ei->truncate_mutex); set_buffer_new(bh_result); - goto got_it; - -changed: +got_it: + map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + if (count > blocks_to_boundary) + set_buffer_boundary(bh_result); + err = count; + /* Clean up and exit */ + partial = chain + depth - 1; /* the whole chain */ +cleanup: while (partial > chain) { brelse(partial->bh); partial--; } - goto reread; + return err; +} + +int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) +{ + unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; + int ret = ext2_get_blocks(inode, iblock, max_blocks, + bh_result, create); + if (ret > 0) { + bh_result->b_size = (ret << inode->i_blkbits); + ret = 0; + } + return ret; + +} + +int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + return generic_block_fiemap(inode, fieinfo, start, len, + ext2_get_block); } static int ext2_writepage(struct page *page, struct writeback_control *wbc) @@ -643,17 +799,43 @@ ext2_readpages(struct file *file, struct address_space *mapping, } static int -ext2_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +ext2_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, + ext2_get_block); + if (ret < 0) + ext2_write_failed(mapping, pos + len); + return ret; +} + +static int ext2_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) { - return block_prepare_write(page,from,to,ext2_get_block); + int ret; + + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (ret < len) + ext2_write_failed(mapping, pos + len); + return ret; } static int -ext2_nobh_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) +ext2_nobh_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void 
**fsdata) { - return nobh_prepare_write(page,from,to,ext2_get_block); + int ret; + + ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata, + ext2_get_block); + if (ret < 0) + ext2_write_failed(mapping, pos + len); + return ret; } static int ext2_nobh_writepage(struct page *page, @@ -668,14 +850,19 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) } static ssize_t -ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - - return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, ext2_get_block, NULL); + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); + ssize_t ret; + + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block); + if (ret < 0 && (rw & WRITE)) + ext2_write_failed(mapping, offset + count); + return ret; } static int @@ -688,31 +875,32 @@ const struct address_space_operations ext2_aops = { .readpage = ext2_readpage, .readpages = ext2_readpages, .writepage = ext2_writepage, - .sync_page = block_sync_page, - .prepare_write = ext2_prepare_write, - .commit_write = generic_commit_write, + .write_begin = ext2_write_begin, + .write_end = ext2_write_end, .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, }; const struct address_space_operations ext2_aops_xip = { .bmap = ext2_bmap, - .get_xip_page = ext2_get_xip_page, + .get_xip_mem = ext2_get_xip_mem, }; const struct address_space_operations ext2_nobh_aops = { .readpage = ext2_readpage, .readpages = ext2_readpages, .writepage = ext2_nobh_writepage, - .sync_page = block_sync_page, - .prepare_write = ext2_nobh_prepare_write, - .commit_write = nobh_commit_write, + .write_begin = ext2_nobh_write_begin, + .write_end = nobh_write_end, .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, .migratepage = buffer_migrate_page, + .error_remove_page = generic_error_remove_page, }; /* @@ -740,7 +928,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * * When we do truncate() we may have to clean the ends of several indirect * blocks but leave the blocks themselves alive. Block is partially - * truncated if some data below the new i_size is refered from it (and + * truncated if some data below the new i_size is referred from it (and * it is on the path to the first completely truncated data block, indeed). * We have to free the top of that path along with everything to the right * of the path. Since no allocation past the truncation point is possible @@ -817,7 +1005,7 @@ no_top: * @p: array of block numbers * @q: points immediately past the end of array * - * We are freeing all blocks refered from that array (numbers are + * We are freeing all blocks referred from that array (numbers are * stored as little-endian 32-bit) and updating @inode->i_blocks * appropriately. 
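
ext2_free_data() releases the blocks referred from such an array in batches: adjacent block numbers are merged into a single (start, count) request so the block allocator is called once per contiguous run. A userspace sketch of that coalescing, with the actual free replaced by a printf:

#include <stdint.h>
#include <stdio.h>

static void free_run(uint32_t start, uint32_t count)
{
        printf("free %u blocks starting at %u\n",
               (unsigned)count, (unsigned)start);
}

static void free_data(const uint32_t *p, const uint32_t *q)
{
        uint32_t block_to_free = 0, count = 0, nr;

        for (; p < q; p++) {
                nr = *p;
                if (!nr)
                        continue;               /* hole, nothing to free */
                if (count == 0 || nr != block_to_free + count) {
                        if (count)
                                free_run(block_to_free, count);
                        block_to_free = nr;     /* start a new run */
                        count = 1;
                } else {
                        count++;                /* extend the current run */
                }
        }
        if (count)
                free_run(block_to_free, count);
}

int main(void)
{
        uint32_t blocks[] = { 100, 101, 102, 0, 200, 201, 300 };

        free_data(blocks, blocks + 7);
        return 0;
}
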
*/ @@ -836,8 +1024,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) else if (block_to_free == nr - count) count++; else { - mark_inode_dirty(inode); ext2_free_blocks (inode, block_to_free, count); + mark_inode_dirty(inode); free_this: block_to_free = nr; count = 1; @@ -845,8 +1033,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) } } if (count > 0) { - mark_inode_dirty(inode); ext2_free_blocks (inode, block_to_free, count); + mark_inode_dirty(inode); } } @@ -857,7 +1045,7 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q) * @q: pointer immediately past the end of array * @depth: depth of the branches to free * - * We are freeing all blocks refered from these branches (numbers are + * We are freeing all blocks referred from these branches (numbers are * stored as little-endian 32-bit) and updating @inode->i_blocks * appropriately. */ @@ -896,9 +1084,10 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de ext2_free_data(inode, p, q); } -void ext2_truncate (struct inode * inode) +static void __ext2_truncate_blocks(struct inode *inode, loff_t offset) { __le32 *i_data = EXT2_I(inode)->i_data; + struct ext2_inode_info *ei = EXT2_I(inode); int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); int offsets[4]; Indirect chain[4]; @@ -907,33 +1096,19 @@ void ext2_truncate (struct inode * inode) int n; long iblock; unsigned blocksize; - - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; - if (ext2_inode_is_fast_symlink(inode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - - ext2_discard_prealloc(inode); - blocksize = inode->i_sb->s_blocksize; - iblock = (inode->i_size + blocksize-1) - >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); - - if (mapping_is_xip(inode->i_mapping)) - xip_truncate_page(inode->i_mapping, inode->i_size); - else if (test_opt(inode->i_sb, NOBH)) - nobh_truncate_page(inode->i_mapping, inode->i_size); - else - block_truncate_page(inode->i_mapping, - inode->i_size, ext2_get_block); + iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); n = ext2_block_to_path(inode, iblock, offsets, NULL); if (n == 0) return; + /* + * From here we block out all ext2_get_block() callers who want to + * modify the block allocation tree. + */ + mutex_lock(&ei->truncate_mutex); + if (n == 1) { ext2_free_data(inode, i_data+offsets[0], i_data + EXT2_NDIR_BLOCKS); @@ -986,13 +1161,69 @@ do_indirects: case EXT2_TIND_BLOCK: ; } + + ext2_discard_reservation(inode); + + mutex_unlock(&ei->truncate_mutex); +} + +static void ext2_truncate_blocks(struct inode *inode, loff_t offset) +{ + /* + * XXX: it seems like a bug here that we don't allow + * IS_APPEND inode to have blocks-past-i_size trimmed off. + * review and fix this. + * + * Also would be nice to be able to handle IO errors and such, + * but that's probably too much to ask. 
+ */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (ext2_inode_is_fast_symlink(inode)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + __ext2_truncate_blocks(inode, offset); +} + +static int ext2_setsize(struct inode *inode, loff_t newsize) +{ + int error; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return -EINVAL; + if (ext2_inode_is_fast_symlink(inode)) + return -EINVAL; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return -EPERM; + + inode_dio_wait(inode); + + if (mapping_is_xip(inode->i_mapping)) + error = xip_truncate_page(inode->i_mapping, newsize); + else if (test_opt(inode->i_sb, NOBH)) + error = nobh_truncate_page(inode->i_mapping, + newsize, ext2_get_block); + else + error = block_truncate_page(inode->i_mapping, + newsize, ext2_get_block); + if (error) + return error; + + truncate_setsize(inode, newsize); + __ext2_truncate_blocks(inode, newsize); + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; if (inode_needs_sync(inode)) { sync_mapping_buffers(inode->i_mapping); - ext2_sync_inode (inode); + sync_inode_metadata(inode, 1); } else { mark_inode_dirty(inode); } + + return 0; } static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino, @@ -1010,7 +1241,7 @@ static struct ext2_inode *ext2_get_inode(struct super_block *sb, ino_t ino, goto Einval; block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb); - gdp = ext2_get_group_desc(sb, block_group, &bh); + gdp = ext2_get_group_desc(sb, block_group, NULL); if (!gdp) goto Egdp; /* @@ -1055,33 +1286,65 @@ void ext2_set_inode_flags(struct inode *inode) inode->i_flags |= S_DIRSYNC; } -void ext2_read_inode (struct inode * inode) +/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ +void ext2_get_inode_flags(struct ext2_inode_info *ei) { - struct ext2_inode_info *ei = EXT2_I(inode); - ino_t ino = inode->i_ino; + unsigned int flags = ei->vfs_inode.i_flags; + + ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| + EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); + if (flags & S_SYNC) + ei->i_flags |= EXT2_SYNC_FL; + if (flags & S_APPEND) + ei->i_flags |= EXT2_APPEND_FL; + if (flags & S_IMMUTABLE) + ei->i_flags |= EXT2_IMMUTABLE_FL; + if (flags & S_NOATIME) + ei->i_flags |= EXT2_NOATIME_FL; + if (flags & S_DIRSYNC) + ei->i_flags |= EXT2_DIRSYNC_FL; +} + +struct inode *ext2_iget (struct super_block *sb, unsigned long ino) +{ + struct ext2_inode_info *ei; struct buffer_head * bh; - struct ext2_inode * raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + struct ext2_inode *raw_inode; + struct inode *inode; + long ret = -EIO; int n; + uid_t i_uid; + gid_t i_gid; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - ei->i_acl = EXT2_ACL_NOT_CACHED; - ei->i_default_acl = EXT2_ACL_NOT_CACHED; -#endif - if (IS_ERR(raw_inode)) + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + ei = EXT2_I(inode); + ei->i_block_alloc_info = NULL; + + raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); + if (IS_ERR(raw_inode)) { + ret = PTR_ERR(raw_inode); goto bad_inode; + } inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); if (!(test_opt (inode->i_sb, NO_UID32))) { - inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - inode->i_gid |= 
le16_to_cpu(raw_inode->i_gid_high) << 16; + i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } - inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + i_uid_write(inode, i_uid); + i_gid_write(inode, i_gid); + set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); + inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0; ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); /* We now have enough fields to check if the inode was active or not. @@ -1092,6 +1355,7 @@ void ext2_read_inode (struct inode * inode) if (inode->i_nlink == 0 && (inode->i_mode == 0 || ei->i_dtime)) { /* this inode is deleted */ brelse (bh); + ret = -ESTALE; goto bad_inode; } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); @@ -1108,9 +1372,6 @@ void ext2_read_inode (struct inode * inode) ei->i_dtime = 0; inode->i_generation = le32_to_cpu(raw_inode->i_generation); ei->i_state = 0; - ei->i_next_alloc_block = 0; - ei->i_next_alloc_goal = 0; - ei->i_prealloc_count = 0; ei->i_block_group = (ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); ei->i_dir_start_lookup = 0; @@ -1141,9 +1402,11 @@ void ext2_read_inode (struct inode * inode) else inode->i_mapping->a_ops = &ext2_aops; } else if (S_ISLNK(inode->i_mode)) { - if (ext2_inode_is_fast_symlink(inode)) + if (ext2_inode_is_fast_symlink(inode)) { inode->i_op = &ext2_fast_symlink_inode_operations; - else { + nd_terminate_link(ei->i_data, inode->i_size, + sizeof(ei->i_data) - 1); + } else { inode->i_op = &ext2_symlink_inode_operations; if (test_opt(inode->i_sb, NOBH)) inode->i_mapping->a_ops = &ext2_nobh_aops; @@ -1161,20 +1424,21 @@ void ext2_read_inode (struct inode * inode) } brelse (bh); ext2_set_inode_flags(inode); - return; + unlock_new_inode(inode); + return inode; bad_inode: - make_bad_inode(inode); - return; + iget_failed(inode); + return ERR_PTR(ret); } -static int ext2_update_inode(struct inode * inode, int do_sync) +static int __ext2_write_inode(struct inode *inode, int do_sync) { struct ext2_inode_info *ei = EXT2_I(inode); struct super_block *sb = inode->i_sb; ino_t ino = inode->i_ino; - uid_t uid = inode->i_uid; - gid_t gid = inode->i_gid; + uid_t uid = i_uid_read(inode); + gid_t gid = i_gid_read(inode); struct buffer_head * bh; struct ext2_inode * raw_inode = ext2_get_inode(sb, ino, &bh); int n; @@ -1188,6 +1452,7 @@ static int ext2_update_inode(struct inode * inode, int do_sync) if (ei->i_state & EXT2_STATE_NEW) memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); + ext2_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if (!(test_opt(sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); @@ -1234,11 +1499,11 @@ static int ext2_update_inode(struct inode * inode, int do_sync) /* If this is the first large file * created, add a flag to the superblock. 
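
The owner handling above reflects the on-disk format: the inode stores uid and gid as two 16-bit halves, and the high half is only honoured when the filesystem is not mounted with NO_UID32. A standalone sketch of the packing and unpacking:

#include <stdint.h>
#include <stdio.h>

static uint32_t read_id(uint16_t low, uint16_t high, int no_uid32)
{
        uint32_t id = low;

        if (!no_uid32)
                id |= (uint32_t)high << 16;
        return id;
}

static void write_id(uint32_t id, uint16_t *low, uint16_t *high)
{
        *low = (uint16_t)(id & 0xffff);
        *high = (uint16_t)(id >> 16);
}

int main(void)
{
        uint16_t lo, hi;

        write_id(100001, &lo, &hi);
        printf("low=%u high=%u -> uid=%u\n",
               (unsigned)lo, (unsigned)hi, (unsigned)read_id(lo, hi, 0));
        return 0;
}
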
*/ - lock_kernel(); + spin_lock(&EXT2_SB(sb)->s_lock); ext2_update_dynamic_rev(sb); EXT2_SET_RO_COMPAT_FEATURE(sb, EXT2_FEATURE_RO_COMPAT_LARGE_FILE); - unlock_kernel(); + spin_unlock(&EXT2_SB(sb)->s_lock); ext2_write_super(sb); } } @@ -1272,18 +1537,9 @@ static int ext2_update_inode(struct inode * inode, int do_sync) return err; } -int ext2_write_inode(struct inode *inode, int wait) -{ - return ext2_update_inode(inode, wait); -} - -int ext2_sync_inode(struct inode *inode) +int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) { - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .nr_to_write = 0, /* sys_fsync did this */ - }; - return sync_inode(inode, &wbc); + return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); } int ext2_setattr(struct dentry *dentry, struct iattr *iattr) @@ -1294,14 +1550,24 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr) error = inode_change_ok(inode, iattr); if (error) return error; - if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) || - (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) { - error = DQUOT_TRANSFER(inode, iattr) ? -EDQUOT : 0; + + if (is_quota_modification(inode, iattr)) + dquot_initialize(inode); + if ((iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) || + (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid))) { + error = dquot_transfer(inode, iattr); if (error) return error; } - error = inode_setattr(inode, iattr); - if (!error && (iattr->ia_valid & ATTR_MODE)) - error = ext2_acl_chmod(inode); + if (iattr->ia_valid & ATTR_SIZE && iattr->ia_size != inode->i_size) { + error = ext2_setsize(inode, iattr->ia_size); + if (error) + return error; + } + setattr_copy(inode, iattr); + if (iattr->ia_valid & ATTR_MODE) + error = posix_acl_chmod(inode, inode->i_mode); + mark_inode_dirty(inode); + return error; } diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 3ca9afdf713..5d46c09863f 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -11,37 +11,53 @@ #include <linux/capability.h> #include <linux/time.h> #include <linux/sched.h> +#include <linux/compat.h> +#include <linux/mount.h> #include <asm/current.h> #include <asm/uaccess.h> -int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) +long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + struct inode *inode = file_inode(filp); struct ext2_inode_info *ei = EXT2_I(inode); unsigned int flags; + unsigned short rsv_window_size; + int ret; ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { case EXT2_IOC_GETFLAGS: + ext2_get_inode_flags(ei); flags = ei->i_flags & EXT2_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT2_IOC_SETFLAGS: { unsigned int oldflags; - if (IS_RDONLY(inode)) - return -EROFS; + ret = mnt_want_write_file(filp); + if (ret) + return ret; - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) - return -EACCES; + if (!inode_owner_or_capable(inode)) { + ret = -EACCES; + goto setflags_out; + } - if (get_user(flags, (int __user *) arg)) - return -EFAULT; + if (get_user(flags, (int __user *) arg)) { + ret = -EFAULT; + goto setflags_out; + } - if (!S_ISDIR(inode->i_mode)) - flags &= ~EXT2_DIRSYNC_FL; + flags = ext2_mask_flags(inode->i_mode, flags); + mutex_lock(&inode->i_mutex); + /* Is it quota file? 
Do not allow user to mess with it */ + if (IS_NOQUOTA(inode)) { + mutex_unlock(&inode->i_mutex); + ret = -EPERM; + goto setflags_out; + } oldflags = ei->i_flags; /* @@ -51,8 +67,11 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * This test looks nicer. Thanks to Pauline Middelink */ if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + ret = -EPERM; + goto setflags_out; + } } flags = flags & EXT2_FL_USER_MODIFIABLE; @@ -61,22 +80,109 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, ext2_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME_SEC; + mutex_unlock(&inode->i_mutex); + mark_inode_dirty(inode); - return 0; +setflags_out: + mnt_drop_write_file(filp); + return ret; } case EXT2_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *) arg); - case EXT2_IOC_SETVERSION: - if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + case EXT2_IOC_SETVERSION: { + __u32 generation; + + if (!inode_owner_or_capable(inode)) return -EPERM; - if (IS_RDONLY(inode)) - return -EROFS; - if (get_user(inode->i_generation, (int __user *) arg)) - return -EFAULT; + ret = mnt_want_write_file(filp); + if (ret) + return ret; + if (get_user(generation, (int __user *) arg)) { + ret = -EFAULT; + goto setversion_out; + } + + mutex_lock(&inode->i_mutex); inode->i_ctime = CURRENT_TIME_SEC; + inode->i_generation = generation; + mutex_unlock(&inode->i_mutex); + mark_inode_dirty(inode); +setversion_out: + mnt_drop_write_file(filp); + return ret; + } + case EXT2_IOC_GETRSVSZ: + if (test_opt(inode->i_sb, RESERVATION) + && S_ISREG(inode->i_mode) + && ei->i_block_alloc_info) { + rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; + return put_user(rsv_window_size, (int __user *)arg); + } + return -ENOTTY; + case EXT2_IOC_SETRSVSZ: { + + if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) + return -ENOTTY; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (get_user(rsv_window_size, (int __user *)arg)) + return -EFAULT; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (rsv_window_size > EXT2_MAX_RESERVE_BLOCKS) + rsv_window_size = EXT2_MAX_RESERVE_BLOCKS; + + /* + * need to allocate reservation structure for this inode + * before set the window size + */ + /* + * XXX What lock should protect the rsv_goal_size? + * Accessed in ext2_get_block only. ext3 uses i_truncate. 
+ */ + mutex_lock(&ei->truncate_mutex); + if (!ei->i_block_alloc_info) + ext2_init_block_alloc_info(inode); + + if (ei->i_block_alloc_info){ + struct ext2_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; + rsv->rsv_goal_size = rsv_window_size; + } + mutex_unlock(&ei->truncate_mutex); + mnt_drop_write_file(filp); return 0; + } default: return -ENOTTY; } } + +#ifdef CONFIG_COMPAT +long ext2_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case EXT2_IOC32_GETFLAGS: + cmd = EXT2_IOC_GETFLAGS; + break; + case EXT2_IOC32_SETFLAGS: + cmd = EXT2_IOC_SETFLAGS; + break; + case EXT2_IOC32_GETVERSION: + cmd = EXT2_IOC_GETVERSION; + break; + case EXT2_IOC32_SETVERSION: + cmd = EXT2_IOC_SETVERSION; + break; + default: + return -ENOIOCTLCMD; + } + return ext2_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); +} +#endif diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 4ca82498532..c268d0af1db 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -31,6 +31,7 @@ */ #include <linux/pagemap.h> +#include <linux/quotaops.h> #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -40,10 +41,12 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ext2_add_link(dentry, inode); if (!err) { + unlock_new_inode(inode); d_instantiate(dentry, inode); return 0; } inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -52,7 +55,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) * Methods themselves. */ -static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode; ino_t ino; @@ -60,39 +63,27 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str if (dentry->d_name.len > EXT2_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = ext2_inode_by_name(dir, dentry); + ino = ext2_inode_by_name(dir, &dentry->d_name); inode = NULL; if (ino) { - inode = iget(dir->i_sb, ino); - if (!inode) - return ERR_PTR(-EACCES); + inode = ext2_iget(dir->i_sb, ino); + if (inode == ERR_PTR(-ESTALE)) { + ext2_error(dir->i_sb, __func__, + "deleted inode referenced: %lu", + (unsigned long) ino); + return ERR_PTR(-EIO); + } } return d_splice_alias(inode, dentry); } struct dentry *ext2_get_parent(struct dentry *child) { - unsigned long ino; - struct dentry *parent; - struct inode *inode; - struct dentry dotdot; - - dotdot.d_name.name = ".."; - dotdot.d_name.len = 2; - - ino = ext2_inode_by_name(child->d_inode, &dotdot); + struct qstr dotdot = QSTR_INIT("..", 2); + unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot); if (!ino) return ERR_PTR(-ENOENT); - inode = iget(child->d_inode->i_sb, ino); - - if (!inode) - return ERR_PTR(-EACCES); - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } - return parent; + return d_obtain_alias(ext2_iget(child->d_inode->i_sb, ino)); } /* @@ -103,29 +94,55 @@ struct dentry *ext2_get_parent(struct dentry *child) * If the create succeeds, we fill in the inode information * with d_instantiate(). 
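
ext2_compat_ioctl() above only has to rewrite command numbers, because each of these ioctls moves a plain int between kernel and user space and is therefore layout-compatible between 32-bit and 64-bit callers. A sketch of that translation shape; the command values and do_ioctl() below are placeholders, not the real EXT2_IOC constants:

#include <stdio.h>

enum { IOC32_GETFLAGS = 1, IOC32_SETFLAGS, IOC_GETFLAGS = 101, IOC_SETFLAGS };

static long do_ioctl(unsigned int cmd, unsigned long arg)
{
        printf("native ioctl %u, arg %lu\n", cmd, arg);
        return 0;
}

static long compat_ioctl(unsigned int cmd, unsigned long arg)
{
        switch (cmd) {
        case IOC32_GETFLAGS:
                cmd = IOC_GETFLAGS;
                break;
        case IOC32_SETFLAGS:
                cmd = IOC_SETFLAGS;
                break;
        default:
                return -1;      /* the kernel would return -ENOIOCTLCMD */
        }
        return do_ioctl(cmd, arg);      /* forward with the native number */
}

int main(void)
{
        compat_ioctl(IOC32_SETFLAGS, 42);
        return 0;
}
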
*/ -static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) +static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) { - struct inode * inode = ext2_new_inode (dir, mode); - int err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext2_file_inode_operations; - if (ext2_use_xip(inode->i_sb)) { - inode->i_mapping->a_ops = &ext2_aops_xip; - inode->i_fop = &ext2_xip_file_operations; - } else if (test_opt(inode->i_sb, NOBH)) { - inode->i_mapping->a_ops = &ext2_nobh_aops; - inode->i_fop = &ext2_file_operations; - } else { - inode->i_mapping->a_ops = &ext2_aops; - inode->i_fop = &ext2_file_operations; - } - mark_inode_dirty(inode); - err = ext2_add_nondir(dentry, inode); + struct inode *inode; + + dquot_initialize(dir); + + inode = ext2_new_inode(dir, mode, &dentry->d_name); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &ext2_file_inode_operations; + if (ext2_use_xip(inode->i_sb)) { + inode->i_mapping->a_ops = &ext2_aops_xip; + inode->i_fop = &ext2_xip_file_operations; + } else if (test_opt(inode->i_sb, NOBH)) { + inode->i_mapping->a_ops = &ext2_nobh_aops; + inode->i_fop = &ext2_file_operations; + } else { + inode->i_mapping->a_ops = &ext2_aops; + inode->i_fop = &ext2_file_operations; } - return err; + mark_inode_dirty(inode); + return ext2_add_nondir(dentry, inode); } -static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) +static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode = ext2_new_inode(dir, mode, NULL); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &ext2_file_inode_operations; + if (ext2_use_xip(inode->i_sb)) { + inode->i_mapping->a_ops = &ext2_aops_xip; + inode->i_fop = &ext2_xip_file_operations; + } else if (test_opt(inode->i_sb, NOBH)) { + inode->i_mapping->a_ops = &ext2_nobh_aops; + inode->i_fop = &ext2_file_operations; + } else { + inode->i_mapping->a_ops = &ext2_aops; + inode->i_fop = &ext2_file_operations; + } + mark_inode_dirty(inode); + d_tmpfile(dentry, inode); + unlock_new_inode(inode); + return 0; +} + +static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode * inode; int err; @@ -133,7 +150,9 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_ if (!new_valid_dev(rdev)) return -EINVAL; - inode = ext2_new_inode (dir, mode); + dquot_initialize(dir); + + inode = ext2_new_inode (dir, mode, &dentry->d_name); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); @@ -157,7 +176,9 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out; - inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); + dquot_initialize(dir); + + inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO, &dentry->d_name); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out; @@ -186,6 +207,7 @@ out: out_fail: inode_dec_link_count(inode); + unlock_new_inode(inode); iput (inode); goto out; } @@ -194,28 +216,34 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; + int err; - if (inode->i_nlink >= EXT2_LINK_MAX) - return -EMLINK; + dquot_initialize(dir); inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + ihold(inode); - return ext2_add_nondir(dentry, inode); + err = ext2_add_link(dentry, inode); + if (!err) { + 
d_instantiate(dentry, inode); + return 0; + } + inode_dec_link_count(inode); + iput(inode); + return err; } -static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) +static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) { struct inode * inode; - int err = -EMLINK; + int err; - if (dir->i_nlink >= EXT2_LINK_MAX) - goto out; + dquot_initialize(dir); inode_inc_link_count(dir); - inode = ext2_new_inode (dir, S_IFDIR | mode); + inode = ext2_new_inode(dir, S_IFDIR | mode, &dentry->d_name); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_dir; @@ -237,6 +265,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (err) goto out_fail; + unlock_new_inode(inode); d_instantiate(dentry, inode); out: return err; @@ -244,6 +273,7 @@ out: out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); out_dir: inode_dec_link_count(dir); @@ -257,7 +287,9 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) struct page * page; int err = -ENOENT; - de = ext2_find_entry (dir, dentry, &page); + dquot_initialize(dir); + + de = ext2_find_entry (dir, &dentry->d_name, &page); if (!de) goto out; @@ -299,7 +331,10 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, struct ext2_dir_entry_2 * old_de; int err = -ENOENT; - old_de = ext2_find_entry (old_dir, old_dentry, &old_page); + dquot_initialize(old_dir); + dquot_initialize(new_dir); + + old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; @@ -319,27 +354,18 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, goto out_dir; err = -ENOENT; - new_de = ext2_find_entry (new_dir, new_dentry, &new_page); + new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); if (!new_de) goto out_dir; - inode_inc_link_count(old_inode); - ext2_set_link(new_dir, new_de, new_page, old_inode); + ext2_set_link(new_dir, new_de, new_page, old_inode, 1); new_inode->i_ctime = CURRENT_TIME_SEC; if (dir_de) - new_inode->i_nlink--; + drop_nlink(new_inode); inode_dec_link_count(new_inode); } else { - if (dir_de) { - err = -EMLINK; - if (new_dir->i_nlink >= EXT2_LINK_MAX) - goto out_dir; - } - inode_inc_link_count(old_inode); err = ext2_add_link(new_dentry, old_inode); - if (err) { - inode_dec_link_count(old_inode); + if (err) goto out_dir; - } if (dir_de) inode_inc_link_count(new_dir); } @@ -347,15 +373,19 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, /* * Like most other Unix systems, set the ctime for inodes on a * rename. - * inode_dec_link_count() will mark the inode dirty. 
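The ctime behaviour described in this comment is easy to observe from userspace; a throwaway test follows (file names are arbitrary), with the caveat that ext2 timestamps here have one-second granularity, hence the sleep:

/* Sketch: on ext2 the renamed inode's ctime should move forward, as the
 * comment above states. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

int main(void)
{
	struct stat before, after;
	int fd = open("a", O_CREAT | O_WRONLY, 0644);

	if (fd < 0) { perror("open"); return 1; }
	close(fd);

	if (stat("a", &before) < 0) { perror("stat"); return 1; }
	sleep(1);			/* one-second timestamp granularity */
	if (rename("a", "b") < 0) { perror("rename"); return 1; }
	if (stat("b", &after) < 0) { perror("stat"); return 1; }

	printf("ctime before: %ld, after rename: %ld\n",
	       (long)before.st_ctime, (long)after.st_ctime);
	return 0;
}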
*/ old_inode->i_ctime = CURRENT_TIME_SEC; + mark_inode_dirty(old_inode); ext2_delete_entry (old_de, old_page); - inode_dec_link_count(old_inode); if (dir_de) { - ext2_set_link(old_inode, dir_de, dir_page, new_dir); + if (old_dir != new_dir) + ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); + else { + kunmap(dir_page); + page_cache_release(dir_page); + } inode_dec_link_count(old_dir); } return 0; @@ -373,7 +403,7 @@ out: return err; } -struct inode_operations ext2_dir_inode_operations = { +const struct inode_operations ext2_dir_inode_operations = { .create = ext2_create, .lookup = ext2_lookup, .link = ext2_link, @@ -390,10 +420,12 @@ struct inode_operations ext2_dir_inode_operations = { .removexattr = generic_removexattr, #endif .setattr = ext2_setattr, - .permission = ext2_permission, + .get_acl = ext2_get_acl, + .set_acl = ext2_set_acl, + .tmpfile = ext2_tmpfile, }; -struct inode_operations ext2_special_inode_operations = { +const struct inode_operations ext2_special_inode_operations = { #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -401,5 +433,6 @@ struct inode_operations ext2_special_inode_operations = { .removexattr = generic_removexattr, #endif .setattr = ext2_setattr, - .permission = ext2_permission, + .get_acl = ext2_get_acl, + .set_acl = ext2_set_acl, }; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 513cd421ac0..3750031cfa2 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -25,10 +25,12 @@ #include <linux/parser.h> #include <linux/random.h> #include <linux/buffer_head.h> -#include <linux/smp_lock.h> +#include <linux/exportfs.h> #include <linux/vfs.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/log2.h> +#include <linux/quotaops.h> #include <asm/uaccess.h> #include "ext2.h" #include "xattr.h" @@ -36,51 +38,67 @@ #include "xip.h" static void ext2_sync_super(struct super_block *sb, - struct ext2_super_block *es); + struct ext2_super_block *es, int wait); static int ext2_remount (struct super_block * sb, int * flags, char * data); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); +static int ext2_sync_fs(struct super_block *sb, int wait); +static int ext2_freeze(struct super_block *sb); +static int ext2_unfreeze(struct super_block *sb); -void ext2_error (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext2_error(struct super_block *sb, const char *function, + const char *fmt, ...) 
{ + struct va_format vaf; va_list args; struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = sbi->s_es; if (!(sb->s_flags & MS_RDONLY)) { + spin_lock(&sbi->s_lock); sbi->s_mount_state |= EXT2_ERROR_FS; - es->s_state = - cpu_to_le16(le16_to_cpu(es->s_state) | EXT2_ERROR_FS); - ext2_sync_super(sb, es); + es->s_state |= cpu_to_le16(EXT2_ERROR_FS); + spin_unlock(&sbi->s_lock); + ext2_sync_super(sb, es, 1); } va_start(args, fmt); - printk(KERN_CRIT "EXT2-fs error (device %s): %s: ",sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk(KERN_CRIT "EXT2-fs (%s): error: %s: %pV\n", + sb->s_id, function, &vaf); + va_end(args); if (test_opt(sb, ERRORS_PANIC)) - panic("EXT2-fs panic from previous error\n"); + panic("EXT2-fs: panic from previous error\n"); if (test_opt(sb, ERRORS_RO)) { - printk("Remounting filesystem read-only\n"); + ext2_msg(sb, KERN_CRIT, + "error: remounting filesystem read-only"); sb->s_flags |= MS_RDONLY; } } -void ext2_warning (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext2_msg(struct super_block *sb, const char *prefix, + const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - printk(KERN_WARNING "EXT2-fs warning (device %s): %s: ", - sb->s_id, function); - vprintk(fmt, args); - printk("\n"); + + vaf.fmt = fmt; + vaf.va = &args; + + printk("%sEXT2-fs (%s): %pV\n", prefix, sb->s_id, &vaf); + va_end(args); } +/* + * This must be called with sbi->s_lock held. + */ void ext2_update_dynamic_rev(struct super_block *sb) { struct ext2_super_block *es = EXT2_SB(sb)->s_es; @@ -88,9 +106,9 @@ void ext2_update_dynamic_rev(struct super_block *sb) if (le32_to_cpu(es->s_rev_level) > EXT2_GOOD_OLD_REV) return; - ext2_warning(sb, __FUNCTION__, - "updating to rev %d because of new feature flag, " - "running e2fsck is recommended", + ext2_msg(sb, KERN_WARNING, + "warning: updating to rev %d because of " + "new feature flag, running e2fsck is recommended", EXT2_DYNAMIC_REV); es->s_first_ino = cpu_to_le32(EXT2_GOOD_OLD_FIRST_INO); @@ -112,12 +130,16 @@ static void ext2_put_super (struct super_block * sb) int i; struct ext2_sb_info *sbi = EXT2_SB(sb); + dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + ext2_xattr_put_super(sb); if (!(sb->s_flags & MS_RDONLY)) { struct ext2_super_block *es = sbi->s_es; + spin_lock(&sbi->s_lock); es->s_state = cpu_to_le16(sbi->s_mount_state); - ext2_sync_super(sb, es); + spin_unlock(&sbi->s_lock); + ext2_sync_super(sb, es, 1); } db_count = sbi->s_gdb_count; for (i = 0; i < db_count; i++) @@ -130,53 +152,53 @@ static void ext2_put_super (struct super_block * sb) percpu_counter_destroy(&sbi->s_dirs_counter); brelse (sbi->s_sbh); sb->s_fs_info = NULL; + kfree(sbi->s_blockgroup_lock); kfree(sbi); - - return; } -static kmem_cache_t * ext2_inode_cachep; +static struct kmem_cache * ext2_inode_cachep; static struct inode *ext2_alloc_inode(struct super_block *sb) { struct ext2_inode_info *ei; - ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, SLAB_KERNEL); + ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - ei->i_acl = EXT2_ACL_NOT_CACHED; - ei->i_default_acl = EXT2_ACL_NOT_CACHED; -#endif + ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; return &ei->vfs_inode; } -static void ext2_destroy_inode(struct inode *inode) +static void ext2_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, 
struct inode, i_rcu); kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +static void ext2_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ext2_i_callback); +} + +static void init_once(void *foo) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - rwlock_init(&ei->i_meta_lock); + rwlock_init(&ei->i_meta_lock); #ifdef CONFIG_EXT2_FS_XATTR - init_rwsem(&ei->xattr_sem); + init_rwsem(&ei->xattr_sem); #endif - inode_init_once(&ei->vfs_inode); - } + mutex_init(&ei->truncate_mutex); + inode_init_once(&ei->vfs_inode); } - -static int init_inodecache(void) + +static int __init init_inodecache(void) { ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", sizeof(struct ext2_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), - init_once, NULL); + init_once); if (ext2_inode_cachep == NULL) return -ENOMEM; return 0; @@ -184,31 +206,79 @@ static int init_inodecache(void) static void destroy_inodecache(void) { + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); kmem_cache_destroy(ext2_inode_cachep); } -static void ext2_clear_inode(struct inode *inode) +static int ext2_show_options(struct seq_file *seq, struct dentry *root) { -#ifdef CONFIG_EXT2_FS_POSIX_ACL - struct ext2_inode_info *ei = EXT2_I(inode); + struct super_block *sb = root->d_sb; + struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = sbi->s_es; + unsigned long def_mount_opts; + + spin_lock(&sbi->s_lock); + def_mount_opts = le32_to_cpu(es->s_default_mount_opts); + + if (sbi->s_sb_block != 1) + seq_printf(seq, ",sb=%lu", sbi->s_sb_block); + if (test_opt(sb, MINIX_DF)) + seq_puts(seq, ",minixdf"); + if (test_opt(sb, GRPID)) + seq_puts(seq, ",grpid"); + if (!test_opt(sb, GRPID) && (def_mount_opts & EXT2_DEFM_BSDGROUPS)) + seq_puts(seq, ",nogrpid"); + if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT2_DEF_RESUID)) || + le16_to_cpu(es->s_def_resuid) != EXT2_DEF_RESUID) { + seq_printf(seq, ",resuid=%u", + from_kuid_munged(&init_user_ns, sbi->s_resuid)); + } + if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT2_DEF_RESGID)) || + le16_to_cpu(es->s_def_resgid) != EXT2_DEF_RESGID) { + seq_printf(seq, ",resgid=%u", + from_kgid_munged(&init_user_ns, sbi->s_resgid)); + } + if (test_opt(sb, ERRORS_RO)) { + int def_errors = le16_to_cpu(es->s_errors); - if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) { - posix_acl_release(ei->i_acl); - ei->i_acl = EXT2_ACL_NOT_CACHED; + if (def_errors == EXT2_ERRORS_PANIC || + def_errors == EXT2_ERRORS_CONTINUE) { + seq_puts(seq, ",errors=remount-ro"); + } } - if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) { - posix_acl_release(ei->i_default_acl); - ei->i_default_acl = EXT2_ACL_NOT_CACHED; + if (test_opt(sb, ERRORS_CONT)) + seq_puts(seq, ",errors=continue"); + if (test_opt(sb, ERRORS_PANIC)) + seq_puts(seq, ",errors=panic"); + if (test_opt(sb, NO_UID32)) + seq_puts(seq, ",nouid32"); + if (test_opt(sb, DEBUG)) + seq_puts(seq, ",debug"); + if (test_opt(sb, OLDALLOC)) + seq_puts(seq, ",oldalloc"); + +#ifdef CONFIG_EXT2_FS_XATTR + if (test_opt(sb, XATTR_USER)) + seq_puts(seq, ",user_xattr"); + if (!test_opt(sb, XATTR_USER) && + (def_mount_opts & EXT2_DEFM_XATTR_USER)) { + seq_puts(seq, ",nouser_xattr"); } #endif -} -static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) -{ - struct ext2_sb_info *sbi = 
EXT2_SB(vfs->mnt_sb); +#ifdef CONFIG_EXT2_FS_POSIX_ACL + if (test_opt(sb, POSIX_ACL)) + seq_puts(seq, ",acl"); + if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT2_DEFM_ACL)) + seq_puts(seq, ",noacl"); +#endif - if (sbi->s_mount_opt & EXT2_MOUNT_GRPID) - seq_puts(seq, ",grpid"); + if (test_opt(sb, NOBH)) + seq_puts(seq, ",nobh"); #if defined(CONFIG_QUOTA) if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA) @@ -223,6 +293,10 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",xip"); #endif + if (!test_opt(sb, RESERVATION)) + seq_puts(seq, ",noreservation"); + + spin_unlock(&sbi->s_lock); return 0; } @@ -231,18 +305,17 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, siz static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); #endif -static struct super_operations ext2_sops = { +static const struct super_operations ext2_sops = { .alloc_inode = ext2_alloc_inode, .destroy_inode = ext2_destroy_inode, - .read_inode = ext2_read_inode, .write_inode = ext2_write_inode, - .put_inode = ext2_put_inode, - .delete_inode = ext2_delete_inode, + .evict_inode = ext2_evict_inode, .put_super = ext2_put_super, - .write_super = ext2_write_super, + .sync_fs = ext2_sync_fs, + .freeze_fs = ext2_freeze, + .unfreeze_fs = ext2_unfreeze, .statfs = ext2_statfs, .remount_fs = ext2_remount, - .clear_inode = ext2_clear_inode, .show_options = ext2_show_options, #ifdef CONFIG_QUOTA .quota_read = ext2_quota_read, @@ -250,52 +323,50 @@ static struct super_operations ext2_sops = { #endif }; -static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp) +static struct inode *ext2_nfs_get_inode(struct super_block *sb, + u64 ino, u32 generation) { - __u32 *objp = vobjp; - unsigned long ino = objp[0]; - __u32 generation = objp[1]; struct inode *inode; - struct dentry *result; if (ino < EXT2_FIRST_INO(sb) && ino != EXT2_ROOT_INO) return ERR_PTR(-ESTALE); if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count)) return ERR_PTR(-ESTALE); - /* iget isn't really right if the inode is currently unallocated!! - * ext2_read_inode currently does appropriate checks, but - * it might be "neater" to call ext2_get_inode first and check - * if the inode is valid..... + /* + * ext2_iget isn't quite right if the inode is currently unallocated! + * However ext2_iget currently does appropriate checks to handle stale + * inodes so everything is OK. */ - inode = iget(sb, ino); - if (inode == NULL) - return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode) || - (generation && inode->i_generation != generation)) { + inode = ext2_iget(sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (generation && inode->i_generation != generation) { /* we didn't find the right inode.. */ iput(inode); return ERR_PTR(-ESTALE); } - /* now to find a dentry. - * If possible, get a well-connected one - */ - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + return inode; } -/* Yes, most of these are left as NULL!! - * A NULL value implies the default, which works with ext2-like file - * systems, but can be improved upon. - * Currently only get_parent is required. 
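The export operations introduced below decode the same kind of opaque handle that userspace can obtain with name_to_handle_at(2); for ext2 the generic helpers encode the inode number and generation. A hedged sketch (glibc 2.14+; MAX_HANDLE_SZ is assumed to be 128 if the header does not define it):

/* Sketch: fetch and dump a file handle for a path on the filesystem. */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>

#ifndef MAX_HANDLE_SZ
#define MAX_HANDLE_SZ 128	/* assumption; normally provided by <fcntl.h> */
#endif

int main(int argc, char **argv)
{
	struct file_handle *fh;
	int mount_id, i;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <path>\n", argv[0]);
		return 1;
	}
	fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
	if (!fh)
		return 1;
	fh->handle_bytes = MAX_HANDLE_SZ;

	if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id, 0) < 0) {
		perror("name_to_handle_at");
		return 1;
	}
	printf("type %d, %u bytes:", fh->handle_type, fh->handle_bytes);
	for (i = 0; i < (int)fh->handle_bytes; i++)
		printf(" %02x", fh->f_handle[i]);
	printf("\n");
	free(fh);
	return 0;
}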
- */ -static struct export_operations ext2_export_ops = { +static struct dentry *ext2_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + ext2_nfs_get_inode); +} + +static struct dentry *ext2_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + ext2_nfs_get_inode); +} + +static const struct export_operations ext2_export_ops = { + .fh_to_dentry = ext2_fh_to_dentry, + .fh_to_parent = ext2_fh_to_parent, .get_parent = ext2_get_parent, - .get_dentry = ext2_get_dentry, }; static unsigned long get_sb_block(void **data) @@ -324,10 +395,10 @@ enum { Opt_err_ro, Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, - Opt_usrquota, Opt_grpquota + Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_bsd_df, "bsddf"}, {Opt_minix_df, "minixdf"}, {Opt_grpid, "grpid"}, @@ -356,16 +427,19 @@ static match_table_t tokens = { {Opt_ignore, "noquota"}, {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, + {Opt_reservation, "reservation"}, + {Opt_noreservation, "noreservation"}, {Opt_err, NULL} }; -static int parse_options (char * options, - struct ext2_sb_info *sbi) +static int parse_options(char *options, struct super_block *sb) { - char * p; + char *p; + struct ext2_sb_info *sbi = EXT2_SB(sb); substring_t args[MAX_OPT_ARGS]; - unsigned long kind = EXT2_MOUNT_ERRORS_CONT; int option; + kuid_t uid; + kgid_t gid; if (!options) return 1; @@ -392,25 +466,42 @@ static int parse_options (char * options, case Opt_resuid: if (match_int(&args[0], &option)) return 0; - sbi->s_resuid = option; + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) { + ext2_msg(sb, KERN_ERR, "Invalid uid value %d", option); + return 0; + + } + sbi->s_resuid = uid; break; case Opt_resgid: if (match_int(&args[0], &option)) return 0; - sbi->s_resgid = option; + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) { + ext2_msg(sb, KERN_ERR, "Invalid gid value %d", option); + return 0; + } + sbi->s_resgid = gid; break; case Opt_sb: /* handled by get_sb_block() instead of here */ /* *sb_block = match_int(&args[0]); */ break; case Opt_err_panic: - kind = EXT2_MOUNT_ERRORS_PANIC; + clear_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt (sbi->s_mount_opt, ERRORS_RO); + set_opt (sbi->s_mount_opt, ERRORS_PANIC); break; case Opt_err_ro: - kind = EXT2_MOUNT_ERRORS_RO; + clear_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt (sbi->s_mount_opt, ERRORS_PANIC); + set_opt (sbi->s_mount_opt, ERRORS_RO); break; case Opt_err_cont: - kind = EXT2_MOUNT_ERRORS_CONT; + clear_opt (sbi->s_mount_opt, ERRORS_RO); + clear_opt (sbi->s_mount_opt, ERRORS_PANIC); + set_opt (sbi->s_mount_opt, ERRORS_CONT); break; case Opt_nouid32: set_opt (sbi->s_mount_opt, NO_UID32); @@ -440,7 +531,8 @@ static int parse_options (char * options, #else case Opt_user_xattr: case Opt_nouser_xattr: - printk("EXT2 (no)user_xattr options not supported\n"); + ext2_msg(sb, KERN_INFO, "(no)user_xattr options" + "not supported"); break; #endif #ifdef CONFIG_EXT2_FS_POSIX_ACL @@ -453,14 +545,15 @@ static int parse_options (char * options, #else case Opt_acl: case Opt_noacl: - printk("EXT2 (no)acl options not supported\n"); + ext2_msg(sb, KERN_INFO, + "(no)acl options not supported"); break; #endif 
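The strings tokenized by parse_options() arrive as the data argument of mount(2); mount(8) builds them from its -o list, and an unrecognized token makes parse_options() return 0 so the mount is refused. A minimal sketch, with the device and mount point as placeholders and root privileges assumed:

/* Sketch: pass an ext2 option string straight to the kernel. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("/dev/sdb1", "/mnt", "ext2", MS_NOATIME,
		  "errors=remount-ro,resuid=0,noreservation") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}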
case Opt_xip: #ifdef CONFIG_EXT2_FS_XIP set_opt (sbi->s_mount_opt, XIP); #else - printk("EXT2 xip option not supported\n"); + ext2_msg(sb, KERN_INFO, "xip option not supported"); #endif break; @@ -477,19 +570,25 @@ static int parse_options (char * options, case Opt_quota: case Opt_usrquota: case Opt_grpquota: - printk(KERN_ERR - "EXT2-fs: quota operations not supported.\n"); - + ext2_msg(sb, KERN_INFO, + "quota operations not supported"); break; #endif + case Opt_reservation: + set_opt(sbi->s_mount_opt, RESERVATION); + ext2_msg(sb, KERN_INFO, "reservations ON"); + break; + case Opt_noreservation: + clear_opt(sbi->s_mount_opt, RESERVATION); + ext2_msg(sb, KERN_INFO, "reservations OFF"); + break; case Opt_ignore: break; default: return 0; } } - sbi->s_mount_opt |= kind; return 1; } @@ -501,34 +600,39 @@ static int ext2_setup_super (struct super_block * sb, struct ext2_sb_info *sbi = EXT2_SB(sb); if (le32_to_cpu(es->s_rev_level) > EXT2_MAX_SUPP_REV) { - printk ("EXT2-fs warning: revision level too high, " - "forcing read-only mode\n"); + ext2_msg(sb, KERN_ERR, + "error: revision level too high, " + "forcing read-only mode"); res = MS_RDONLY; } if (read_only) return res; if (!(sbi->s_mount_state & EXT2_VALID_FS)) - printk ("EXT2-fs warning: mounting unchecked fs, " - "running e2fsck is recommended\n"); + ext2_msg(sb, KERN_WARNING, + "warning: mounting unchecked fs, " + "running e2fsck is recommended"); else if ((sbi->s_mount_state & EXT2_ERROR_FS)) - printk ("EXT2-fs warning: mounting fs with errors, " - "running e2fsck is recommended\n"); + ext2_msg(sb, KERN_WARNING, + "warning: mounting fs with errors, " + "running e2fsck is recommended"); else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && le16_to_cpu(es->s_mnt_count) >= (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) - printk ("EXT2-fs warning: maximal mount count reached, " - "running e2fsck is recommended\n"); + ext2_msg(sb, KERN_WARNING, + "warning: maximal mount count reached, " + "running e2fsck is recommended"); else if (le32_to_cpu(es->s_checkinterval) && - (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) - printk ("EXT2-fs warning: checktime reached, " - "running e2fsck is recommended\n"); + (le32_to_cpu(es->s_lastcheck) + + le32_to_cpu(es->s_checkinterval) <= get_seconds())) + ext2_msg(sb, KERN_WARNING, + "warning: checktime reached, " + "running e2fsck is recommended"); if (!le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT2_DFL_MAX_MNT_COUNT); - es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); - ext2_write_super(sb); + le16_add_cpu(&es->s_mnt_count, 1); if (test_opt (sb, DEBUG)) - printk ("[EXT II FS %s, %s, bs=%lu, fs=%lu, gc=%lu, " - "bpg=%lu, ipg=%lu, mo=%04lx]\n", + ext2_msg(sb, KERN_INFO, "%s, %s, bs=%lu, fs=%lu, gc=%lu, " + "bpg=%lu, ipg=%lu, mo=%04lx]", EXT2FS_VERSION, EXT2FS_DATE, sb->s_blocksize, sbi->s_frag_size, sbi->s_groups_count, @@ -538,27 +642,24 @@ static int ext2_setup_super (struct super_block * sb, return res; } -static int ext2_check_descriptors (struct super_block * sb) +static int ext2_check_descriptors(struct super_block *sb) { int i; - int desc_block = 0; struct ext2_sb_info *sbi = EXT2_SB(sb); - unsigned long first_block = le32_to_cpu(sbi->s_es->s_first_data_block); - unsigned long last_block; - struct ext2_group_desc * gdp = NULL; ext2_debug ("Checking group descriptors"); - for (i = 0; i < sbi->s_groups_count; i++) - { + for (i = 0; i < sbi->s_groups_count; i++) { + struct ext2_group_desc *gdp = 
ext2_get_group_desc(sb, i, NULL); + ext2_fsblk_t first_block = ext2_group_first_block_no(sb, i); + ext2_fsblk_t last_block; + if (i == sbi->s_groups_count - 1) last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; else last_block = first_block + (EXT2_BLOCKS_PER_GROUP(sb) - 1); - if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext2_group_desc *) sbi->s_group_desc[desc_block++]->b_data; if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || le32_to_cpu(gdp->bg_block_bitmap) > last_block) { @@ -578,7 +679,7 @@ static int ext2_check_descriptors (struct super_block * sb) return 0; } if (le32_to_cpu(gdp->bg_inode_table) < first_block || - le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group > + le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 > last_block) { ext2_error (sb, "ext2_check_descriptors", @@ -587,14 +688,10 @@ static int ext2_check_descriptors (struct super_block * sb) i, (unsigned long) le32_to_cpu(gdp->bg_inode_table)); return 0; } - first_block += EXT2_BLOCKS_PER_GROUP(sb); - gdp++; } return 1; } -#define log2(n) ffz(~(n)) - /* * Maximal file size. There is a direct, and {,double-,triple-}indirect * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. @@ -603,11 +700,31 @@ static int ext2_check_descriptors (struct super_block * sb) static loff_t ext2_max_size(int bits) { loff_t res = EXT2_NDIR_BLOCKS; - /* This constant is calculated to be the largest file size for a - * dense, 4k-blocksize file such that the total number of + int meta_blocks; + loff_t upper_limit; + + /* This is calculated to be the largest file size for a + * dense, file such that the total number of * sectors in the file, including data and all indirect blocks, - * does not exceed 2^32. */ - const loff_t upper_limit = 0x1ff7fffd000LL; + * does not exceed 2^32 -1 + * __u32 i_blocks representing the total number of + * 512 bytes blocks of the file + */ + upper_limit = (1LL << 32) - 1; + + /* total blocks in file system block size */ + upper_limit >>= (bits - 9); + + + /* indirect blocks */ + meta_blocks = 1; + /* double indirect blocks */ + meta_blocks += 1 + (1LL << (bits-2)); + /* tripple indirect blocks */ + meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); + + upper_limit -= meta_blocks; + upper_limit <<= bits; res += 1LL << (bits-2); res += 1LL << (2*(bits-2)); @@ -615,6 +732,10 @@ static loff_t ext2_max_size(int bits) res <<= bits; if (res > upper_limit) res = upper_limit; + + if (res > MAX_LFS_FILESIZE) + res = MAX_LFS_FILESIZE; + return res; } @@ -623,10 +744,9 @@ static unsigned long descriptor_loc(struct super_block *sb, int nr) { struct ext2_sb_info *sbi = EXT2_SB(sb); - unsigned long bg, first_data_block, first_meta_bg; + unsigned long bg, first_meta_bg; int has_super = 0; - first_data_block = le32_to_cpu(sbi->s_es->s_first_data_block); first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); if (!EXT2_HAS_INCOMPAT_FEATURE(sb, EXT2_FEATURE_INCOMPAT_META_BG) || @@ -635,7 +755,8 @@ static unsigned long descriptor_loc(struct super_block *sb, bg = sbi->s_desc_per_block * nr; if (ext2_bg_has_super(sb, bg)) has_super = 1; - return (first_data_block + has_super + (bg * sbi->s_blocks_per_group)); + + return ext2_group_first_block_no(sb, bg) + has_super; } static int ext2_fill_super(struct super_block *sb, void *data, int silent) @@ -649,15 +770,28 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) unsigned long logic_sb_block; unsigned long offset = 0; unsigned long def_mount_opts; + long ret = -EINVAL; int blocksize = 
BLOCK_SIZE; int db_count; int i, j; __le32 features; + int err; + err = -ENOMEM; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) - return -ENOMEM; + goto failed; + + sbi->s_blockgroup_lock = + kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); + if (!sbi->s_blockgroup_lock) { + kfree(sbi); + goto failed; + } sb->s_fs_info = sbi; + sbi->s_sb_block = sb_block; + + spin_lock_init(&sbi->s_lock); /* * See what the current blocksize for the device is, and @@ -668,7 +802,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) */ blocksize = sb_min_blocksize(sb, BLOCK_SIZE); if (!blocksize) { - printk ("EXT2-fs: unable to set blocksize\n"); + ext2_msg(sb, KERN_ERR, "error: unable to set blocksize"); goto failed_sbi; } @@ -684,7 +818,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) } if (!(bh = sb_bread(sb, logic_sb_block))) { - printk ("EXT2-fs: unable to read superblock\n"); + ext2_msg(sb, KERN_ERR, "error: unable to read superblock"); goto failed_sbi; } /* @@ -706,20 +840,28 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, GRPID); if (def_mount_opts & EXT2_DEFM_UID16) set_opt(sbi->s_mount_opt, NO_UID32); +#ifdef CONFIG_EXT2_FS_XATTR if (def_mount_opts & EXT2_DEFM_XATTR_USER) set_opt(sbi->s_mount_opt, XATTR_USER); +#endif +#ifdef CONFIG_EXT2_FS_POSIX_ACL if (def_mount_opts & EXT2_DEFM_ACL) set_opt(sbi->s_mount_opt, POSIX_ACL); +#endif if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_RO) + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_CONTINUE) + set_opt(sbi->s_mount_opt, ERRORS_CONT); + else set_opt(sbi->s_mount_opt, ERRORS_RO); - sbi->s_resuid = le16_to_cpu(es->s_def_resuid); - sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); + sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); - if (!parse_options ((char *) data, sbi)) + set_opt(sbi->s_mount_opt, RESERVATION); + + if (!parse_options((char *) data, sb)) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -733,8 +875,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || EXT2_HAS_INCOMPAT_FEATURE(sb, ~0U))) - printk("EXT2-fs warning: feature flags set on rev 0 fs, " - "running e2fsck is recommended\n"); + ext2_msg(sb, KERN_WARNING, + "warning: feature flags set on rev 0 fs, " + "running e2fsck is recommended"); /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, @@ -742,25 +885,25 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) */ features = EXT2_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP); if (features) { - printk("EXT2-fs: %s: couldn't mount because of " - "unsupported optional features (%x).\n", - sb->s_id, le32_to_cpu(features)); + ext2_msg(sb, KERN_ERR, "error: couldn't mount because of " + "unsupported optional features (%x)", + le32_to_cpu(features)); goto failed_mount; } if (!(sb->s_flags & MS_RDONLY) && (features = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP))){ - printk("EXT2-fs: %s: couldn't mount RDWR because of " - "unsupported optional features (%x).\n", - sb->s_id, le32_to_cpu(features)); + ext2_msg(sb, KERN_ERR, "error: couldn't mount RDWR because of " 
+ "unsupported optional features (%x)", + le32_to_cpu(features)); goto failed_mount; } blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) || - (sb->s_blocksize != blocksize))) { + if (ext2_use_xip(sb) && blocksize != PAGE_SIZE) { if (!silent) - printk("XIP: Unsupported blocksize\n"); + ext2_msg(sb, KERN_ERR, + "error: unsupported blocksize for xip"); goto failed_mount; } @@ -769,7 +912,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) brelse(bh); if (!sb_set_blocksize(sb, blocksize)) { - printk(KERN_ERR "EXT2-fs: blocksize too small for device.\n"); + ext2_msg(sb, KERN_ERR, + "error: bad blocksize %d", blocksize); goto failed_sbi; } @@ -777,19 +921,20 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) offset = (sb_block*BLOCK_SIZE) % blocksize; bh = sb_bread(sb, logic_sb_block); if(!bh) { - printk("EXT2-fs: Couldn't read superblock on " - "2nd try.\n"); + ext2_msg(sb, KERN_ERR, "error: couldn't read" + "superblock on 2nd try"); goto failed_sbi; } es = (struct ext2_super_block *) (((char *)bh->b_data) + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT2_SUPER_MAGIC)) { - printk ("EXT2-fs: Magic mismatch, very weird !\n"); + ext2_msg(sb, KERN_ERR, "error: magic mismatch"); goto failed_mount; } } sb->s_maxbytes = ext2_max_size(sb->s_blocksize_bits); + sb->s_max_links = EXT2_LINK_MAX; if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV) { sbi->s_inode_size = EXT2_GOOD_OLD_INODE_SIZE; @@ -798,9 +943,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_inode_size = le16_to_cpu(es->s_inode_size); sbi->s_first_ino = le32_to_cpu(es->s_first_ino); if ((sbi->s_inode_size < EXT2_GOOD_OLD_INODE_SIZE) || - (sbi->s_inode_size & (sbi->s_inode_size - 1)) || + !is_power_of_2(sbi->s_inode_size) || (sbi->s_inode_size > blocksize)) { - printk ("EXT2-fs: unsupported inode size: %d\n", + ext2_msg(sb, KERN_ERR, + "error: unsupported inode size: %d", sbi->s_inode_size); goto failed_mount; } @@ -828,38 +974,42 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_sbh = bh; sbi->s_mount_state = le16_to_cpu(es->s_state); sbi->s_addr_per_block_bits = - log2 (EXT2_ADDR_PER_BLOCK(sb)); + ilog2 (EXT2_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = - log2 (EXT2_DESC_PER_BLOCK(sb)); + ilog2 (EXT2_DESC_PER_BLOCK(sb)); if (sb->s_magic != EXT2_SUPER_MAGIC) goto cantfind_ext2; if (sb->s_blocksize != bh->b_size) { if (!silent) - printk ("VFS: Unsupported blocksize on dev " - "%s.\n", sb->s_id); + ext2_msg(sb, KERN_ERR, "error: unsupported blocksize"); goto failed_mount; } if (sb->s_blocksize != sbi->s_frag_size) { - printk ("EXT2-fs: fragsize %lu != blocksize %lu (not supported yet)\n", + ext2_msg(sb, KERN_ERR, + "error: fragsize %lu != blocksize %lu" + "(not supported yet)", sbi->s_frag_size, sb->s_blocksize); goto failed_mount; } if (sbi->s_blocks_per_group > sb->s_blocksize * 8) { - printk ("EXT2-fs: #blocks per group too big: %lu\n", + ext2_msg(sb, KERN_ERR, + "error: #blocks per group too big: %lu", sbi->s_blocks_per_group); goto failed_mount; } if (sbi->s_frags_per_group > sb->s_blocksize * 8) { - printk ("EXT2-fs: #fragments per group too big: %lu\n", + ext2_msg(sb, KERN_ERR, + "error: #fragments per group too big: %lu", sbi->s_frags_per_group); goto failed_mount; } if (sbi->s_inodes_per_group > sb->s_blocksize * 8) { - printk ("EXT2-fs: #inodes per group too big: %lu\n", + ext2_msg(sb, KERN_ERR, + "error: #inodes 
per group too big: %lu", sbi->s_inodes_per_group); goto failed_mount; } @@ -873,70 +1023,105 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) EXT2_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { - printk ("EXT2-fs: not enough memory\n"); + ext2_msg(sb, KERN_ERR, "error: not enough memory"); goto failed_mount; } - bgl_lock_init(&sbi->s_blockgroup_lock); - sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts), - GFP_KERNEL); + bgl_lock_init(sbi->s_blockgroup_lock); + sbi->s_debts = kcalloc(sbi->s_groups_count, sizeof(*sbi->s_debts), GFP_KERNEL); if (!sbi->s_debts) { - printk ("EXT2-fs: not enough memory\n"); + ext2_msg(sb, KERN_ERR, "error: not enough memory"); goto failed_mount_group_desc; } - memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts)); for (i = 0; i < db_count; i++) { block = descriptor_loc(sb, logic_sb_block, i); sbi->s_group_desc[i] = sb_bread(sb, block); if (!sbi->s_group_desc[i]) { for (j = 0; j < i; j++) brelse (sbi->s_group_desc[j]); - printk ("EXT2-fs: unable to read group descriptors\n"); + ext2_msg(sb, KERN_ERR, + "error: unable to read group descriptors"); goto failed_mount_group_desc; } } if (!ext2_check_descriptors (sb)) { - printk ("EXT2-fs: group descriptors corrupted!\n"); + ext2_msg(sb, KERN_ERR, "group descriptors corrupted"); goto failed_mount2; } sbi->s_gdb_count = db_count; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); - percpu_counter_init(&sbi->s_freeblocks_counter, + /* per fileystem reservation list head & lock */ + spin_lock_init(&sbi->s_rsv_window_lock); + sbi->s_rsv_window_root = RB_ROOT; + /* + * Add a single, static dummy reservation to the start of the + * reservation window list --- it gives us a placeholder for + * append-at-start-of-list which makes the allocation logic + * _much_ simpler. 
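The sentinel idea in this comment is a general one: a permanently present dummy head node removes the empty-list and insert-at-front special cases. The real code keeps the windows in an rb-tree under s_rsv_window_lock; the standalone sketch below only illustrates the sentinel trick with a plain sorted list:

/* Generic illustration (not the kernel code): with a static sentinel head,
 * insertion never has to handle "list is empty" or "new first element". */
#include <stdio.h>
#include <stdlib.h>

struct win { unsigned long start; struct win *next; };

static struct win head = { 0, NULL };	/* static sentinel, never freed */

static void insert_sorted(unsigned long start)
{
	struct win *prev = &head, *n = malloc(sizeof(*n));

	if (!n)
		return;			/* error handling omitted in this sketch */
	n->start = start;
	while (prev->next && prev->next->start < start)
		prev = prev->next;	/* prev is never NULL thanks to the sentinel */
	n->next = prev->next;
	prev->next = n;
}

int main(void)
{
	insert_sorted(300);
	insert_sorted(100);
	insert_sorted(200);
	for (struct win *w = head.next; w; w = w->next)
		printf("%lu\n", w->start);	/* prints 100 200 300 */
	return 0;
}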
+ */ + sbi->s_rsv_window_head.rsv_start = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_end = EXT2_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_alloc_hit = 0; + sbi->s_rsv_window_head.rsv_goal_size = 0; + ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); + + err = percpu_counter_init(&sbi->s_freeblocks_counter, ext2_count_free_blocks(sb)); - percpu_counter_init(&sbi->s_freeinodes_counter, + if (!err) { + err = percpu_counter_init(&sbi->s_freeinodes_counter, ext2_count_free_inodes(sb)); - percpu_counter_init(&sbi->s_dirs_counter, + } + if (!err) { + err = percpu_counter_init(&sbi->s_dirs_counter, ext2_count_dirs(sb)); + } + if (err) { + ext2_msg(sb, KERN_ERR, "error: insufficient memory"); + goto failed_mount3; + } /* * set up enough so that it can read an inode */ sb->s_op = &ext2_sops; sb->s_export_op = &ext2_export_ops; sb->s_xattr = ext2_xattr_handlers; - root = iget(sb, EXT2_ROOT_INO); - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); - printk(KERN_ERR "EXT2-fs: get root inode failed\n"); + +#ifdef CONFIG_QUOTA + sb->dq_op = &dquot_operations; + sb->s_qcop = &dquot_quotactl_ops; +#endif + + root = ext2_iget(sb, EXT2_ROOT_INO); + if (IS_ERR(root)) { + ret = PTR_ERR(root); goto failed_mount3; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - dput(sb->s_root); - sb->s_root = NULL; - printk(KERN_ERR "EXT2-fs: corrupt root inode, run e2fsck\n"); + iput(root); + ext2_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); + goto failed_mount3; + } + + sb->s_root = d_make_root(root); + if (!sb->s_root) { + ext2_msg(sb, KERN_ERR, "error: get root inode failed"); + ret = -ENOMEM; goto failed_mount3; } if (EXT2_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) - ext2_warning(sb, __FUNCTION__, - "mounting ext3 filesystem as ext2"); - ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY); + ext2_msg(sb, KERN_WARNING, + "warning: mounting ext3 filesystem as ext2"); + if (ext2_setup_super (sb, es, sb->s_flags & MS_RDONLY)) + sb->s_flags |= MS_RDONLY; + ext2_write_super(sb); return 0; cantfind_ext2: if (!silent) - printk("VFS: Can't find an ext2 filesystem on dev %s.\n", - sb->s_id); + ext2_msg(sb, KERN_ERR, + "error: can't find an ext2 filesystem on dev %s.", + sb->s_id); goto failed_mount; failed_mount3: percpu_counter_destroy(&sbi->s_freeblocks_counter); @@ -952,26 +1137,45 @@ failed_mount: brelse(bh); failed_sbi: sb->s_fs_info = NULL; + kfree(sbi->s_blockgroup_lock); kfree(sbi); - return -EINVAL; +failed: + return ret; } -static void ext2_commit_super (struct super_block * sb, - struct ext2_super_block * es) +static void ext2_clear_super_error(struct super_block *sb) { - es->s_wtime = cpu_to_le32(get_seconds()); - mark_buffer_dirty(EXT2_SB(sb)->s_sbh); - sb->s_dirt = 0; + struct buffer_head *sbh = EXT2_SB(sb)->s_sbh; + + if (buffer_write_io_error(sbh)) { + /* + * Oh, dear. A previous attempt to write the + * superblock failed. This could happen because the + * USB device was yanked out. Or it could happen to + * be a transient write error and maybe the block will + * be remapped. Nothing we can do but to retry the + * write and hope for the best. 
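Stepping back to ext2_fill_super() above: its error handling follows the cascading-label pattern, where each failure jumps to the label that releases only what has been set up so far (failed_mount3, failed_mount2, failed_mount, failed_sbi, failed). A stripped-down standalone sketch with invented names:

/* Illustration of the failed_* unwinding pattern: later failures fall through
 * earlier labels so each resource is released exactly once. */
#include <stdio.h>
#include <stdlib.h>

static int fill(void)
{
	char *sbi, *group_desc, *debts;

	sbi = malloc(64);
	if (!sbi)
		goto failed;
	group_desc = malloc(64);
	if (!group_desc)
		goto failed_sbi;
	debts = calloc(16, 4);
	if (!debts)
		goto failed_group_desc;

	/* ... further setup would jump to a deeper label on failure ... */
	printf("mounted\n");
	return 0;	/* success: the allocations stay owned by the "superblock" */

failed_group_desc:
	free(group_desc);
failed_sbi:
	free(sbi);
failed:
	return -1;
}

int main(void) { return fill(); }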
+ */ + ext2_msg(sb, KERN_ERR, + "previous I/O error to superblock detected\n"); + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); + } } -static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) +static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait) { + ext2_clear_super_error(sb); + spin_lock(&EXT2_SB(sb)->s_lock); es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); es->s_wtime = cpu_to_le32(get_seconds()); + /* unlock before we do IO */ + spin_unlock(&EXT2_SB(sb)->s_lock); mark_buffer_dirty(EXT2_SB(sb)->s_sbh); - sync_dirty_buffer(EXT2_SB(sb)->s_sbh); - sb->s_dirt = 0; + if (wait) + sync_dirty_buffer(EXT2_SB(sb)->s_sbh); } /* @@ -984,27 +1188,61 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es) * may have been checked while mounted and e2fsck may have * set s_state to EXT2_VALID_FS after some corrections. */ +static int ext2_sync_fs(struct super_block *sb, int wait) +{ + struct ext2_sb_info *sbi = EXT2_SB(sb); + struct ext2_super_block *es = EXT2_SB(sb)->s_es; + + /* + * Write quota structures to quota file, sync_blockdev() will write + * them to disk later + */ + dquot_writeback_dquots(sb, -1); + + spin_lock(&sbi->s_lock); + if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { + ext2_debug("setting valid to 0\n"); + es->s_state &= cpu_to_le16(~EXT2_VALID_FS); + } + spin_unlock(&sbi->s_lock); + ext2_sync_super(sb, es, wait); + return 0; +} -void ext2_write_super (struct super_block * sb) +static int ext2_freeze(struct super_block *sb) { - struct ext2_super_block * es; - lock_kernel(); - if (!(sb->s_flags & MS_RDONLY)) { - es = EXT2_SB(sb)->s_es; - - if (le16_to_cpu(es->s_state) & EXT2_VALID_FS) { - ext2_debug ("setting valid to 0\n"); - es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & - ~EXT2_VALID_FS); - es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb)); - es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb)); - es->s_mtime = cpu_to_le32(get_seconds()); - ext2_sync_super(sb, es); - } else - ext2_commit_super (sb, es); + struct ext2_sb_info *sbi = EXT2_SB(sb); + + /* + * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared + * because we have unattached inodes and thus filesystem is not fully + * consistent. + */ + if (atomic_long_read(&sb->s_remove_count)) { + ext2_sync_fs(sb, 1); + return 0; } - sb->s_dirt = 0; - unlock_kernel(); + /* Set EXT2_FS_VALID flag */ + spin_lock(&sbi->s_lock); + sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state); + spin_unlock(&sbi->s_lock); + ext2_sync_super(sb, sbi->s_es, 1); + + return 0; +} + +static int ext2_unfreeze(struct super_block *sb) +{ + /* Just write sb to clear EXT2_VALID_FS flag */ + ext2_write_super(sb); + + return 0; +} + +void ext2_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + ext2_sync_fs(sb, 1); } static int ext2_remount (struct super_block * sb, int * flags, char * data) @@ -1016,6 +1254,9 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) unsigned long old_sb_flags; int err; + sync_filesystem(sb); + spin_lock(&sbi->s_lock); + /* Store the old options */ old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; @@ -1025,7 +1266,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) /* * Allow the "check" option to be passed as a remount option. 
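The ext2_freeze()/ext2_unfreeze() callbacks added above are reached through the generic VFS freeze path, which userspace drives with the FIFREEZE/FITHAW ioctls (the mechanism fsfreeze(8) uses). A minimal sketch, assuming a mounted filesystem at the given path and CAP_SYS_ADMIN:

/* Sketch: freeze and thaw a filesystem via the generic VFS ioctls. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	int fd, arg = 0;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <mount-point>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) { perror("open"); return 1; }

	if (ioctl(fd, FIFREEZE, &arg) < 0) { perror("FIFREEZE"); return 1; }
	puts("frozen; ext2 writes the superblock with the valid flag set");
	sleep(5);	/* window in which a block-level snapshot would be taken */
	if (ioctl(fd, FITHAW, &arg) < 0) { perror("FITHAW"); return 1; }
	puts("thawed");
	close(fd);
	return 0;
}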
*/ - if (!parse_options (data, sbi)) { + if (!parse_options(data, sb)) { err = -EINVAL; goto restore_opts; } @@ -1033,31 +1274,57 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset + EXT2_MOUNT_XIP if not */ + + if ((ext2_use_xip(sb)) && (sb->s_blocksize != PAGE_SIZE)) { + ext2_msg(sb, KERN_WARNING, + "warning: unsupported blocksize for xip"); + err = -EINVAL; + goto restore_opts; + } + es = sbi->s_es; - if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != - (old_mount_opt & EXT2_MOUNT_XIP)) && - invalidate_inodes(sb)) - ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\ - "xip remain in cache (no functional problem)"); - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) + if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) { + ext2_msg(sb, KERN_WARNING, "warning: refusing change of " + "xip flag with busy inodes while remounting"); + sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; + sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP; + } + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { + spin_unlock(&sbi->s_lock); return 0; + } if (*flags & MS_RDONLY) { if (le16_to_cpu(es->s_state) & EXT2_VALID_FS || - !(sbi->s_mount_state & EXT2_VALID_FS)) + !(sbi->s_mount_state & EXT2_VALID_FS)) { + spin_unlock(&sbi->s_lock); return 0; + } + /* * OK, we are remounting a valid rw partition rdonly, so set * the rdonly flag and then mark the partition as valid again. */ es->s_state = cpu_to_le16(sbi->s_mount_state); es->s_mtime = cpu_to_le32(get_seconds()); + spin_unlock(&sbi->s_lock); + + err = dquot_suspend(sb, -1); + if (err < 0) { + spin_lock(&sbi->s_lock); + goto restore_opts; + } + + ext2_sync_super(sb, es, 1); } else { __le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP); if (ret) { - printk("EXT2-fs: %s: couldn't remount RDWR because of " - "unsupported optional features (%x).\n", - sb->s_id, le32_to_cpu(ret)); + ext2_msg(sb, KERN_WARNING, + "warning: couldn't remount RDWR because of " + "unsupported optional features (%x).", + le32_to_cpu(ret)); err = -EROFS; goto restore_opts; } @@ -1069,14 +1336,20 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) sbi->s_mount_state = le16_to_cpu(es->s_state); if (!ext2_setup_super (sb, es, 0)) sb->s_flags &= ~MS_RDONLY; + spin_unlock(&sbi->s_lock); + + ext2_write_super(sb); + + dquot_resume(sb, -1); } - ext2_sync_super(sb, es); + return 0; restore_opts: sbi->s_mount_opt = old_opts.s_mount_opt; sbi->s_resuid = old_opts.s_resuid; sbi->s_resgid = old_opts.s_resgid; sb->s_flags = old_sb_flags; + spin_unlock(&sbi->s_lock); return err; } @@ -1084,21 +1357,28 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) { struct super_block *sb = dentry->d_sb; struct ext2_sb_info *sbi = EXT2_SB(sb); - unsigned long overhead; - int i; + struct ext2_super_block *es = sbi->s_es; + u64 fsid; + + spin_lock(&sbi->s_lock); if (test_opt (sb, MINIX_DF)) - overhead = 0; - else { + sbi->s_overhead_last = 0; + else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { + unsigned long i, overhead = 0; + smp_rmb(); + /* - * Compute the overhead (FS structures) + * Compute the overhead (FS structures). This is constant + * for a given filesystem unless the number of block groups + * changes so we cache the previous value until it does. 
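The fields ext2_statfs() fills in, including the cached overhead discussed in this comment, surface through statfs(2)/statvfs(3); f_bavail is f_bfree minus the s_r_blocks_count reserve, which is why unprivileged users see less free space than root. A small sketch:

/* Sketch: print the free-space view that ext2_statfs() provides. */
#include <stdio.h>
#include <sys/statvfs.h>

int main(int argc, char **argv)
{
	struct statvfs st;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <path>\n", argv[0]);
		return 1;
	}
	if (statvfs(argv[1], &st) < 0) { perror("statvfs"); return 1; }

	printf("block size      : %lu\n", st.f_bsize);
	printf("data blocks     : %llu\n", (unsigned long long)st.f_blocks);
	printf("free blocks     : %llu\n", (unsigned long long)st.f_bfree);
	printf("avail (non-root): %llu\n", (unsigned long long)st.f_bavail);
	printf("reserved approx : %llu\n",
	       (unsigned long long)(st.f_bfree - st.f_bavail));
	return 0;
}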
*/ /* * All of the blocks before first_data_block are * overhead */ - overhead = le32_to_cpu(sbi->s_es->s_first_data_block); + overhead = le32_to_cpu(es->s_first_data_block); /* * Add the overhead attributed to the superblock and @@ -1115,32 +1395,42 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf) */ overhead += (sbi->s_groups_count * (2 + sbi->s_itb_per_group)); + sbi->s_overhead_last = overhead; + smp_wmb(); + sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); } buf->f_type = EXT2_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = le32_to_cpu(sbi->s_es->s_blocks_count) - overhead; + buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; buf->f_bfree = ext2_count_free_blocks(sb); - buf->f_bavail = buf->f_bfree - le32_to_cpu(sbi->s_es->s_r_blocks_count); - if (buf->f_bfree < le32_to_cpu(sbi->s_es->s_r_blocks_count)) + es->s_free_blocks_count = cpu_to_le32(buf->f_bfree); + buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); + if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) buf->f_bavail = 0; - buf->f_files = le32_to_cpu(sbi->s_es->s_inodes_count); - buf->f_ffree = ext2_count_free_inodes (sb); + buf->f_files = le32_to_cpu(es->s_inodes_count); + buf->f_ffree = ext2_count_free_inodes(sb); + es->s_free_inodes_count = cpu_to_le32(buf->f_ffree); buf->f_namelen = EXT2_NAME_LEN; + fsid = le64_to_cpup((void *)es->s_uuid) ^ + le64_to_cpup((void *)es->s_uuid + sizeof(u64)); + buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; + buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; + spin_unlock(&sbi->s_lock); return 0; } -static int ext2_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *ext2_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt); + return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super); } #ifdef CONFIG_QUOTA /* Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) + * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) @@ -1165,8 +1455,9 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, sb->s_blocksize - offset : toread; tmp_bh.b_state = 0; + tmp_bh.b_size = sb->s_blocksize; err = ext2_get_block(inode, blk, &tmp_bh, 0); - if (err) + if (err < 0) return err; if (!buffer_mapped(&tmp_bh)) /* A hole? */ memset(data, 0, tocopy); @@ -1198,20 +1489,20 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, struct buffer_head tmp_bh; struct buffer_head *bh; - mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? 
sb->s_blocksize - offset : towrite; tmp_bh.b_state = 0; + tmp_bh.b_size = sb->s_blocksize; err = ext2_get_block(inode, blk, &tmp_bh, 1); - if (err) + if (err < 0) goto out; if (offset || tocopy != EXT2_BLOCK_SIZE(sb)) bh = sb_bread(sb, tmp_bh.b_blocknr); else bh = sb_getblk(sb, tmp_bh.b_blocknr); - if (!bh) { + if (unlikely(!bh)) { err = -EIO; goto out; } @@ -1235,7 +1526,6 @@ out: inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); - mutex_unlock(&inode->i_mutex); return len - towrite; } @@ -1244,10 +1534,11 @@ out: static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", - .get_sb = ext2_get_sb, + .mount = ext2_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS_FS("ext2"); static int __init init_ext2_fs(void) { @@ -1275,5 +1566,8 @@ static void __exit exit_ext2_fs(void) exit_ext2_xattr(); } +MODULE_AUTHOR("Remy Card and others"); +MODULE_DESCRIPTION("Second Extended Filesystem"); +MODULE_LICENSE("GPL"); module_init(init_ext2_fs) module_exit(exit_ext2_fs) diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 1e67d87cfa9..565cf817bbf 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -28,10 +28,11 @@ static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -struct inode_operations ext2_symlink_inode_operations = { +const struct inode_operations ext2_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .setattr = ext2_setattr, #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -40,9 +41,10 @@ struct inode_operations ext2_symlink_inode_operations = { #endif }; -struct inode_operations ext2_fast_symlink_inode_operations = { +const struct inode_operations ext2_fast_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = ext2_follow_link, + .setattr = ext2_setattr, #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index af52a7f8b29..91426141c33 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -35,7 +35,7 @@ * +------------------+ * * The block header is followed by multiple entry descriptors. These entry - * descriptors are variable in size, and alligned to EXT2_XATTR_PAD + * descriptors are variable in size, and aligned to EXT2_XATTR_PAD * byte boundaries. The entry descriptors are sorted by attribute name, * so that two extended attribute blocks can be compared efficiently. 
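For context, the xattr machinery below is what ultimately services userspace extended-attribute calls; on ext2 the user.* namespace additionally requires the user_xattr mount option and CONFIG_EXT2_FS_XATTR. A minimal sketch (EOPNOTSUPP means the kernel or mount lacks xattr support):

/* Sketch: set and read back a user.* attribute on a file. */
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	char value[256];
	ssize_t len;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	if (setxattr(argv[1], "user.comment", "hello", strlen("hello"), 0) < 0) {
		perror("setxattr");	/* EOPNOTSUPP without xattr support */
		return 1;
	}
	len = getxattr(argv[1], "user.comment", value, sizeof(value));
	if (len < 0) { perror("getxattr"); return 1; }
	printf("user.comment = %.*s\n", (int)len, value);
	return 0;
}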
* @@ -54,12 +54,12 @@ */ #include <linux/buffer_head.h> -#include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/mbcache.h> #include <linux/quotaops.h> #include <linux/rwsem.h> +#include <linux/security.h> #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -100,11 +100,11 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *, static struct mb_cache *ext2_xattr_cache; -static struct xattr_handler *ext2_xattr_handler_map[] = { +static const struct xattr_handler *ext2_xattr_handler_map[] = { [EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler, #ifdef CONFIG_EXT2_FS_POSIX_ACL - [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext2_xattr_acl_access_handler, - [EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext2_xattr_acl_default_handler, + [EXT2_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, + [EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler, #endif [EXT2_XATTR_INDEX_TRUSTED] = &ext2_xattr_trusted_handler, #ifdef CONFIG_EXT2_FS_SECURITY @@ -112,12 +112,12 @@ static struct xattr_handler *ext2_xattr_handler_map[] = { #endif }; -struct xattr_handler *ext2_xattr_handlers[] = { +const struct xattr_handler *ext2_xattr_handlers[] = { &ext2_xattr_user_handler, &ext2_xattr_trusted_handler, #ifdef CONFIG_EXT2_FS_POSIX_ACL - &ext2_xattr_acl_access_handler, - &ext2_xattr_acl_default_handler, + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, #endif #ifdef CONFIG_EXT2_FS_SECURITY &ext2_xattr_security_handler, @@ -125,10 +125,10 @@ struct xattr_handler *ext2_xattr_handlers[] = { NULL }; -static inline struct xattr_handler * +static inline const struct xattr_handler * ext2_xattr_handler(int name_index) { - struct xattr_handler *handler = NULL; + const struct xattr_handler *handler = NULL; if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map)) handler = ext2_xattr_handler_map[name_index]; @@ -160,6 +160,10 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, if (name == NULL) return -EINVAL; + name_len = strlen(name); + if (name_len > 255) + return -ERANGE; + down_read(&EXT2_I(inode)->xattr_sem); error = -ENODATA; if (!EXT2_I(inode)->i_file_acl) @@ -180,12 +184,8 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", error = -EIO; goto cleanup; } - /* find named attribute */ - name_len = strlen(name); - error = -ERANGE; - if (name_len > 255) - goto cleanup; + /* find named attribute */ entry = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { struct ext2_xattr_entry *next = @@ -198,14 +198,6 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", goto found; entry = next; } - /* Check the remaining name entries */ - while (!IS_LAST_ENTRY(entry)) { - struct ext2_xattr_entry *next = - EXT2_XATTR_NEXT(entry); - if ((char *)next >= end) - goto bad_block; - entry = next; - } if (ext2_xattr_cache_insert(bh)) ea_idebug(inode, "cache insert failed"); error = -ENODATA; @@ -249,8 +241,9 @@ cleanup: * used / required on success. 
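The size-probing convention this comment describes — a NULL buffer returns the space required — is the standard listxattr(2) contract, so callers typically ask twice. A small sketch:

/* Sketch: probe the required size, then fetch and print the NUL-separated
 * attribute names that ext2_xattr_list() produces. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	ssize_t need, len;
	char *buf, *p;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	need = listxattr(argv[1], NULL, 0);		/* size probe */
	if (need < 0) { perror("listxattr"); return 1; }

	buf = malloc(need ? need : 1);
	if (!buf)
		return 1;
	len = listxattr(argv[1], buf, need);		/* can still race and fail with ERANGE */
	if (len < 0) { perror("listxattr"); return 1; }

	for (p = buf; p < buf + len; p += strlen(p) + 1)
		puts(p);				/* names are NUL-separated */
	free(buf);
	return 0;
}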
*/ static int -ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) +ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) { + struct inode *inode = dentry->d_inode; struct buffer_head *bh = NULL; struct ext2_xattr_entry *entry; char *end; @@ -296,13 +289,14 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", /* list the attribute names */ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); entry = EXT2_XATTR_NEXT(entry)) { - struct xattr_handler *handler = + const struct xattr_handler *handler = ext2_xattr_handler(entry->e_name_index); if (handler) { - size_t size = handler->list(inode, buffer, rest, + size_t size = handler->list(dentry, buffer, rest, entry->e_name, - entry->e_name_len); + entry->e_name_len, + handler->flags); if (buffer) { if (size > rest) { error = -ERANGE; @@ -330,7 +324,7 @@ cleanup: ssize_t ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) { - return ext2_xattr_list(dentry->d_inode, buffer, size); + return ext2_xattr_list(dentry, buffer, size); } /* @@ -342,18 +336,16 @@ static void ext2_xattr_update_super_block(struct super_block *sb) if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) return; - lock_super(sb); - EXT2_SB(sb)->s_es->s_feature_compat |= - cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); - sb->s_dirt = 1; + spin_lock(&EXT2_SB(sb)->s_lock); + EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR); + spin_unlock(&EXT2_SB(sb)->s_lock); mark_buffer_dirty(EXT2_SB(sb)->s_sbh); - unlock_super(sb); } /* * ext2_xattr_set() * - * Create, replace or remove an extended attribute for this inode. Buffer + * Create, replace or remove an extended attribute for this inode. Value * is NULL to remove an existing extended attribute, and non-NULL to * either replace an existing extended attribute, or create a new extended * attribute. The flags XATTR_REPLACE and XATTR_CREATE @@ -644,13 +636,12 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, the inode. */ ea_bdebug(new_bh, "reusing block"); - error = -EDQUOT; - if (DQUOT_ALLOC_BLOCK(inode, 1)) { + error = dquot_alloc_block(inode, 1); + if (error) { unlock_buffer(new_bh); goto cleanup; } - HDR(new_bh)->h_refcount = cpu_to_le32(1 + - le32_to_cpu(HDR(new_bh)->h_refcount)); + le32_add_cpu(&HDR(new_bh)->h_refcount, 1); ea_bdebug(new_bh, "refcount now=%d", le32_to_cpu(HDR(new_bh)->h_refcount)); } @@ -663,20 +654,18 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, ext2_xattr_cache_insert(new_bh); } else { /* We need to allocate a new block */ - int goal = le32_to_cpu(EXT2_SB(sb)->s_es-> - s_first_data_block) + - EXT2_I(inode)->i_block_group * - EXT2_BLOCKS_PER_GROUP(sb); - int block = ext2_new_block(inode, goal, - NULL, NULL, &error); + ext2_fsblk_t goal = ext2_group_first_block_no(sb, + EXT2_I(inode)->i_block_group); + int block = ext2_new_block(inode, goal, &error); if (error) goto cleanup; ea_idebug(inode, "creating block %d", block); new_bh = sb_getblk(sb, block); - if (!new_bh) { + if (unlikely(!new_bh)) { ext2_free_blocks(inode, block, 1); - error = -EIO; + mark_inode_dirty(inode); + error = -ENOMEM; goto cleanup; } lock_buffer(new_bh); @@ -700,13 +689,15 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, EXT2_I(inode)->i_file_acl = new_bh ? 
new_bh->b_blocknr : 0; inode->i_ctime = CURRENT_TIME_SEC; if (IS_SYNC(inode)) { - error = ext2_sync_inode (inode); + error = sync_inode_metadata(inode, 1); /* In case sync failed due to ENOSPC the inode was actually * written (only some dirty data were not) so we just proceed * as if nothing happened and cleanup the unused block */ if (error && error != -ENOSPC) { - if (new_bh && new_bh != old_bh) - DQUOT_FREE_BLOCK(inode, 1); + if (new_bh && new_bh != old_bh) { + dquot_free_block_nodirty(inode, 1); + mark_inode_dirty(inode); + } goto cleanup; } } else @@ -729,17 +720,18 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, mb_cache_entry_free(ce); ea_bdebug(old_bh, "freeing"); ext2_free_blocks(inode, old_bh->b_blocknr, 1); + mark_inode_dirty(inode); /* We let our caller release old_bh, so we * need to duplicate the buffer before. */ get_bh(old_bh); bforget(old_bh); } else { /* Decrement the refcount only. */ - HDR(old_bh)->h_refcount = cpu_to_le32( - le32_to_cpu(HDR(old_bh)->h_refcount) - 1); + le32_add_cpu(&HDR(old_bh)->h_refcount, -1); if (ce) mb_cache_entry_release(ce); - DQUOT_FREE_BLOCK(inode, 1); + dquot_free_block_nodirty(inode, 1); + mark_inode_dirty(inode); mark_buffer_dirty(old_bh); ea_bdebug(old_bh, "refcount now=%d", le32_to_cpu(HDR(old_bh)->h_refcount)); @@ -793,8 +785,7 @@ ext2_xattr_delete_inode(struct inode *inode) bforget(bh); unlock_buffer(bh); } else { - HDR(bh)->h_refcount = cpu_to_le32( - le32_to_cpu(HDR(bh)->h_refcount) - 1); + le32_add_cpu(&HDR(bh)->h_refcount, -1); if (ce) mb_cache_entry_release(ce); ea_bdebug(bh, "refcount now=%d", @@ -803,7 +794,7 @@ ext2_xattr_delete_inode(struct inode *inode) mark_buffer_dirty(bh); if (IS_SYNC(inode)) sync_dirty_buffer(bh); - DQUOT_FREE_BLOCK(inode, 1); + dquot_free_block_nodirty(inode, 1); } EXT2_I(inode)->i_file_acl = 0; @@ -839,10 +830,10 @@ ext2_xattr_cache_insert(struct buffer_head *bh) struct mb_cache_entry *ce; int error; - ce = mb_cache_entry_alloc(ext2_xattr_cache); + ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS); if (!ce) return -ENOMEM; - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); + error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); if (error) { mb_cache_entry_free(ce); if (error == -EBUSY) { @@ -916,8 +907,8 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); again: - ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, - inode->i_sb->s_bdev, hash); + ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev, + hash); while (ce) { struct buffer_head *bh; @@ -949,7 +940,7 @@ again: unlock_buffer(bh); brelse(bh); } - ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); + ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); } return NULL; } @@ -1025,9 +1016,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header, int __init init_ext2_xattr(void) { - ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, - sizeof(struct mb_cache_entry) + - sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); + ext2_xattr_cache = mb_cache_create("ext2_xattr", 6); if (!ext2_xattr_cache) return -ENOMEM; return 0; diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h index bf8175b2ced..60edf298644 100644 --- a/fs/ext2/xattr.h +++ b/fs/ext2/xattr.h @@ -55,11 +55,9 @@ struct ext2_xattr_entry { # ifdef CONFIG_EXT2_FS_XATTR -extern struct xattr_handler ext2_xattr_user_handler; -extern struct xattr_handler 
ext2_xattr_trusted_handler; -extern struct xattr_handler ext2_xattr_acl_access_handler; -extern struct xattr_handler ext2_xattr_acl_default_handler; -extern struct xattr_handler ext2_xattr_security_handler; +extern const struct xattr_handler ext2_xattr_user_handler; +extern const struct xattr_handler ext2_xattr_trusted_handler; +extern const struct xattr_handler ext2_xattr_security_handler; extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); @@ -72,7 +70,7 @@ extern void ext2_xattr_put_super(struct super_block *); extern int init_ext2_xattr(void); extern void exit_ext2_xattr(void); -extern struct xattr_handler *ext2_xattr_handlers[]; +extern const struct xattr_handler *ext2_xattr_handlers[]; # else /* CONFIG_EXT2_FS_XATTR */ @@ -116,9 +114,11 @@ exit_ext2_xattr(void) # endif /* CONFIG_EXT2_FS_XATTR */ #ifdef CONFIG_EXT2_FS_SECURITY -extern int ext2_init_security(struct inode *inode, struct inode *dir); +extern int ext2_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr); #else -static inline int ext2_init_security(struct inode *inode, struct inode *dir) +static inline int ext2_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr) { return 0; } diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index a2661279847..c0ebc4db884 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -3,19 +3,15 @@ * Handler for storing security labels as extended attributes. */ -#include <linux/module.h> -#include <linux/string.h> -#include <linux/fs.h> -#include <linux/smp_lock.h> -#include <linux/ext2_fs.h> +#include "ext2.h" #include <linux/security.h> #include "xattr.h" static size_t -ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { - const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; + const int prefix_len = XATTR_SECURITY_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (list && total_len <= list_size) { @@ -27,47 +23,50 @@ ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, } static int -ext2_xattr_security_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext2_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, + return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name, buffer, size); } static int -ext2_xattr_security_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext2_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name, + return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name, value, size, flags); } -int -ext2_init_security(struct inode *inode, struct inode *dir) +static int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) { - int err; - size_t len; - void *value; - char *name; + const struct xattr *xattr; + int err = 0; - err = security_inode_init_security(inode, dir, &name, &value, &len); - if (err) { - if (err == -EOPNOTSUPP) - return 0; - return err; + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + err 
= ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, + xattr->name, xattr->value, + xattr->value_len, 0); + if (err < 0) + break; } - err = ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, - name, value, len, 0); - kfree(name); - kfree(value); return err; } -struct xattr_handler ext2_xattr_security_handler = { +int +ext2_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr) +{ + return security_inode_init_security(inode, dir, qstr, + &ext2_initxattrs, NULL); +} + +const struct xattr_handler ext2_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, .list = ext2_xattr_security_list, .get = ext2_xattr_security_get, diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index f28a6a499c9..7e192574c00 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -5,21 +5,14 @@ * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> */ -#include <linux/module.h> -#include <linux/string.h> -#include <linux/capability.h> -#include <linux/fs.h> -#include <linux/smp_lock.h> -#include <linux/ext2_fs.h> +#include "ext2.h" #include "xattr.h" -#define XATTR_TRUSTED_PREFIX "trusted." - static size_t -ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { - const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; + const int prefix_len = XATTR_TRUSTED_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; if (!capable(CAP_SYS_ADMIN)) @@ -34,26 +27,26 @@ ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, } static int -ext2_xattr_trusted_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext2_xattr_trusted_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, + return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name, buffer, size); } static int -ext2_xattr_trusted_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext2_xattr_trusted_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name, + return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name, value, size, flags); } -struct xattr_handler ext2_xattr_trusted_handler = { +const struct xattr_handler ext2_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, .list = ext2_xattr_trusted_list, .get = ext2_xattr_trusted_get, diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index f383e7c3a7b..f470e44c4b8 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -6,21 +6,18 @@ */ #include <linux/init.h> -#include <linux/module.h> #include <linux/string.h> #include "ext2.h" #include "xattr.h" -#define XATTR_USER_PREFIX "user." 
- static size_t -ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, - const char *name, size_t name_len) +ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, + const char *name, size_t name_len, int type) { - const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; + const size_t prefix_len = XATTR_USER_PREFIX_LEN; const size_t total_len = prefix_len + name_len + 1; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return 0; if (list && total_len <= list_size) { @@ -32,30 +29,31 @@ ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, } static int -ext2_xattr_user_get(struct inode *inode, const char *name, - void *buffer, size_t size) +ext2_xattr_user_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); + return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER, + name, buffer, size); } static int -ext2_xattr_user_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) +ext2_xattr_user_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) { if (strcmp(name, "") == 0) return -EINVAL; - if (!test_opt(inode->i_sb, XATTR_USER)) + if (!test_opt(dentry->d_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, - value, size, flags); + return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER, + name, value, size, flags); } -struct xattr_handler ext2_xattr_user_handler = { +const struct xattr_handler ext2_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, .list = ext2_xattr_user_list, .get = ext2_xattr_user_get, diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index ca7f0031238..e98171a11cf 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c @@ -9,30 +9,34 @@ #include <linux/fs.h> #include <linux/genhd.h> #include <linux/buffer_head.h> -#include <linux/ext2_fs_sb.h> -#include <linux/ext2_fs.h> +#include <linux/blkdev.h> #include "ext2.h" #include "xip.h" static inline int -__inode_direct_access(struct inode *inode, sector_t sector, - unsigned long *data) +__inode_direct_access(struct inode *inode, sector_t block, + void **kaddr, unsigned long *pfn) { - BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access); - return inode->i_sb->s_bdev->bd_disk->fops - ->direct_access(inode->i_sb->s_bdev,sector,data); + struct block_device *bdev = inode->i_sb->s_bdev; + const struct block_device_operations *ops = bdev->bd_disk->fops; + sector_t sector; + + sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */ + + BUG_ON(!ops->direct_access); + return ops->direct_access(bdev, sector, kaddr, pfn); } static inline int -__ext2_get_sector(struct inode *inode, sector_t offset, int create, +__ext2_get_block(struct inode *inode, pgoff_t pgoff, int create, sector_t *result) { struct buffer_head tmp; int rc; memset(&tmp, 0, sizeof(struct buffer_head)); - rc = ext2_get_block(inode, offset/ (PAGE_SIZE/512), &tmp, - create); + tmp.b_size = 1 << inode->i_blkbits; + rc = ext2_get_block(inode, pgoff, &tmp, create); *result = tmp.b_blocknr; /* did we get a sparse block (hole in the file)? 
*/ @@ -45,15 +49,15 @@ __ext2_get_sector(struct inode *inode, sector_t offset, int create, } int -ext2_clear_xip_target(struct inode *inode, int block) +ext2_clear_xip_target(struct inode *inode, sector_t block) { - sector_t sector = block * (PAGE_SIZE/512); - unsigned long data; + void *kaddr; + unsigned long pfn; int rc; - rc = __inode_direct_access(inode, sector, &data); + rc = __inode_direct_access(inode, block, &kaddr, &pfn); if (!rc) - clear_page((void*)data); + clear_page(kaddr); return rc; } @@ -64,30 +68,24 @@ void ext2_xip_verify_sb(struct super_block *sb) if ((sbi->s_mount_opt & EXT2_MOUNT_XIP) && !sb->s_bdev->bd_disk->fops->direct_access) { sbi->s_mount_opt &= (~EXT2_MOUNT_XIP); - ext2_warning(sb, __FUNCTION__, - "ignoring xip option - not supported by bdev"); + ext2_msg(sb, KERN_WARNING, + "warning: ignoring xip option - " + "not supported by bdev"); } } -struct page * -ext2_get_xip_page(struct address_space *mapping, sector_t offset, - int create) +int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create, + void **kmem, unsigned long *pfn) { int rc; - unsigned long data; - sector_t sector; + sector_t block; /* first, retrieve the sector number */ - rc = __ext2_get_sector(mapping->host, offset, create, &sector); + rc = __ext2_get_block(mapping->host, pgoff, create, &block); if (rc) - goto error; + return rc; /* retrieve address of the target data */ - rc = __inode_direct_access - (mapping->host, sector * (PAGE_SIZE/512), &data); - if (!rc) - return virt_to_page(data); - - error: - return ERR_PTR(rc); + rc = __inode_direct_access(mapping->host, block, kmem, pfn); return rc; } diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h index aa85331d6c5..18b34d2f31b 100644 --- a/fs/ext2/xip.h +++ b/fs/ext2/xip.h @@ -7,19 +7,20 @@ #ifdef CONFIG_EXT2_FS_XIP extern void ext2_xip_verify_sb (struct super_block *); -extern int ext2_clear_xip_target (struct inode *, int); +extern int ext2_clear_xip_target (struct inode *, sector_t); static inline int ext2_use_xip (struct super_block *sb) { struct ext2_sb_info *sbi = EXT2_SB(sb); return (sbi->s_mount_opt & EXT2_MOUNT_XIP); } -struct page* ext2_get_xip_page (struct address_space *, sector_t, int); -#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page) +int ext2_get_xip_mem(struct address_space *, pgoff_t, int, + void **, unsigned long *); +#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_mem) #else #define mapping_is_xip(map) 0 #define ext2_xip_verify_sb(sb) do { } while (0) #define ext2_use_xip(sb) 0 #define ext2_clear_xip_target(inode, chain) 0 -#define ext2_get_xip_page NULL +#define ext2_get_xip_mem NULL #endif
