diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/Makefile | 12 | ||||
-rw-r--r-- | fs/ext4/acl.c | 551 | ||||
-rw-r--r-- | fs/ext4/acl.h | 81 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 1818 | ||||
-rw-r--r-- | fs/ext4/bitmap.c | 32 | ||||
-rw-r--r-- | fs/ext4/dir.c | 518 | ||||
-rw-r--r-- | fs/ext4/file.c | 139 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 88 | ||||
-rw-r--r-- | fs/ext4/hash.c | 152 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 758 | ||||
-rw-r--r-- | fs/ext4/inode.c | 3219 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 307 | ||||
-rw-r--r-- | fs/ext4/namei.c | 2397 | ||||
-rw-r--r-- | fs/ext4/namei.h | 8 | ||||
-rw-r--r-- | fs/ext4/resize.c | 1042 | ||||
-rw-r--r-- | fs/ext4/super.c | 2754 | ||||
-rw-r--r-- | fs/ext4/symlink.c | 54 | ||||
-rw-r--r-- | fs/ext4/xattr.c | 1317 | ||||
-rw-r--r-- | fs/ext4/xattr.h | 145 | ||||
-rw-r--r-- | fs/ext4/xattr_security.c | 77 | ||||
-rw-r--r-- | fs/ext4/xattr_trusted.c | 62 | ||||
-rw-r--r-- | fs/ext4/xattr_user.c | 64 |
22 files changed, 15595 insertions, 0 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile new file mode 100644 index 00000000000..704cd44a40c --- /dev/null +++ b/fs/ext4/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the linux ext3-filesystem routines. +# + +obj-$(CONFIG_EXT3_FS) += ext3.o + +ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ + ioctl.o namei.o super.o symlink.o hash.o resize.o + +ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +ext3-$(CONFIG_EXT3_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c new file mode 100644 index 00000000000..1e5038d9a01 --- /dev/null +++ b/fs/ext4/acl.c @@ -0,0 +1,551 @@ +/* + * linux/fs/ext3/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/capability.h> +#include <linux/fs.h> +#include <linux/ext3_jbd.h> +#include <linux/ext3_fs.h> +#include "xattr.h" +#include "acl.h" + +/* + * Convert from filesystem to in-memory representation. 
+ */ +static struct posix_acl * +ext3_acl_from_disk(const void *value, size_t size) +{ + const char *end = (char *)value + size; + int n, count; + struct posix_acl *acl; + + if (!value) + return NULL; + if (size < sizeof(ext3_acl_header)) + return ERR_PTR(-EINVAL); + if (((ext3_acl_header *)value)->a_version != + cpu_to_le32(EXT3_ACL_VERSION)) + return ERR_PTR(-EINVAL); + value = (char *)value + sizeof(ext3_acl_header); + count = ext3_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + for (n=0; n < count; n++) { + ext3_acl_entry *entry = + (ext3_acl_entry *)value; + if ((char *)value + sizeof(ext3_acl_entry_short) > end) + goto fail; + acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + value = (char *)value + + sizeof(ext3_acl_entry_short); + acl->a_entries[n].e_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + value = (char *)value + sizeof(ext3_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_id = + le32_to_cpu(entry->e_id); + break; + + default: + goto fail; + } + } + if (value != end) + goto fail; + return acl; + +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +/* + * Convert from in-memory to filesystem representation. 
+ */ +static void * +ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) +{ + ext3_acl_header *ext_acl; + char *e; + size_t n; + + *size = ext3_acl_size(acl->a_count); + ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count * + sizeof(ext3_acl_entry), GFP_KERNEL); + if (!ext_acl) + return ERR_PTR(-ENOMEM); + ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); + e = (char *)ext_acl + sizeof(ext3_acl_header); + for (n=0; n < acl->a_count; n++) { + ext3_acl_entry *entry = (ext3_acl_entry *)e; + entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = + cpu_to_le32(acl->a_entries[n].e_id); + e += sizeof(ext3_acl_entry); + break; + + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + e += sizeof(ext3_acl_entry_short); + break; + + default: + goto fail; + } + } + return (char *)ext_acl; + +fail: + kfree(ext_acl); + return ERR_PTR(-EINVAL); +} + +static inline struct posix_acl * +ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = EXT3_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != EXT3_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + + return acl; +} + +static inline void +ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != EXT3_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +/* + * Inode operation get_posix_acl(). 
+ * + * inode->i_mutex: don't care + */ +static struct posix_acl * +ext3_get_acl(struct inode *inode, int type) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + int name_index; + char *value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return NULL; + + switch(type) { + case ACL_TYPE_ACCESS: + acl = ext3_iget_acl(inode, &ei->i_acl); + if (acl != EXT3_ACL_NOT_CACHED) + return acl; + name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + acl = ext3_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT3_ACL_NOT_CACHED) + return acl; + name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + + default: + return ERR_PTR(-EINVAL); + } + retval = ext3_xattr_get(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = ext3_xattr_get(inode, name_index, "", value, retval); + } + if (retval > 0) + acl = ext3_acl_from_disk(value, retval); + else if (retval == -ENODATA || retval == -ENOSYS) + acl = NULL; + else + acl = ERR_PTR(retval); + kfree(value); + + if (!IS_ERR(acl)) { + switch(type) { + case ACL_TYPE_ACCESS: + ext3_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext3_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return acl; +} + +/* + * Set the access or default ACL of an inode. 
+ * + * inode->i_mutex: down unless called from ext3_new_inode + */ +static int +ext3_set_acl(handle_t *handle, struct inode *inode, int type, + struct posix_acl *acl) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + int name_index; + void *value = NULL; + size_t size = 0; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch(type) { + case ACL_TYPE_ACCESS: + name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; + else { + inode->i_mode = mode; + ext3_mark_inode_dirty(handle, inode); + if (error == 0) + acl = NULL; + } + } + break; + + case ACL_TYPE_DEFAULT: + name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + if (acl) { + value = ext3_acl_to_disk(acl, &size); + if (IS_ERR(value)) + return (int)PTR_ERR(value); + } + + error = ext3_xattr_set_handle(handle, inode, name_index, "", + value, size, 0); + + kfree(value); + if (!error) { + switch(type) { + case ACL_TYPE_ACCESS: + ext3_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext3_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return error; +} + +static int +ext3_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + + return -EAGAIN; +} + +int +ext3_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, ext3_check_acl); +} + +/* + * Initialize the ACLs of a new inode. Called from ext3_new_inode. 
+ * + * dir->i_mutex: down + * inode->i_mutex: up (access to inode is still exclusive) + */ +int +ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(dir->i_sb, POSIX_ACL)) { + acl = ext3_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + if (test_opt(inode->i_sb, POSIX_ACL) && acl) { + struct posix_acl *clone; + mode_t mode; + + if (S_ISDIR(inode->i_mode)) { + error = ext3_set_acl(handle, inode, + ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + + mode = inode->i_mode; + error = posix_acl_create_masq(clone, &mode); + if (error >= 0) { + inode->i_mode = mode; + if (error > 0) { + /* This is an extended ACL */ + error = ext3_set_acl(handle, inode, + ACL_TYPE_ACCESS, clone); + } + } + posix_acl_release(clone); + } +cleanup: + posix_acl_release(acl); + return error; +} + +/* + * Does chmod for an inode that may have an Access Control List. The + * inode->i_mode field must be updated to the desired value by the caller + * before calling this function. + * Returns 0 on success, or a negative error number. + * + * We change the ACL rather than storing some ACL entries in the file + * mode permission bits (which would be more efficient), because that + * would break once additional permissions (like ACL_APPEND, ACL_DELETE + * for directories) are added. There are no more bits available in the + * file mode. 
+ * + * inode->i_mutex: down + */ +int +ext3_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) { + handle_t *handle; + int retries = 0; + + retry: + handle = ext3_journal_start(inode, + EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + ext3_std_error(inode->i_sb, error); + goto out; + } + error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); + ext3_journal_stop(handle); + if (error == -ENOSPC && + ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + } +out: + posix_acl_release(clone); + return error; +} + +/* + * Extended attribute handlers + */ +static size_t +ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, + const char *name, size_t name_len) +{ + const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_len) + memcpy(list, POSIX_ACL_XATTR_ACCESS, size); + return size; +} + +static size_t +ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, + const char *name, size_t name_len) +{ + const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_len) + memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); + return size; +} + +static int +ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +{ + struct posix_acl *acl; + int error; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = ext3_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return 
-ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int +ext3_xattr_get_acl_access(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); +} + +static int +ext3_xattr_get_acl_default(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); +} + +static int +ext3_xattr_set_acl(struct inode *inode, int type, const void *value, + size_t size) +{ + handle_t *handle; + struct posix_acl *acl; + int error, retries = 0; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else + acl = NULL; + +retry: + handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + error = ext3_set_acl(handle, inode, type, acl); + ext3_journal_stop(handle); + if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + +release_and_out: + posix_acl_release(acl); + return error; +} + +static int +ext3_xattr_set_acl_access(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); +} + +static int +ext3_xattr_set_acl_default(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); +} + +struct xattr_handler 
/* xattr handler table for the access ACL attribute (prefix POSIX_ACL_XATTR_ACCESS). */ ext3_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = ext3_xattr_list_acl_access, + .get = ext3_xattr_get_acl_access, + .set = ext3_xattr_set_acl_access, +}; + +/* xattr handler table for the default ACL attribute (prefix POSIX_ACL_XATTR_DEFAULT). */ struct xattr_handler ext3_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = ext3_xattr_list_acl_default, + .get = ext3_xattr_get_acl_default, + .set = ext3_xattr_set_acl_default, +}; diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h new file mode 100644 index 00000000000..0d1e6279cbf --- /dev/null +++ b/fs/ext4/acl.h @@ -0,0 +1,81 @@ +/* + File: fs/ext3/acl.h + + (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + +#include <linux/posix_acl_xattr.h> + +#define EXT3_ACL_VERSION 0x0001 + +/* On-disk ACL entry that carries an id; all fields are little-endian. */ typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} ext3_acl_entry; + +/* On-disk ACL entry without the e_id field. */ typedef struct { + __le16 e_tag; + __le16 e_perm; +} ext3_acl_entry_short; + +/* On-disk ACL header: only the little-endian format version. */ typedef struct { + __le32 a_version; +} ext3_acl_header; + +/* Size in bytes of an on-disk ACL with count entries: the header, up to four entries in the short layout, and every entry beyond the fourth in the full layout. */ static inline size_t ext3_acl_size(int count) +{ + if (count <= 4) { + return sizeof(ext3_acl_header) + + count * sizeof(ext3_acl_entry_short); + } else { + return sizeof(ext3_acl_header) + + 4 * sizeof(ext3_acl_entry_short) + + (count - 4) * sizeof(ext3_acl_entry); + } +} + +/* Inverse of ext3_acl_size(): number of entries in an on-disk ACL of size bytes, or -1 if the payload is not a whole number of entries. size is unsigned, so the header subtraction wraps if size is smaller than the header; ext3_acl_from_disk() checks that before calling. */ static inline int ext3_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(ext3_acl_header); + s = size - 4 * sizeof(ext3_acl_entry_short); + if (s < 0) { + if (size % sizeof(ext3_acl_entry_short)) + return -1; + return size / sizeof(ext3_acl_entry_short); + } else { + if (s % sizeof(ext3_acl_entry)) + return -1; + return s / sizeof(ext3_acl_entry) + 4; + } +} + +#ifdef CONFIG_EXT3_FS_POSIX_ACL + +/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl + if the ACL has not been cached */ +#define EXT3_ACL_NOT_CACHED ((void *)-1) + +/* acl.c */ +extern int ext3_permission (struct inode *, int, struct nameidata *); +extern int ext3_acl_chmod (struct inode *); +extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); + +#else /* 
CONFIG_EXT3_FS_POSIX_ACL */ +#include <linux/sched.h> +#define ext3_permission NULL + +/* Stub for builds without CONFIG_EXT3_FS_POSIX_ACL: does nothing and reports success. */ static inline int +ext3_acl_chmod(struct inode *inode) +{ + return 0; +} + +/* Stub for builds without CONFIG_EXT3_FS_POSIX_ACL: does nothing and reports success. */ static inline int +ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif /* CONFIG_EXT3_FS_POSIX_ACL */ + diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c new file mode 100644 index 00000000000..b41a7d7e20f --- /dev/null +++ b/fs/ext4/balloc.c @@ -0,0 +1,1818 @@ +/* + * linux/fs/ext3/balloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/time.h> +#include <linux/capability.h> +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> +#include <linux/ext3_jbd.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> + +/* + * balloc.c contains the blocks allocation and deallocation routines + */ + +/* + * The free blocks are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext3_read_super). 
+ */ + + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +/** + * ext3_get_group_desc() -- load group descriptor from disk + * @sb: super block + * @block_group: given block group + * @bh: pointer to the buffer head to store the block + * group descriptor + */ +struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh) +{ + unsigned long group_desc; + unsigned long offset; + struct ext3_group_desc * desc; + struct ext3_sb_info *sbi = EXT3_SB(sb); + + if (block_group >= sbi->s_groups_count) { + ext3_error (sb, "ext3_get_group_desc", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); + + return NULL; + } + smp_rmb(); + + group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); + offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); + if (!sbi->s_group_desc[group_desc]) { + ext3_error (sb, "ext3_get_group_desc", + "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); + return NULL; + } + + desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data; + if (bh) + *bh = sbi->s_group_desc[group_desc]; + return desc + offset; +} + +/** + * read_block_bitmap() + * @sb: super block + * @block_group: given block group + * + * Read the bitmap for a given block_group, reading into the specified + * slot in the superblock's bitmap cache. + * + * Return buffer_head on success or NULL in case of failure. 
+ */ +static struct buffer_head * +read_block_bitmap(struct super_block *sb, unsigned int block_group) +{ + struct ext3_group_desc * desc; + struct buffer_head * bh = NULL; + + desc = ext3_get_group_desc (sb, block_group, NULL); + if (!desc) + goto error_out; + bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); + if (!bh) + ext3_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %u", + block_group, le32_to_cpu(desc->bg_block_bitmap)); +error_out: + return bh; +} +/* + * The reservation window structure operations + * -------------------------------------------- + * Operations include: + * dump, find, add, remove, is_empty, find_next_reservable_window, etc. + * + * We use a red-black tree to represent per-filesystem reservation + * windows. + * + */ + +/** + * __rsv_window_dump() -- Dump the filesystem block allocation reservation map + * @rb_root: root of per-filesystem reservation rb tree + * @verbose: verbose mode + * @fn: function which wishes to dump the reservation map + * + * If verbose is turned on, it will print the whole block reservation + * windows(start, end). Otherwise, it will only print out the "bad" windows, + * those windows that overlap with their immediate neighbors. 
+ */ +#if 1 +static void __rsv_window_dump(struct rb_root *root, int verbose, + const char *fn) +{ + struct rb_node *n; + struct ext3_reserve_window_node *rsv, *prev; + int bad; + +restart: + n = rb_first(root); + bad = 0; + prev = NULL; + + printk("Block Allocation Reservation Windows Map (%s):\n", fn); + while (n) { + rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); + if (verbose) + printk("reservation window 0x%p " + "start: %lu, end: %lu\n", + rsv, rsv->rsv_start, rsv->rsv_end); + if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { + printk("Bad reservation %p (start >= end)\n", + rsv); + bad = 1; + } + if (prev && prev->rsv_end >= rsv->rsv_start) { + printk("Bad reservation %p (prev->end >= start)\n", + rsv); + bad = 1; + } + if (bad) { + if (!verbose) { + printk("Restarting reservation walk in verbose mode\n"); + verbose = 1; + goto restart; + } + } + n = rb_next(n); + prev = rsv; + } + printk("Window map complete.\n"); + if (bad) + BUG(); +} +#define rsv_window_dump(root, verbose) \ + __rsv_window_dump((root), (verbose), __FUNCTION__) +#else +#define rsv_window_dump(root, verbose) do {} while (0) +#endif + +/** + * goal_in_my_reservation() + * @rsv: inode's reservation window + * @grp_goal: given goal block relative to the allocation block group + * @group: the current allocation block group + * @sb: filesystem super block + * + * Test if the given goal block (group relative) is within the file's + * own block reservation window range. + * + * If the reservation window is outside the goal allocation group, return 0; + * grp_goal (given goal block) could be -1, which means no specific + * goal block. In this case, always return 1. 
+ * If the goal block is within the reservation window, return 1; + * otherwise, return 0; + */ +static int +goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, + unsigned int group, struct super_block * sb) +{ + ext3_fsblk_t group_first_block, group_last_block; + + group_first_block = ext3_group_first_block_no(sb, group); + group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); + + if ((rsv->_rsv_start > group_last_block) || + (rsv->_rsv_end < group_first_block)) + return 0; + if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) + || (grp_goal + group_first_block > rsv->_rsv_end))) + return 0; + return 1; +} + +/** + * search_reserve_window() + * @rb_root: root of reservation tree + * @goal: target allocation block + * + * Find the reserved window which includes the goal, or the previous one + * if the goal is not in any window. + * Returns NULL if there are no windows or if all windows start after the goal. + */ +static struct ext3_reserve_window_node * +search_reserve_window(struct rb_root *root, ext3_fsblk_t goal) +{ + struct rb_node *n = root->rb_node; + struct ext3_reserve_window_node *rsv; + + if (!n) + return NULL; + + do { + rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); + + if (goal < rsv->rsv_start) + n = n->rb_left; + else if (goal > rsv->rsv_end) + n = n->rb_right; + else + return rsv; + } while (n); + /* + * We've fallen off the end of the tree: the goal wasn't inside + * any particular node. OK, the previous node must be to one + * side of the interval containing the goal. If it's the RHS, + * we need to back up one. + */ + if (rsv->rsv_start > goal) { + n = rb_prev(&rsv->rsv_node); + rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); + } + return rsv; +} + +/** + * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree. + * @sb: super block + * @rsv: reservation window to add + * + * Must be called with rsv_lock hold. 
+ */ +void ext3_rsv_window_add(struct super_block *sb, + struct ext3_reserve_window_node *rsv) +{ + struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; + struct rb_node *node = &rsv->rsv_node; + ext3_fsblk_t start = rsv->rsv_start; + + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct ext3_reserve_window_node *this; + + while (*p) + { + parent = *p; + this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node); + + if (start < this->rsv_start) + p = &(*p)->rb_left; + else if (start > this->rsv_end) + p = &(*p)->rb_right; + else { + rsv_window_dump(root, 1); + BUG(); + } + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); +} + +/** + * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree + * @sb: super block + * @rsv: reservation window to remove + * + * Mark the block reservation window as not allocated, and unlink it + * from the filesystem reservation window rb tree. Must be called with + * rsv_lock hold. + */ +static void rsv_window_remove(struct super_block *sb, + struct ext3_reserve_window_node *rsv) +{ + rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_alloc_hit = 0; + rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); +} + +/* + * rsv_is_empty() -- Check if the reservation window is allocated. + * @rsv: given reservation window to check + * + * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED. 
+ */ +static inline int rsv_is_empty(struct ext3_reserve_window *rsv) +{ + /* a valid reservation end block could not be 0 */ + return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED; +} + +/** + * ext3_init_block_alloc_info() + * @inode: file inode structure + * + * Allocate and initialize the reservation window structure, and + * link the window to the ext3 inode structure at last + * + * The reservation window structure is only dynamically allocated + * and linked to ext3 inode the first time the open file + * needs a new block. So, before every ext3_new_block(s) call, for + * regular files, we should check whether the reservation window + * structure exists or not. In the latter case, this function is called. + * Fail to do so will result in block reservation being turned off for that + * open file. + * + * This function is called from ext3_get_blocks_handle(), also called + * when setting the reservation window size through ioctl before the file + * is open for write (needs block allocation). + * + * Needs truncate_mutex protection prior to call this function. + */ +void ext3_init_block_alloc_info(struct inode *inode) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info; + struct super_block *sb = inode->i_sb; + + block_i = kmalloc(sizeof(*block_i), GFP_NOFS); + if (block_i) { + struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node; + + rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + + /* + * if filesystem is mounted with NORESERVATION, the goal + * reservation window size is set to zero to indicate + * block reservation is off + */ |