aboutsummaryrefslogtreecommitdiff
path: root/fs/ext3
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext3')
-rw-r--r--fs/ext3/acl.c259
-rw-r--r--fs/ext3/acl.h9
-rw-r--r--fs/ext3/balloc.c109
-rw-r--r--fs/ext3/bitmap.c16
-rw-r--r--fs/ext3/dir.c360
-rw-r--r--fs/ext3/ext3.h1326
-rw-r--r--fs/ext3/ext3_jbd.c2
-rw-r--r--fs/ext3/file.c17
-rw-r--r--fs/ext3/fsync.c25
-rw-r--r--fs/ext3/hash.c8
-rw-r--r--fs/ext3/ialloc.c35
-rw-r--r--fs/ext3/inode.c257
-rw-r--r--fs/ext3/ioctl.c9
-rw-r--r--fs/ext3/namei.c124
-rw-r--r--fs/ext3/namei.h19
-rw-r--r--fs/ext3/resize.c17
-rw-r--r--fs/ext3/super.c232
-rw-r--r--fs/ext3/symlink.c4
-rw-r--r--fs/ext3/xattr.c19
-rw-r--r--fs/ext3/xattr.h2
-rw-r--r--fs/ext3/xattr_security.c11
-rw-r--r--fs/ext3/xattr_trusted.c6
-rw-r--r--fs/ext3/xattr_user.c5
23 files changed, 2031 insertions, 840 deletions
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 3091f62e55b..8bbaf5bcf98 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -4,13 +4,7 @@
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
*/
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/capability.h>
-#include <linux/fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
#include "xattr.h"
#include "acl.h"
@@ -54,16 +48,23 @@ ext3_acl_from_disk(const void *value, size_t size)
case ACL_OTHER:
value = (char *)value +
sizeof(ext3_acl_entry_short);
- acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
break;
case ACL_USER:
+ value = (char *)value + sizeof(ext3_acl_entry);
+ if ((char *)value > end)
+ goto fail;
+ acl->a_entries[n].e_uid =
+ make_kuid(&init_user_ns,
+ le32_to_cpu(entry->e_id));
+ break;
case ACL_GROUP:
value = (char *)value + sizeof(ext3_acl_entry);
if ((char *)value > end)
goto fail;
- acl->a_entries[n].e_id =
- le32_to_cpu(entry->e_id);
+ acl->a_entries[n].e_gid =
+ make_kgid(&init_user_ns,
+ le32_to_cpu(entry->e_id));
break;
default:
@@ -97,14 +98,19 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
e = (char *)ext_acl + sizeof(ext3_acl_header);
for (n=0; n < acl->a_count; n++) {
+ const struct posix_acl_entry *acl_e = &acl->a_entries[n];
ext3_acl_entry *entry = (ext3_acl_entry *)e;
- entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
- entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
- switch(acl->a_entries[n].e_tag) {
+ entry->e_tag = cpu_to_le16(acl_e->e_tag);
+ entry->e_perm = cpu_to_le16(acl_e->e_perm);
+ switch(acl_e->e_tag) {
case ACL_USER:
+ entry->e_id = cpu_to_le32(
+ from_kuid(&init_user_ns, acl_e->e_uid));
+ e += sizeof(ext3_acl_entry);
+ break;
case ACL_GROUP:
- entry->e_id =
- cpu_to_le32(acl->a_entries[n].e_id);
+ entry->e_id = cpu_to_le32(
+ from_kgid(&init_user_ns, acl_e->e_gid));
e += sizeof(ext3_acl_entry);
break;
@@ -139,13 +145,6 @@ ext3_get_acl(struct inode *inode, int type)
struct posix_acl *acl;
int retval;
- if (!test_opt(inode->i_sb, POSIX_ACL))
- return NULL;
-
- acl = get_cached_acl(inode, type);
- if (acl != ACL_NOT_CACHED)
- return acl;
-
switch (type) {
case ACL_TYPE_ACCESS:
name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
@@ -184,7 +183,7 @@ ext3_get_acl(struct inode *inode, int type)
* inode->i_mutex: down unless called from ext3_new_inode
*/
static int
-ext3_set_acl(handle_t *handle, struct inode *inode, int type,
+__ext3_set_acl(handle_t *handle, struct inode *inode, int type,
struct posix_acl *acl)
{
int name_index;
@@ -192,9 +191,6 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
size_t size = 0;
int error;
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
-
switch(type) {
case ACL_TYPE_ACCESS:
name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
@@ -237,204 +233,49 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
}
-/*
- * Initialize the ACLs of a new inode. Called from ext3_new_inode.
- *
- * dir->i_mutex: down
- * inode->i_mutex: up (access to inode is still exclusive)
- */
-int
-ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
-{
- struct posix_acl *acl = NULL;
- int error = 0;
-
- if (!S_ISLNK(inode->i_mode)) {
- if (test_opt(dir->i_sb, POSIX_ACL)) {
- acl = ext3_get_acl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- }
- if (!acl)
- inode->i_mode &= ~current_umask();
- }
- if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
- if (S_ISDIR(inode->i_mode)) {
- error = ext3_set_acl(handle, inode,
- ACL_TYPE_DEFAULT, acl);
- if (error)
- goto cleanup;
- }
- error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
- if (error < 0)
- return error;
-
- if (error > 0) {
- /* This is an extended ACL */
- error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
- }
- }
-cleanup:
- posix_acl_release(acl);
- return error;
-}
-
-/*
- * Does chmod for an inode that may have an Access Control List. The
- * inode->i_mode field must be updated to the desired value by the caller
- * before calling this function.
- * Returns 0 on success, or a negative error number.
- *
- * We change the ACL rather than storing some ACL entries in the file
- * mode permission bits (which would be more efficient), because that
- * would break once additional permissions (like ACL_APPEND, ACL_DELETE
- * for directories) are added. There are no more bits available in the
- * file mode.
- *
- * inode->i_mutex: down
- */
int
-ext3_acl_chmod(struct inode *inode)
+ext3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
- struct posix_acl *acl;
handle_t *handle;
- int retries = 0;
- int error;
+ int error, retries = 0;
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
- if (!test_opt(inode->i_sb, POSIX_ACL))
- return 0;
- acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl) || !acl)
- return PTR_ERR(acl);
- error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
- if (error)
- return error;
retry:
- handle = ext3_journal_start(inode,
- EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
- if (IS_ERR(handle)) {
- error = PTR_ERR(handle);
- ext3_std_error(inode->i_sb, error);
- goto out;
- }
- error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
+ handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ error = __ext3_set_acl(handle, inode, type, acl);
ext3_journal_stop(handle);
- if (error == -ENOSPC &&
- ext3_should_retry_alloc(inode->i_sb, &retries))
+ if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
-out:
- posix_acl_release(acl);
return error;
}
/*
- * Extended attribute handlers
+ * Initialize the ACLs of a new inode. Called from ext3_new_inode.
+ *
+ * dir->i_mutex: down
+ * inode->i_mutex: up (access to inode is still exclusive)
*/
-static size_t
-ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
- const char *name, size_t name_len, int type)
-{
- const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
-
- if (!test_opt(dentry->d_sb, POSIX_ACL))
- return 0;
- if (list && size <= list_len)
- memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
- return size;
-}
-
-static size_t
-ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
- const char *name, size_t name_len, int type)
-{
- const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
-
- if (!test_opt(dentry->d_sb, POSIX_ACL))
- return 0;
- if (list && size <= list_len)
- memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
- return size;
-}
-
-static int
-ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
- size_t size, int type)
+int
+ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
- struct posix_acl *acl;
+ struct posix_acl *default_acl, *acl;
int error;
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!test_opt(dentry->d_sb, POSIX_ACL))
- return -EOPNOTSUPP;
-
- acl = ext3_get_acl(dentry->d_inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl == NULL)
- return -ENODATA;
- error = posix_acl_to_xattr(acl, buffer, size);
- posix_acl_release(acl);
-
- return error;
-}
-
-static int
-ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags, int type)
-{
- struct inode *inode = dentry->d_inode;
- handle_t *handle;
- struct posix_acl *acl;
- int error, retries = 0;
-
- if (strcmp(name, "") != 0)
- return -EINVAL;
- if (!test_opt(inode->i_sb, POSIX_ACL))
- return -EOPNOTSUPP;
- if (!inode_owner_or_capable(inode))
- return -EPERM;
-
- if (value) {
- acl = posix_acl_from_xattr(value, size);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- else if (acl) {
- error = posix_acl_valid(acl);
- if (error)
- goto release_and_out;
- }
- } else
- acl = NULL;
-
-retry:
- handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
- if (IS_ERR(handle))
- return PTR_ERR(handle);
- error = ext3_set_acl(handle, inode, type, acl);
- ext3_journal_stop(handle);
- if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
+ error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+ if (error)
+ return error;
-release_and_out:
- posix_acl_release(acl);
+ if (default_acl) {
+ error = __ext3_set_acl(handle, inode, ACL_TYPE_DEFAULT,
+ default_acl);
+ posix_acl_release(default_acl);
+ }
+ if (acl) {
+ if (!error)
+ error = __ext3_set_acl(handle, inode, ACL_TYPE_ACCESS,
+ acl);
+ posix_acl_release(acl);
+ }
return error;
}
-
-const struct xattr_handler ext3_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .list = ext3_xattr_list_acl_access,
- .get = ext3_xattr_get_acl,
- .set = ext3_xattr_set_acl,
-};
-
-const struct xattr_handler ext3_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .list = ext3_xattr_list_acl_default,
- .get = ext3_xattr_get_acl,
- .set = ext3_xattr_set_acl,
-};
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index dbc921e458c..ea1c69edab9 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -55,18 +55,13 @@ static inline int ext3_acl_count(size_t size)
/* acl.c */
extern struct posix_acl *ext3_get_acl(struct inode *inode, int type);
-extern int ext3_acl_chmod (struct inode *);
+extern int ext3_set_acl(struct inode *inode, struct posix_acl *acl, int type);
extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
#else /* CONFIG_EXT3_FS_POSIX_ACL */
#include <linux/sched.h>
#define ext3_get_acl NULL
-
-static inline int
-ext3_acl_chmod(struct inode *inode)
-{
- return 0;
-}
+#define ext3_set_acl NULL
static inline int
ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a2038928f9a..158b5d4ce06 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -11,17 +11,9 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
-#include <linux/time.h>
-#include <linux/capability.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
#include <linux/blkdev.h>
-#include <trace/events/ext3.h>
+#include "ext3.h"
/*
* balloc.c contains the blocks allocation and deallocation routines
@@ -491,7 +483,7 @@ void ext3_discard_reservation(struct inode *inode)
* ext3_free_blocks_sb() -- Free given blocks and update quota
* @handle: handle to this transaction
* @sb: super block
- * @block: start physcial block to free
+ * @block: start physical block to free
* @count: number of blocks to free
* @pdquot_freed_blocks: pointer to quota
*/
@@ -1447,8 +1439,9 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
- !use_reservation && sbi->s_resuid != current_fsuid() &&
- (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
+ !use_reservation && !uid_eq(sbi->s_resuid, current_fsuid()) &&
+ (gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) ||
+ !in_group_p (sbi->s_resgid))) {
return 0;
}
return 1;
@@ -1734,17 +1727,17 @@ allocated:
percpu_counter_sub(&sbi->s_freeblocks_counter, num);
BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
- err = ext3_journal_dirty_metadata(handle, gdp_bh);
- if (!fatal)
- fatal = err;
-
+ fatal = ext3_journal_dirty_metadata(handle, gdp_bh);
if (fatal)
goto out;
*errp = 0;
brelse(bitmap_bh);
- dquot_free_block(inode, *count-num);
- *count = num;
+
+ if (num < *count) {
+ dquot_free_block(inode, *count-num);
+ *count = num;
+ }
trace_ext3_allocate_blocks(inode, goal, num,
(unsigned long long)ret_block);
@@ -1817,7 +1810,7 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
brelse(bitmap_bh);
printk("ext3_count_free_blocks: stored = "E3FSBLK
", computed = "E3FSBLK", "E3FSBLK"\n",
- le32_to_cpu(es->s_free_blocks_count),
+ (ext3_fsblk_t)le32_to_cpu(es->s_free_blocks_count),
desc_count, bitmap_count);
return bitmap_count;
#else
@@ -1970,7 +1963,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
sbi = EXT3_SB(sb);
/* Walk through the whole group */
- while (start < max) {
+ while (start <= max) {
start = bitmap_search_next_usable_block(start, bitmap_bh, max);
if (start < 0)
break;
@@ -1980,7 +1973,7 @@ static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
* Allocate contiguous free extents by setting bits in the
* block bitmap
*/
- while (next < max
+ while (next <= max
&& claim_block(sb_bgl_lock(sbi, group),
next, bitmap_bh)) {
next++;
@@ -2091,73 +2084,75 @@ err_out:
*/
int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
{
- ext3_grpblk_t last_block, first_block, free_blocks;
- unsigned long first_group, last_group;
- unsigned long group, ngroups;
+ ext3_grpblk_t last_block, first_block;
+ unsigned long group, first_group, last_group;
struct ext3_group_desc *gdp;
struct ext3_super_block *es = EXT3_SB(sb)->s_es;
- uint64_t start, len, minlen, trimmed;
+ uint64_t start, minlen, end, trimmed = 0;
+ ext3_fsblk_t first_data_blk =
+ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
int ret = 0;
- start = (range->start >> sb->s_blocksize_bits) +
- le32_to_cpu(es->s_first_data_block);
- len = range->len >> sb->s_blocksize_bits;
+ start = range->start >> sb->s_blocksize_bits;
+ end = start + (range->len >> sb->s_blocksize_bits) - 1;
minlen = range->minlen >> sb->s_blocksize_bits;
- trimmed = 0;
- if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
- return -EINVAL;
- if (start >= max_blks)
+ if (minlen > EXT3_BLOCKS_PER_GROUP(sb) ||
+ start >= max_blks ||
+ range->len < sb->s_blocksize)
return -EINVAL;
- if (start + len > max_blks)
- len = max_blks - start;
+ if (end >= max_blks)
+ end = max_blks - 1;
+ if (end <= first_data_blk)
+ goto out;
+ if (start < first_data_blk)
+ start = first_data_blk;
- ngroups = EXT3_SB(sb)->s_groups_count;
smp_rmb();
/* Determine first and last group to examine based on start and len */
ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start,
&first_group, &first_block);
- ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) (start + len),
+ ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end,
&last_group, &last_block);
- last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
- last_block = EXT3_BLOCKS_PER_GROUP(sb);
- if (first_group > last_group)
- return -EINVAL;
+ /* end now represents the last block to discard in this group */
+ end = EXT3_BLOCKS_PER_GROUP(sb) - 1;
for (group = first_group; group <= last_group; group++) {
gdp = ext3_get_group_desc(sb, group, NULL);
if (!gdp)
break;
- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
- if (free_blocks < minlen)
- continue;
-
/*
* For all the groups except the last one, last block will
- * always be EXT3_BLOCKS_PER_GROUP(sb), so we only need to
- * change it for the last group in which case first_block +
- * len < EXT3_BLOCKS_PER_GROUP(sb).
+ * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to
+ * change it for the last group, note that last_block is
+ * already computed earlier by ext3_get_group_no_and_offset()
*/
- if (first_block + len < EXT3_BLOCKS_PER_GROUP(sb))
- last_block = first_block + len;
- len -= last_block - first_block;
+ if (group == last_group)
+ end = last_block;
- ret = ext3_trim_all_free(sb, group, first_block,
- last_block, minlen);
- if (ret < 0)
- break;
+ if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) {
+ ret = ext3_trim_all_free(sb, group, first_block,
+ end, minlen);
+ if (ret < 0)
+ break;
+ trimmed += ret;
+ }
- trimmed += ret;
+ /*
+ * For every group except the first one, we are sure
+ * that the first block to discard will be block #0.
+ */
first_block = 0;
}
- if (ret >= 0)
+ if (ret > 0)
ret = 0;
- range->len = trimmed * sb->s_blocksize;
+out:
+ range->len = trimmed * sb->s_blocksize;
return ret;
}
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index 6afc39d8025..ef9c643e8e9 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -7,25 +7,13 @@
* Universite Pierre et Marie Curie (Paris VI)
*/
-#include <linux/buffer_head.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
#ifdef EXT3FS_DEBUG
-static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
-
unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
{
- unsigned int i;
- unsigned long sum = 0;
-
- if (!map)
- return (0);
- for (i = 0; i < numchars; i++)
- sum += nibblemap[map->b_data[i] & 0xf] +
- nibblemap[(map->b_data[i] >> 4) & 0xf];
- return (sum);
+ return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars);
}
#endif /* EXT3FS_DEBUG */
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 34f0a072b93..17742eed2c1 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -21,35 +21,14 @@
*
*/
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/buffer_head.h>
-#include <linux/slab.h>
-#include <linux/rbtree.h>
+#include <linux/compat.h>
+#include "ext3.h"
static unsigned char ext3_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
-static int ext3_readdir(struct file *, void *, filldir_t);
-static int ext3_dx_readdir(struct file * filp,
- void * dirent, filldir_t filldir);
-static int ext3_release_dir (struct inode * inode,
- struct file * filp);
-
-const struct file_operations ext3_dir_operations = {
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .readdir = ext3_readdir, /* we take BKL. needed?*/
- .unlocked_ioctl = ext3_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ext3_compat_ioctl,
-#endif
- .fsync = ext3_sync_file, /* BKL held */
- .release = ext3_release_dir,
-};
-
+static int ext3_dx_readdir(struct file *, struct dir_context *);
static unsigned char get_dtype(struct super_block *sb, int filetype)
{
@@ -60,6 +39,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
return (ext3_filetype_table[filetype]);
}
+/**
+ * Check if the given dir-inode refers to an htree-indexed directory
+ * (or a directory which could potentially get converted to use htree
+ * indexing).
+ *
+ * Return 1 if it is a dx dir, 0 if not
+ */
+static int is_dx_dir(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
+ EXT3_FEATURE_COMPAT_DIR_INDEX) &&
+ ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
+ ((inode->i_size >> sb->s_blocksize_bits) == 1)))
+ return 1;
+
+ return 0;
+}
int ext3_check_dir_entry (const char * function, struct inode * dir,
struct ext3_dir_entry_2 * de,
@@ -92,41 +90,30 @@ int ext3_check_dir_entry (const char * function, struct inode * dir,
return error_msg == NULL ? 1 : 0;
}
-static int ext3_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
+static int ext3_readdir(struct file *file, struct dir_context *ctx)
{
- int error = 0;
unsigned long offset;
- int i, stored;
+ int i;
struct ext3_dir_entry_2 *de;
- struct super_block *sb;
int err;
- struct inode *inode = filp->f_path.dentry->d_inode;
- int ret = 0;
+ struct inode *inode = file_inode(file);
+ struct super_block *sb = inode->i_sb;
int dir_has_error = 0;
- sb = inode->i_sb;
-
- if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
- EXT3_FEATURE_COMPAT_DIR_INDEX) &&
- ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
- ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
- err = ext3_dx_readdir(filp, dirent, filldir);
- if (err != ERR_BAD_DX_DIR) {
- ret = err;
- goto out;
- }
+ if (is_dx_dir(inode)) {
+ err = ext3_dx_readdir(file, ctx);
+ if (err != ERR_BAD_DX_DIR)
+ return err;
/*
* We don't set the inode dirty flag since it's not
* critical that it get flushed back to the disk.
*/
- EXT3_I(filp->f_path.dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
}
- stored = 0;
- offset = filp->f_pos & (sb->s_blocksize - 1);
+ offset = ctx->pos & (sb->s_blocksize - 1);
- while (!error && !stored && filp->f_pos < inode->i_size) {
- unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
+ while (ctx->pos < inode->i_size) {
+ unsigned long blk = ctx->pos >> EXT3_BLOCK_SIZE_BITS(sb);
struct buffer_head map_bh;
struct buffer_head *bh = NULL;
@@ -135,12 +122,12 @@ static int ext3_readdir(struct file * filp,
if (err > 0) {
pgoff_t index = map_bh.b_blocknr >>
(PAGE_CACHE_SHIFT - inode->i_blkbits);
- if (!ra_has_index(&filp->f_ra, index))
+ if (!ra_has_index(&file->f_ra, index))
page_cache_sync_readahead(
sb->s_bdev->bd_inode->i_mapping,
- &filp->f_ra, filp,
+ &file->f_ra, file,
index, 1);
- filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
+ file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
bh = ext3_bread(NULL, inode, blk, 0, &err);
}
@@ -152,22 +139,21 @@ static int ext3_readdir(struct file * filp,
if (!dir_has_error) {
ext3_error(sb, __func__, "directory #%lu "
"contains a hole at offset %lld",
- inode->i_ino, filp->f_pos);
+ inode->i_ino, ctx->pos);
dir_has_error = 1;
}
/* corrupt size? Maybe no more blocks to read */
- if (filp->f_pos > inode->i_blocks << 9)
+ if (ctx->pos > inode->i_blocks << 9)
break;
- filp->f_pos += sb->s_blocksize - offset;
+ ctx->pos += sb->s_blocksize - offset;
continue;
}
-revalidate:
/* If the dir block has changed since the last call to
* readdir(2), then we might be pointing to an invalid
* dirent right now. Scan from the start of the block
* to make sure. */
- if (filp->f_version != inode->i_version) {
+ if (offset && file->f_version != inode->i_version) {
for (i = 0; i < sb->s_blocksize && i < offset; ) {
de = (struct ext3_dir_entry_2 *)
(bh->b_data + i);
@@ -183,71 +169,124 @@ revalidate:
i += ext3_rec_len_from_disk(de->rec_len);
}
offset = i;
- filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
+ ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))
| offset;
- filp->f_version = inode->i_version;
+ file->f_version = inode->i_version;
}
- while (!error && filp->f_pos < inode->i_size
+ while (ctx->pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
bh, offset)) {
- /* On error, skip the f_pos to the
+ /* On error, skip the to the
next block. */
- filp->f_pos = (filp->f_pos |
+ ctx->pos = (ctx->pos |
(sb->s_blocksize - 1)) + 1;
- brelse (bh);
- ret = stored;
- goto out;
+ break;
}
offset += ext3_rec_len_from_disk(de->rec_len);
if (le32_to_cpu(de->inode)) {
- /* We might block in the next section
- * if the data destination is
- * currently swapped out. So, use a
- * version stamp to detect whether or
- * not the directory has been modified
- * during the copy operation.
- */
- u64 version = filp->f_version;
-
- error = filldir(dirent, de->name,
- de->name_len,
- filp->f_pos,
- le32_to_cpu(de->inode),
- get_dtype(sb, de->file_type));
- if (error)
- break;
- if (version != filp->f_version)
- goto revalidate;
- stored ++;
+ if (!dir_emit(ctx, de->name, de->name_len,
+ le32_to_cpu(de->inode),
+ get_dtype(sb, de->file_type))) {
+ brelse(bh);
+ return 0;
+ }
}
- filp->f_pos += ext3_rec_len_from_disk(de->rec_len);
+ ctx->pos += ext3_rec_len_from_disk(de->rec_len);
}
offset = 0;
brelse (bh);
+ if (ctx->pos < inode->i_size)
+ if (!dir_relax(inode))
+ return 0;
}
-out:
- return ret;
+ return 0;
+}
+
+static inline int is_32bit_api(void)
+{
+#ifdef CONFIG_COMPAT
+ return is_compat_task();
+#else
+ return (BITS_PER_LONG == 32);
+#endif
}
/*
* These functions convert from the major/minor hash to an f_pos
- * value.
+ * value for dx directories
+ *
+ * Upper layer (for example NFS) should specify FMODE_32BITHASH or
+ * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted
+ * directly on both 32-bit and 64-bit nodes, under such case, neither
+ * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
+ */
+static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return major >> 1;
+ else
+ return ((__u64)(major >> 1) << 32) | (__u64)minor;
+}
+
+static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return (pos << 1) & 0xffffffff;
+ else
+ return ((pos >> 32) << 1) & 0xffffffff;
+}
+
+static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return 0;
+ else
+ return pos & 0xffffffff;
+}
+
+/*
+ * Return 32- or 64-bit end-of-file for dx directories
+ */
+static inline loff_t ext3_get_htree_eof(struct file *filp)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return EXT3_HTREE_EOF_32BIT;
+ else
+ return EXT3_HTREE_EOF_64BIT;
+}
+
+
+/*
+ * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both
+ * non-htree and htree directories, where the "offset" is in terms
+ * of the filename hash value instead of the byte offset.
+ *
+ * Because we may return a 64-bit hash that is well beyond s_maxbytes,
+ * we need to pass the max hash as the maximum allowable offset in
+ * the htree directory case.
*
- * Currently we only use major hash numer. This is unfortunate, but
- * on 32-bit machines, the same VFS interface is used for lseek and
- * llseek, so if we use the 64 bit offset, then the 32-bit versions of
- * lseek/telldir/seekdir will blow out spectacularly, and from within
- * the ext2 low-level routine, we don't know if we're being called by
- * a 64-bit version of the system call or the 32-bit version of the
- * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
- * cookie. Sigh.
+ * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
+ * will be invalid once the directory was converted into a dx directory
*/
-#define hash2pos(major, minor) (major >> 1)
-#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff)
-#define pos2min_hash(pos) (0)
+static loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ int dx_dir = is_dx_dir(inode);
+ loff_t htree_max = ext3_get_htree_eof(file);
+
+ if (likely(dx_dir))
+ return generic_file_llseek_size(file, offset, whence,
+ htree_max, htree_max);
+ else
+ return generic_file_llseek(file, offset, whence);
+}
/*
* This structure holds the nodes of the red-black tree used to store
@@ -270,53 +309,28 @@ struct fname {
*/
static void free_rb_tree_fname(struct rb_root *root)
{
- struct rb_node *n = root->rb_node;
- struct rb_node *parent;
- struct fname *fname;
-
- while (n) {
- /* Do the node's children first */
- if (n->rb_left) {
- n = n->rb_left;
- continue;
- }
- if (n->rb_right) {
- n = n->rb_right;
- continue;
- }
- /*
- * The node has no children; free it, and then zero
- * out parent's link to it. Finally go to the
- * beginning of the loop and try to free the parent
- * node.
- */
- parent = rb_parent(n);
- fname = rb_entry(n, struct fname, rb_hash);
- while (fname) {
- struct fname * old = fname;
+ struct fname *fname, *next;
+
+ rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
+ do {
+ struct fname *old = fname;
fname = fname->next;
- kfree (old);
- }
- if (!parent)
- *root = RB_ROOT;
- else if (parent->rb_left == n)
- parent->rb_left = NULL;
- else if (parent->rb_right == n)
- parent->rb_right = NULL;
- n = parent;
- }
-}
+ kfree(old);
+ } while (fname);
+ *root = RB_ROOT;
+}
-static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
+ loff_t pos)
{
struct dir_private_info *p;
p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
if (!p)
return NULL;
- p->curr_hash = pos2maj_hash(pos);
- p->curr_minor_hash = pos2min_hash(pos);
+ p->curr_hash = pos2maj_hash(filp, pos);
+ p->curr_minor_hash = pos2min_hash(filp, pos);
return p;
}
@@ -391,62 +405,54 @@ int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
* for all entres on the fname linked list. (Normally there is only
* one entry on the linked list, unless there are 62 bit hash collisions.)
*/
-static int call_filldir(struct file * filp, void * dirent,
- filldir_t filldir, struct fname *fname)
+static bool call_filldir(struct file *file, struct dir_context *ctx,
+ struct fname *fname)
{
- struct dir_private_info *info = filp->private_data;
- loff_t curr_pos;
- struct inode *inode = filp->f_path.dentry->d_inode;
- struct super_block * sb;
- int error;
-
- sb = inode->i_sb;
+ struct dir_private_info *info = file->private_data;
+ struct inode *inode = file_inode(file);
+ struct super_block *sb = inode->i_sb;
if (!fname) {
printk("call_filldir: called with null fname?!?\n");
- return 0;
+ return true;
}
- curr_pos = hash2pos(fname->hash, fname->minor_hash);
+ ctx->pos = hash2pos(file, fname->hash, fname->minor_hash);
while (fname) {
- error = filldir(dirent, fname->name,
- fname->name_len, curr_pos,
+ if (!dir_emit(ctx, fname->name, fname->name_len,
fname->inode,
- get_dtype(sb, fname->file_type));
- if (error) {
- filp->f_pos = curr_pos;
+ get_dtype(sb, fname->file_type))) {
info->extra_fname = fname;
- return error;
+ return false;
}
fname = fname->next;
}
- return 0;
+ return true;
}
-static int ext3_dx_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
+static int ext3_dx_readdir(struct file *file, struct dir_context *ctx)
{
- struct dir_private_info *info = filp->private_data;
- struct inode *inode = filp->f_path.dentry->d_inode;
+ struct dir_private_info *info = file->private_data;
+ struct inode *inode = file_inode(file);
struct fname *fname;
int ret;
if (!info) {
- info = ext3_htree_create_dir_info(filp->f_pos);
+ info = ext3_htree_create_dir_info(file, ctx->pos);
if (!info)
return -ENOMEM;
- filp->private_data = info;
+ file->private_data = info;
}
- if (filp->f_pos == EXT3_HTREE_EOF)
+ if (ctx->pos == ext3_get_htree_eof(file))
return 0; /* EOF */
/* Some one has messed with f_pos; reset the world */
- if (info->last_pos != filp->f_pos) {
+ if (info->last_pos != ctx->pos) {
free_rb_tree_fname(&info->root);
info->curr_node = NULL;
info->extra_fname = NULL;
- info->curr_hash = pos2maj_hash(filp->f_pos);
- info->curr_minor_hash = pos2min_hash(filp->f_pos);
+ info->curr_hash = pos2maj_hash(file, ctx->pos);
+ info->curr_minor_hash = pos2min_hash(file, ctx->pos);
}
/*
@@ -454,7 +460,7 @@ static int ext3_dx_readdir(struct file * filp,
* chain, return them first.
*/
if (info->extra_fname) {
- if (call_filldir(filp, dirent, filldir, info->extra_fname))
+ if (!call_filldir(file, ctx, info->extra_fname))
goto finished;
info->extra_fname = NULL;
goto next_node;
@@ -468,17 +474,17 @@ static int ext3_dx_readdir(struct file * filp,
* cached entries.
*/
if ((!info->curr_node) ||
- (filp->f_version != inode->i_version)) {
+ (file->f_version != inode->i_version)) {
info->curr_node = NULL;
free_rb_tree_fname(&info->root);
- filp->f_version = inode->i_version;
- ret = ext3_htree_fill_tree(filp, info->curr_hash,
+ file->f_version = inode->i_version;
+ ret = ext3_htree_fill_tree(file, info->curr_hash,
info->curr_minor_hash,
&info->next_hash);
if (ret < 0)
return ret;
if (ret == 0) {
- filp->f_pos = EXT3_HTREE_EOF;
+ ctx->pos = ext3_get_htree_eof(file);
break;
}
info->curr_node = rb_first(&info->root);
@@ -487,7 +493,7 @@ static int ext3_dx_readdir(struct file * filp,
fname = rb_entry(info->curr_node, struct fname, rb_hash);
info->curr_hash = fname->hash;
info->curr_minor_hash = fname->minor_hash;
- if (call_filldir(filp, dirent, filldir, fname))
+ if (!call_filldir(file, ctx, fname))
break;
next_node:
info->curr_node = rb_next(info->curr_node);
@@ -498,7 +504,7 @@ static int ext3_dx_readdir(struct file * filp,
info->curr_minor_hash = fname->minor_hash;
} else {
if (info->next_hash == ~0) {
- filp->f_pos = EXT3_HTREE_EOF;
+ ctx->pos = ext3_get_htree_eof(file);
break;
}
info->curr_hash = info->next_hash;
@@ -506,7 +512,7 @@ static int ext3_dx_readdir(struct file * filp,
}
}
finished:
- info->last_pos = filp->f_pos;
+ info->last_pos = ctx->pos;
return 0;
}
@@ -517,3 +523,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)
return 0;
}
+
+const struct file_operations ext3_dir_operations = {
+ .llseek = ext3_dir_llseek,
+ .read = generic_read_dir,
+ .iterate = ext3_readdir,
+ .unlocked_ioctl = ext3_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext3_compat_ioctl,
+#endif
+ .fsync = ext3_sync_file,
+ .release = ext3_release_dir,
+};
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h
new file mode 100644
index 00000000000..e85ff15a060
--- /dev/null
+++ b/fs/ext3/ext3.h
@@ -0,0 +1,1326 @@
+/*
+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
+ *
+ * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/include/linux/minix_fs.h
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/magic.h>
+#include <linux/bug.h>
+#include <linux/blockgroup_lock.h>
+
+/*
+ * The second extended filesystem constants/structures
+ */
+
+/*
+ * Define EXT3FS_DEBUG to produce debug messages
+ */
+#undef EXT3FS_DEBUG
+
+/*
+ * Define EXT3_RESERVATION to reserve data blocks for expanding files
+ */
+#define EXT3_DEFAULT_RESERVE_BLOCKS 8
+/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
+#define EXT3_MAX_RESERVE_BLOCKS 1027
+#define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0
+
+/*
+ * Debug code
+ */
+#ifdef EXT3FS_DEBUG
+#define ext3_debug(f, a...) \
+ do { \
+ printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \
+ __FILE__, __LINE__, __func__); \
+ printk (KERN_DEBUG f, ## a); \
+ } while (0)
+#else
+#define ext3_debug(f, a...) do {} while (0)
+#endif
+
+/*
+ * Special inodes numbers
+ */
+#define EXT3_BAD_INO 1 /* Bad blocks inode */
+#define EXT3_ROOT_INO 2 /* Root inode */
+#define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
+#define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
+#define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
+#define EXT3_JOURNAL_INO 8 /* Journal inode */
+
+/* First non-reserved inode for old ext3 filesystems */
+#define EXT3_GOOD_OLD_FIRST_INO 11
+
+/*
+ * Maximal count of links to a file
+ */
+#define EXT3_LINK_MAX 32000
+
+/*
+ * Macro-instructions used to manage several block sizes
+ */
+#define EXT3_MIN_BLOCK_SIZE 1024
+#define EXT3_MAX_BLOCK_SIZE 65536
+#define EXT3_MIN_BLOCK_LOG_SIZE 10
+#define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize)
+#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
+#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
+#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits)
+#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size)
+#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino)
+
+/*
+ * Macro-instructions used to manage fragments
+ */
+#define EXT3_MIN_FRAG_SIZE 1024
+#define EXT3_MAX_FRAG_SIZE 4096
+#define EXT3_MIN_FRAG_LOG_SIZE 10
+#define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size)
+#define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block)
+
+/*
+ * Structure of a blocks group descriptor
+ */
+struct ext3_group_desc
+{
+ __le32 bg_block_bitmap; /* Blocks bitmap block */
+ __le32 bg_inode_bitmap; /* Inodes bitmap block */
+ __le32 bg_inode_table; /* Inodes table block */
+ __le16 bg_free_blocks_count; /* Free blocks count */
+ __le16 bg_free_inodes_count; /* Free inodes count */
+ __le16 bg_used_dirs_count; /* Directories count */
+ __u16 bg_pad;
+ __le32 bg_reserved[3];
+};
+
+/*
+ * Macro-instructions used to manage group descriptors
+ */
+#define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group)
+#define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block)
+#define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group)
+#define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits)
+
+/*
+ * Constants relative to the data blocks
+ */
+#define EXT3_NDIR_BLOCKS 12
+#define EXT3_IND_BLOCK EXT3_NDIR_BLOCKS
+#define EXT3_DIND_BLOCK (EXT3_IND_BLOCK + 1)
+#define EXT3_TIND_BLOCK (EXT3_DIND_BLOCK + 1)
+#define EXT3_N_BLOCKS (EXT3_TIND_BLOCK + 1)
+
+/*
+ * Inode flags
+ */
+#define EXT3_SECRM_FL 0x00000001 /* Secure deletion */
+#define EXT3_UNRM_FL 0x00000002 /* Undelete */
+#define EXT3_COMPR_FL 0x00000004 /* Compress file */
+#define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */
+#define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */
+#define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */
+#define EXT3_NODUMP_FL 0x00000040 /* do not dump file */
+#define EXT3_NOATIME_FL 0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define EXT3_DIRTY_FL 0x00000100
+#define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
+#define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */
+#define EXT3_ECOMPR_FL 0x00000800 /* Compression error */
+/* End compression flags --- maybe not all used */
+#define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */
+#define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */
+#define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
+#define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */
+#define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+#define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
+
+#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
+#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+
+/* Flags that should be inherited by new inodes from their parent. */
+#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\
+ EXT3_SYNC_FL | EXT3_NODUMP_FL |\
+ EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\
+ EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\
+ EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL)
+
+/* Flags that are appropriate for regular files (all but dir-specific ones). */
+#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL))
+
+/* Flags that are appropriate for non-directories/regular files. */
+#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL)
+
+/* Mask out flags that are inappropriate for the given type of inode. */
+static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags)
+{
+ if (S_ISDIR(mode))
+ return flags;
+ else if (S_ISREG(mode))
+ return flags & EXT3_REG_FLMASK;
+ else
+ return flags & EXT3_OTHER_FLMASK;
+}
+
+/* Used to pass group descriptor data when online resize is done */
+struct ext3_new_group_input {
+ __u32 group; /* Group number for this data */
+ __u32 block_bitmap; /* Absolute block number of block bitmap */
+ __u32 inode_bitmap; /* Absolute block number of inode bitmap */
+ __u32 inode_table; /* Absolute block number of inode table start */
+ __u32 blocks_count; /* Total number of blocks in this group */
+ __u16 reserved_blocks; /* Number of reserved blocks in this group */
+ __u16 unused;
+};
+
+/* The struct ext3_new_group_input in kernel space, with free_blocks_count */
+struct ext3_new_group_data {
+ __u32 group;
+ __u32 block_bitmap;
+ __u32 inode_bitmap;
+ __u32 inode_table;
+ __u32 blocks_count;
+ __u16 reserved_blocks;
+ __u16 unused;
+ __u32 free_blocks_count;
+};
+
+
+/*
+ * ioctl commands
+ */
+#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS
+#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS
+#define EXT3_IOC_GETVERSION _IOR('f', 3, long)
+#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
+#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
+#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input)
+#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION
+#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION
+#ifdef CONFIG_JBD_DEBUG
+#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
+#endif
+#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
+#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
+
+/*
+ * ioctl commands in 32 bit emulation
+ */
+#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define EXT3_IOC32_GETVERSION _IOR('f', 3, int)
+#define EXT3_IOC32_SETVERSION _IOW('f', 4, int)
+#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int)
+#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int)
+#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
+#ifdef CONFIG_JBD_DEBUG
+#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
+#endif
+#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
+#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
+
+
+/*
+ * Mount options
+ */
+struct ext3_mount_options {
+ unsigned long s_mount_opt;
+ kuid_t s_resuid;
+ kgid_t s_resgid;
+ unsigned long s_commit_interval;
+#ifdef CONFIG_QUOTA
+ int s_jquota_fmt;
+ char *s_qf_names[MAXQUOTAS];
+#endif
+};
+
+/*
+ * Structure of an inode on the disk
+ */
+struct ext3_inode {
+ __le16 i_mode; /* File mode */
+ __le16 i_uid; /* Low 16 bits of Owner Uid */
+ __le32 i_size; /* Size in bytes */
+ __le32 i_atime; /* Access time */
+ __le32 i_ctime; /* Creation time */
+ __le32 i_mtime; /* Modification time */
+ __le32 i_dtime; /* Deletion Time */
+ __le16 i_gid; /* Low 16 bits of Group Id */
+ __le16 i_links_count; /* Links count */
+ __le32 i_blocks; /* Blocks count */
+ __le32 i_flags; /* File flags */
+ union {
+ struct {
+ __u32 l_i_reserved1;
+ } linux1;
+ struct {
+ __u32 h_i_translator;
+ } hurd1;
+ struct {
+ __u32 m_i_reserved1;
+ } masix1;
+ } osd1; /* OS dependent 1 */
+ __le32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */
+ __le32 i_generation; /* File version (for NFS) */
+ __le32 i_file_acl; /* File ACL */
+ __le32 i_dir_acl; /* Directory ACL */
+ __le32 i_faddr; /* Fragment address */
+ union {
+ struct {
+ __u8 l_i_frag; /* Fragment number */
+ __u8 l_i_fsize; /* Fragment size */
+ __u16 i_pad1;
+ __le16 l_i_uid_high; /* these 2 fields */
+ __le16 l_i_gid_high; /* were reserved2[0] */
+ __u32 l_i_reserved2;
+ } linux2;
+ struct {
+ __u8 h_i_frag; /* Fragment number */
+ __u8 h_i_fsize; /* Fragment size */
+ __u16 h_i_mode_high;
+ __u16 h_i_uid_high;
+ __u16 h_i_gid_high;
+ __u32 h_i_author;
+ } hurd2;
+ struct {
+ __u8 m_i_frag; /* Fragment number */
+ __u8 m_i_fsize; /* Fragment size */
+ __u16 m_pad1;
+ __u32 m_i_reserved2[2];
+ } masix2;
+ } osd2; /* OS dependent 2 */
+ __le16 i_extra_isize;
+ __le16 i_pad1;
+};
+
+#define i_size_high i_dir_acl
+
+#define i_reserved1 osd1.linux1.l_i_reserved1
+#define i_frag osd2.linux2.l_i_frag
+#define i_fsize osd2.linux2.l_i_fsize
+#define i_uid_low i_uid
+#define i_gid_low i_gid
+#define i_uid_high osd2.linux2.l_i_uid_high
+#define i_gid_high osd2.linux2.l_i_gid_high
+#define i_reserved2 osd2.linux2.l_i_reserved2
+
+/*
+ * File system states
+ */
+#define EXT3_VALID_FS 0x0001 /* Unmounted cleanly */
+#define EXT3_ERROR_FS 0x0002 /* Errors detected */
+#define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */
+
+/*
+ * Misc. filesystem flags
+ */
+#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
+#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
+#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
+
+/*
+ * Mount flags
+ */
+#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */
+/* EXT3_MOUNT_OLDALLOC was there */
+#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */
+#define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */
+#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
+#define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
+#define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
+#define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
+#define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
+#define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */
+#define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
+#define EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
+#define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
+#define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
+#define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
+#define EXT3_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
+#define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
+#define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
+#define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */
+#define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */
+#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */
+#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
+#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write
+ * error in ordered mode */
+
+/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+#ifndef _LINUX_EXT2_FS_H
+#define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt
+#define set_opt(o, opt) o |= EXT3_MOUNT_##opt
+#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \
+ EXT3_MOUNT_##opt)
+#else
+#define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD
+#define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT
+#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS
+#endif
+
+#define ext3_set_bit __set_bit_le
+#define ext3_set_bit_atomic ext2_set_bit_atomic
+#define ext3_clear_bit __clear_bit_le
+#define ext3_clear_bit_atomic ext2_clear_bit_atomic
+#define ext3_test_bit test_bit_le
+#define ext3_find_next_zero_bit find_next_zero_bit_le
+
+/*
+ * Maximal mount counts between two filesystem checks
+ */
+#define EXT3_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
+#define EXT3_DFL_CHECKINTERVAL 0 /* Don't use interval check */
+
+/*
+ * Behaviour when detecting errors
+ */
+#define EXT3_ERRORS_CONTINUE 1 /* Continue execution */
+#define EXT3_ERRORS_RO 2 /* Remount fs read-only */
+#define EXT3_ERRORS_PANIC 3 /* Panic */
+#define EXT3_ERRORS_DEFAULT EXT3_ERRORS_CONTINUE
+
+/*
+ * Structure of the super block
+ */
+struct ext3_super_block {
+/*00*/ __le32 s_inodes_count; /* Inodes count */
+ __le32 s_blocks_count; /* Blocks count */
+ __le32 s_r_blocks_count; /* Reserved blocks count */
+ __le32 s_free_blocks_count; /* Free blocks count */
+/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
+ __le32 s_first_data_block; /* First Data Block */
+ __le32 s_log_block_size; /* Block size */
+ __le32 s_log_frag_size; /* Fragment size */
+/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
+ __le32 s_frags_per_group; /* # Fragments per group */
+ __le32 s_inodes_per_group; /* # Inodes per group */
+ __le32 s_mtime; /* Mount time */
+/*30*/ __le32 s_wtime; /* Write time */
+ __le16 s_mnt_count; /* Mount count */
+ __le16 s_max_mnt_count; /* Maximal mount count */
+ __le16 s_magic; /* Magic signature */
+ __le16 s_state; /* File system state */
+ __le16 s_errors; /* Behaviour when detecting errors */
+ __le16 s_minor_rev_level; /* minor revision level */
+/*40*/ __le32 s_lastcheck; /* time of last check */
+ __le32 s_checkinterval; /* max. time between checks */
+ __le32 s_creator_os; /* OS */
+ __le32 s_rev_level; /* Revision level */
+/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
+ __le16 s_def_resgid; /* Default gid for reserved blocks */
+ /*
+ * These fields are for EXT3_DYNAMIC_REV superblocks only.
+ *
+ * Note: the difference between the compatible feature set and
+ * the incompatible feature set is that if there is a bit set
+ * in the incompatible feature set that the kernel doesn't
+ * know about, it should refuse to mount the filesystem.
+ *
+ * e2fsck's requirements are more strict; if it doesn't know
+ * about a feature in either the compatible or incompatible
+ * feature set, it must abort and not try to meddle with
+ * things it doesn't understand...
+ */
+ __le32 s_first_ino; /* First non-reserved inode */
+ __le16 s_inode_size; /* size of inode structure */
+ __le16 s_block_group_nr; /* block group # of this superblock */
+ __le32 s_feature_compat; /* compatible feature set */
+/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
+ __le32 s_feature_ro_compat; /* readonly-compatible feature set */
+/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
+/*78*/ char s_volume_name[16]; /* volume name */
+/*88*/ char s_last_mounted[64]; /* directory where last mounted */
+/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
+ /*
+ * Performance hints. Directory preallocation should only
+ * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on.
+ */
+ __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
+ __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
+ __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
+ /*
+ * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
+ */
+/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
+/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
+ __le32 s_journal_dev; /* device number of journal file */
+ __le32 s_last_orphan; /* start of list of inodes to delete */
+ __le32 s_hash_seed[4]; /* HTREE hash seed */
+ __u8 s_def_hash_version; /* Default hash version to use */
+ __u8 s_reserved_char_pad;
+ __u16 s_reserved_word_pad;
+ __le32 s_default_mount_opts;
+ __le32 s_first_meta_bg; /* First metablock block group */
+ __le32 s_mkfs_time; /* When the filesystem was created */
+ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
+ /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
+/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
+ __le32 s_r_blocks_count_hi; /* Reserved blocks count */
+ __le32 s_free_blocks_count_hi; /* Free blocks count */
+ __le16 s_min_extra_isize; /* All inodes have at least # bytes */
+ __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
+ __le32 s_flags; /* Miscellaneous flags */
+ __le16 s_raid_stride; /* RAID stride */
+ __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
+ __le64 s_mmp_block; /* Block for multi-mount protection */
+ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
+ __u8 s_log_groups_per_flex; /* FLEX_BG group size */
+ __u8 s_reserved_char_pad2;
+ __le16 s_reserved_pad;
+ __u32 s_reserved[162]; /* Padding to the end of the block */
+};
+
+/* data type for block offset of block group */
+typedef int ext3_grpblk_t;
+
+/* data type for filesystem-wide blocks number */
+typedef unsigned long ext3_fsblk_t;
+
+#define E3FSBLK "%lu"
+
+struct ext3_reserve_window {
+ ext3_fsblk_t _rsv_start; /* First byte reserved */
+ ext3_fsblk_t _rsv_end; /* Last byte reserved or 0 */
+};
+
+struct ext3_reserve_window_node {
+ struct rb_node rsv_node;
+ __u32 rsv_goal_size;
+ __u32 rsv_alloc_hit;
+ struct ext3_reserve_window rsv_window;
+};
+
+struct ext3_block_alloc_info {
+ /* information about reservation window */
+ struct ext3_reserve_window_node rsv_window_node;
+ /*
+ * was i_next_alloc_block in ext3_inode_info
+ * is the logical (file-relative) number of the
+ * most-recently-allocated block in this file.
+ * We use this for detecting linearly ascending allocation requests.
+ */
+ __u32 last_alloc_logical_block;
+ /*
+ * Was i_next_alloc_goal in ext3_inode_info
+ * is the *physical* companion to i_next_alloc_block.
+ * it the physical block number of the block which was most-recentl
+ * allocated to this file. This give us the goal (target) for the next
+ * allocation when we detect linearly ascending requests.
+ */
+ ext3_fsblk_t last_alloc_physical_block;
+};
+
+#define rsv_start rsv_window._rsv_start
+#define rsv_end rsv_window._rsv_end
+
+/*
+ * third extended file system inode data in memory
+ */
+struct ext3_inode_info {
+ __le32 i_data[15]; /* unconverted */
+ __u32 i_flags;
+#ifdef EXT3_FRAGMENTS
+ __u32 i_faddr;
+ __u8 i_frag_no;
+ __u8 i_frag_size;
+#endif
+ ext3_fsblk_t i_file_acl;
+ __u32 i_dir_acl;
+ __u32 i_dtime;
+
+ /*
+ * i_block_group is the number of the block group which contains
+ * this file's inode. Constant across the lifetime of the inode,
+ * it is ued for making block allocation decisions - we try to
+ * place a file's data blocks near its inode block, and new inodes
+ * near to their parent directory's inode.
+ */
+ __u32 i_block_group;
+ unsigned long i_state_flags; /* Dynamic state flags for ext3 */
+
+ /* block reservation info */
+ struct ext3_block_alloc_info *i_block_alloc_info;
+
+ __u32 i_dir_start_lookup;
+#ifdef CONFIG_EXT3_FS_XATTR
+ /*
+ * Extended attributes can be read independently of the main file
+ * data. Taking i_mutex even when reading would cause contention
+ * between readers of EAs and writers of regular file data, so
+ * instead we synchronize on xattr_sem when reading or changing
+ * EAs.
+ */
+ struct rw_semaphore xattr_sem;
+#endif
+
+ struct list_head i_orphan; /* unlinked but open inodes */
+
+ /*
+ * i_disksize keeps track of what the inode size is ON DISK, not
+ * in memory. During truncate, i_size is set to the new size by
+ * the VFS prior to calling ext3_truncate(), but the filesystem won't
+ * set i_disksize to 0 until the truncate is actually under way.
+ *
+ * The intent is that i_disksize always represents the blocks which
+ * are used by this file. This allows recovery to restart truncate
+ * on orphans if we crash during truncate. We actually write i_disksize
+ * into the on-disk inode when writing inodes out, instead of i_size.
+ *
+ * The only time when i_disksize and i_size may be different is when
+ * a truncate is in progress. The only things which change i_disksize
+ * are ext3_get_block (growth) and ext3_truncate (shrinkth).
+ */
+ loff_t i_disksize;
+
+ /* on-disk additional length */
+ __u16 i_extra_isize;
+
+ /*
+ * truncate_mutex is for serialising ext3_truncate() against
+ * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's
+ * data tree are chopped off during truncate. We can't do that in
+ * ext3 because whenever we perform intermediate commits during
+ * truncate, the inode and all the metadata blocks *must* be in a
+ * consistent state which allows truncation of the orphans to restart
+ * during recovery. Hence we must fix the get_block-vs-truncate race
+ * by other means, so we have truncate_mutex.
+ */
+ struct mutex truncate_mutex;
+
+ /*
+ * Transactions that contain inode's metadata needed to complete
+ * fsync and fdatasync, respectively.
+ */
+ atomic_t i_sync_tid;
+ atomic_t i_datasync_tid;
+
+ struct inode vfs_inode;
+};
+
+/*
+ * third extended-fs super-block data in memory
+ */
+struct ext3_sb_info {
+ unsigned long s_frag_size; /* Size of a fragment in bytes */
+ unsigned long s_frags_per_block;/* Number of fragments per block */
+ unsigned long s_inodes_per_block;/* Number of inodes per block */
+ unsigned long s_frags_per_group;/* Number of fragments in a group */
+ unsigned long s_blocks_per_group;/* Number of blocks in a group */
+ unsigned long s_inodes_per_group;/* Number of inodes in a group */
+ unsigned long s_itb_per_group; /* Number of inode table blocks per group */
+ unsigned long s_gdb_count; /* Number of group descriptor blocks */
+ unsigned long s_desc_per_block; /* Number of group descriptors per block */
+ unsigned long s_groups_count; /* Number of groups in the fs */
+ unsigned long s_overhead_last; /* Last calculated overhead */
+ unsigned long s_blocks_last; /* Last seen block count */
+ struct buffer_head * s_sbh; /* Buffer containing the super block */
+ struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */
+ struct buffer_head ** s_group_desc;
+ unsigned long s_mount_opt;
+ ext3_fsblk_t s_sb_block;
+ kuid_t s_resuid;
+ kgid_t s_resgid;
+ unsigned short s_mount_state;
+ unsigned short s_pad;
+ int s_addr_per_block_bits;
+ int s_desc_per_block_bits;
+ int s_inode_size;
+ int s_first_ino;
+ spinlock_t s_next_gen_lock;
+ u32 s_next_generation;
+ u32 s_hash_seed[4];
+ int s_def_hash_version;
+ int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
+ struct percpu_counter s_freeblocks_counter;
+ struct percpu_counter s_freeinodes_counter;
+ struct percpu_counter s_dirs_counter;
+ struct blockgroup_lock *s_blockgroup_lock;
+
+ /* root of the per fs reservation window tree */
+ spinlock_t s_rsv_window_lock;
+ struct rb_root s_rsv_window_root;
+ struct ext3_reserve_window_node s_rsv_window_head;
+
+ /* Journaling */
+ struct inode * s_journal_inode;
+ struct journal_s * s_journal;
+ struct list_head s_orphan;
+ struct mutex s_orphan_lock;
+ struct mutex s_resize_lock;
+ unsigned long s_commit_interval;
+ struct block_device *journal_bdev;
+#ifdef CONFIG_QUOTA
+ char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
+ int s_jquota_fmt; /* Format of quota to use */
+#endif
+};
+
+static inline spinlock_t *
+sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group)
+{
+ return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
+}
+
+static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+static inline struct ext3_inode_info *EXT3_I(struct inode *inode)
+{
+ return container_of(inode, struct ext3_inode_info, vfs_inode);
+}
+
+static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino)
+{
+ return ino == EXT3_ROOT_INO ||
+ ino == EXT3_JOURNAL_INO ||
+ ino == EXT3_RESIZE_INO ||
+ (ino >= EXT3_FIRST_INO(sb) &&
+ ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count));
+}
+
+/*
+ * Inode dynamic state flags
+ */
+enum {
+ EXT3_STATE_JDATA, /* journaled data exists */
+ EXT3_STATE_NEW, /* inode is newly created */
+ EXT3_STATE_XATTR, /* has in-inode xattrs */
+ EXT3_STATE_FLUSH_ON_CLOSE, /* flush dirty pages on close */
+};
+
+static inline int ext3_test_inode_state(struct inode *inode, int bit)
+{
+ return test_bit(bit, &EXT3_I(inode)->i_state_flags);
+}
+
+static inline void ext3_set_inode_state(struct inode *inode, int bit)
+{
+ set_bit(bit, &EXT3_I(inode)->i_state_flags);
+}
+
+static inline void ext3_clear_inode_state(struct inode *inode, int bit)
+{
+ clear_bit(bit, &EXT3_I(inode)->i_state_flags);
+}
+
+#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime
+
+/*
+ * Codes for operating systems
+ */
+#define EXT3_OS_LINUX 0
+#define EXT3_OS_HURD 1
+#define EXT3_OS_MASIX 2
+#define EXT3_OS_FREEBSD 3
+#define EXT3_OS_LITES 4
+
+/*
+ * Revision levels
+ */
+#define EXT3_GOOD_OLD_REV 0 /* The good old (original) format */
+#define EXT3_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
+
+#define EXT3_CURRENT_REV EXT3_GOOD_OLD_REV
+#define EXT3_MAX_SUPP_REV EXT3_DYNAMIC_REV
+
+#define EXT3_GOOD_OLD_INODE_SIZE 128
+
+/*
+ * Feature set definitions
+ */
+
+#define EXT3_HAS_COMPAT_FEATURE(sb,mask) \
+ ( EXT3_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
+#define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask) \
+ ( EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
+#define EXT3_HAS_INCOMPAT_FEATURE(sb,mask) \
+ ( EXT3_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
+#define EXT3_SET_COMPAT_FEATURE(sb,mask) \
+ EXT3_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
+#define EXT3_SET_RO_COMPAT_FEATURE(sb,mask) \
+ EXT3_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
+#define EXT3_SET_INCOMPAT_FEATURE(sb,mask) \
+ EXT3_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
+#define EXT3_CLEAR_COMPAT_FEATURE(sb,mask) \
+ EXT3_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
+#define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
+ EXT3_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
+#define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask) \
+ EXT3_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
+
+#define EXT3_FEATURE_COMPAT_DIR_PREALLOC 0x0001
+#define EXT3_FEATURE_COMPAT_IMAGIC_INODES 0x0002
+#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004
+#define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008
+#define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010
+#define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020
+
+#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
+#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
+#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+
+#define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001
+#define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002
+#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
+#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
+#define EXT3_FEATURE_INCOMPAT_META_BG 0x0010
+
+#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
+#define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
+ EXT3_FEATURE_INCOMPAT_RECOVER| \
+ EXT3_FEATURE_INCOMPAT_META_BG)
+#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+ EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
+ EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
+
+/*
+ * Default values for user and/or group using reserved blocks
+ */
+#define EXT3_DEF_RESUID 0
+#define EXT3_DEF_RESGID 0
+
+/*
+ * Default mount options
+ */
+#define EXT3_DEFM_DEBUG 0x0001
+#define EXT3_DEFM_BSDGROUPS 0x0002
+#define EXT3_DEFM_XATTR_USER 0x0004
+#define EXT3_DEFM_ACL 0x0008
+#define EXT3_DEFM_UID16 0x0010
+#define EXT3_DEFM_JMODE 0x0060
+#define EXT3_DEFM_JMODE_DATA 0x0020
+#define EXT3_DEFM_JMODE_ORDERED 0x0040
+#define EXT3_DEFM_JMODE_WBACK 0x0060
+
+/*
+ * Structure of a directory entry
+ */
+#define EXT3_NAME_LEN 255
+
+struct ext3_dir_entry {
+ __le32 inode; /* Inode number */
+ __le16 rec_len; /* Directory entry length */
+ __le16 name_len; /* Name length */
+ char name[EXT3_NAME_LEN]; /* File name */
+};
+
+/*
+ * The new version of the directory entry. Since EXT3 structures are
+ * stored in intel byte order, and the name_len field could never be
+ * bigger than 255 chars, it's safe to reclaim the extra byte for the
+ * file_type field.
+ */
+struct ext3_dir_entry_2 {
+ __le32 inode; /* Inode number */
+ __le16 rec_len; /* Directory entry length */
+ __u8 name_len; /* Name length */
+ __u8 file_type;
+ char name[EXT3_NAME_LEN]; /* File name */
+};
+
+/*
+ * Ext3 directory file types. Only the low 3 bits are used. The
+ * other bits are reserved for now.
+ */
+#define EXT3_FT_UNKNOWN 0
+#define EXT3_FT_REG_FILE 1
+#define EXT3_FT_DIR 2
+#define EXT3_FT_CHRDEV 3
+#define EXT3_FT_BLKDEV 4
+#define EXT3_FT_FIFO 5
+#define EXT3_FT_SOCK 6
+#define EXT3_FT_SYMLINK 7
+
+#define EXT3_FT_MAX 8
+
+/*
+ * EXT3_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a multiple of 4
+ */
+#define EXT3_DIR_PAD 4
+#define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1)
+#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \
+ ~EXT3_DIR_ROUND)
+#define EXT3_MAX_REC_LEN ((1<<16)-1)
+
+/*
+ * Tests against MAX_REC_LEN etc were put in place for 64k block
+ * sizes; if that is not possible on this arch, we can skip
+ * those tests and speed things up.
+ */
+static inline unsigned ext3_rec_len_from_disk(__le16 dlen)
+{
+ unsigned len = le16_to_cpu(dlen);
+
+#if (PAGE_CACHE_SIZE >= 65536)
+ if (len == EXT3_MAX_REC_LEN)
+ return 1 << 16;
+#endif
+ return len;
+}
+
+static inline __le16 ext3_rec_len_to_disk(unsigned len)
+{
+#if (PAGE_CACHE_SIZE >= 65536)
+ if (len == (1 << 16))
+ return cpu_to_le16(EXT3_MAX_REC_LEN);
+ else if (len > (1 << 16))
+ BUG();
+#endif
+ return cpu_to_le16(len);
+}
+
+/*
+ * Hash Tree Directory indexing
+ * (c) Daniel Phillips, 2001
+ */
+
+#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
+ EXT3_FEATURE_COMPAT_DIR_INDEX) && \
+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
+
+/* Legal values for the dx_root hash_version field: */
+
+#define DX_HASH_LEGACY 0
+#define DX_HASH_HALF_MD4 1
+#define DX_HASH_TEA 2
+#define DX_HASH_LEGACY_UNSIGNED 3
+#define DX_HASH_HALF_MD4_UNSIGNED 4
+#define DX_HASH_TEA_UNSIGNED 5
+
+/* hash info structure used by the directory hash */
+struct dx_hash_info
+{
+ u32 hash;
+ u32 minor_hash;
+ int hash_version;
+ u32 *seed;
+};
+
+
+/* 32 and 64 bit signed EOF for dx directories */
+#define EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1)
+#define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1)
+
+
+/*
+ * Control parameters used by ext3_htree_next_block
+ */
+#define HASH_NB_ALWAYS 1
+
+
+/*
+ * Describe an inode's exact location on disk and in memory
+ */
+struct ext3_iloc
+{
+ struct buffer_head *bh;
+ unsigned long offset;
+ unsigned long block_group;
+};
+
+static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc)
+{
+ return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset);
+}
+
+/*
+ * This structure is stuffed into the struct file's private_data field
+ * for directories. It is where we put information so that we can do
+ * readdir operations in hash tree order.
+ */
+struct dir_private_info {
+ struct rb_root root;
+ struct rb_node *curr_node;
+ struct fname *extra_fname;
+ loff_t last_pos;
+ __u32 curr_hash;
+ __u32 curr_minor_hash;
+ __u32 next_hash;
+};
+
+/* calculate the first block number of the group */
+static inline ext3_fsblk_t
+ext3_group_first_block_no(struct super_block *sb, unsigned long group_no)
+{
+ return group_no * (ext3_fsblk_t)EXT3_BLOCKS_PER_GROUP(sb) +
+ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block);
+}
+
+/*
+ * Special error return code only used by dx_probe() and its callers.
+ */
+#define ERR_BAD_DX_DIR -75000
+
+/*
+ * Function prototypes
+ */
+
+/*
+ * Ok, these declarations are also in <linux/kernel.h> but none of the
+ * ext3 source programs needs to include it so they are duplicated here.
+ */
+# define NORET_TYPE /**/
+# define ATTRIB_NORET __attribute__((noreturn))
+# define NORET_AND noreturn,
+
+/* balloc.c */
+extern int ext3_bg_has_super(struct super_block *sb, int group);
+extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
+ ext3_fsblk_t goal, int *errp);
+extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
+ ext3_fsblk_t goal, unsigned long *count, int *errp);
+extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
+ ext3_fsblk_t block, unsigned long count);
+extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
+ ext3_fsblk_t block, unsigned long count,
+ unsigned long *pdquot_freed_blocks);
+extern ext3_fsblk_t ext3_count_free_blocks (struct super_block *);
+extern void ext3_check_blocks_bitmap (struct super_block *);
+extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+ unsigned int block_group,
+ struct buffer_head ** bh);
+extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
+extern void ext3_init_block_alloc_info(struct inode *);
+extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv);
+extern int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range);
+
+/* dir.c */
+extern int ext3_check_dir_entry(const char *, struct inode *,
+ struct ext3_dir_entry_2 *,
+ struct buffer_head *, unsigned long);
+extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
+ __u32 minor_hash,
+ struct ext3_dir_entry_2 *dirent);
+extern void ext3_htree_free_dir_info(struct dir_private_info *p);
+
+/* fsync.c */
+extern int ext3_sync_file(struct file *, loff_t, loff_t, int);
+
+/* hash.c */
+extern int ext3fs_dirhash(const char *name, int len, struct
+ dx_hash_info *hinfo);
+
+/* ialloc.c */
+extern struct inode * ext3_new_inode (handle_t *, struct inode *,
+ const struct qstr *, umode_t);
+extern void ext3_free_inode (handle_t *, struct inode *);
+extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
+extern unsigned long ext3_count_free_inodes (struct super_block *);
+extern unsigned long ext3_count_dirs (struct super_block *);
+extern void ext3_check_inodes_bitmap (struct super_block *);
+extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
+
+
+/* inode.c */
+int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
+ struct buffer_head *bh, ext3_fsblk_t blocknr);
+struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
+ sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result,
+ int create);
+
+extern struct inode *ext3_iget(struct super_block *, unsigned long);
+extern int ext3_write_inode (struct inode *, struct writeback_control *);
+extern int ext3_setattr (struct dentry *, struct iattr *);
+extern void ext3_evict_inode (struct inode *);
+extern int ext3_sync_inode (handle_t *, struct inode *);
+extern void ext3_discard_reservation (struct inode *);
+extern void ext3_dirty_inode(struct inode *, int);
+extern int ext3_change_inode_journal_flag(struct inode *, int);
+extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *);
+extern int ext3_can_truncate(struct inode *inode);
+extern void ext3_truncate(struct inode *inode);
+extern void ext3_set_inode_flags(struct inode *);
+extern void ext3_get_inode_flags(struct ext3_inode_info *);
+extern void ext3_set_aops(struct inode *inode);
+extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len);
+
+/* ioctl.c */
+extern long ext3_ioctl(struct file *, unsigned int, unsigned long);
+extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long);
+
+/* namei.c */
+extern int ext3_orphan_add(handle_t *, struct inode *);
+extern int ext3_orphan_del(handle_t *, struct inode *);
+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
+ __u32 start_minor_hash, __u32 *next_hash);
+
+/* resize.c */
+extern int ext3_group_add(struct super_block *sb,
+ struct ext3_new_group_data *input);
+extern int ext3_group_extend(struct super_block *sb,
+ struct ext3_super_block *es,
+ ext3_fsblk_t n_blocks_count);
+
+/* super.c */
+extern __printf(3, 4)
+void ext3_error(struct super_block *, const char *, const char *, ...);
+extern void __ext3_std_error (struct super_block *, const char *, int);
+extern __printf(3, 4)
+void ext3_abort(struct super_block *, const char *, const char *, ...);
+extern __printf(3, 4)
+void ext3_warning(struct super_block *, const char *, const char *, ...);
+extern __printf(3, 4)
+void ext3_msg(struct super_block *, const char *, const char *, ...);
+extern void ext3_update_dynamic_rev (struct super_block *sb);
+
+#define ext3_std_error(sb, errno) \
+do { \
+ if ((errno)) \
+ __ext3_std_error((sb), __func__, (errno)); \
+} while (0)
+
+/*
+ * Inodes and files operations
+ */
+
+/* dir.c */
+extern const struct file_operations ext3_dir_operations;
+
+/* file.c */
+extern const struct inode_operations ext3_file_inode_operations;
+extern const struct file_operations ext3_file_operations;
+
+/* namei.c */
+extern const struct inode_operations ext3_dir_inode_operations;
+extern const struct inode_operations ext3_special_inode_operations;
+
+/* symlink.c */
+extern const struct inode_operations ext3_symlink_inode_operations;
+extern const struct inode_operations ext3_fast_symlink_inode_operations;
+
+#define EXT3_JOURNAL(inode) (EXT3_SB((inode)->i_sb)->s_journal)
+
+/* Define the number of blocks we need to account to a transaction to
+ * modify one block of data.
+ *
+ * We may have to touch one inode, one bitmap buffer, up to three
+ * indirection blocks, the group and superblock summaries, and the data
+ * block to complete the transaction. */
+
+#define EXT3_SINGLEDATA_TRANS_BLOCKS 8U
+
+/* Extended attribute operations touch at most two data buffers,
+ * two bitmap buffers, and two group summaries, in addition to the inode
+ * and the superblock, which are already accounted for. */
+
+#define EXT3_XATTR_TRANS_BLOCKS 6U
+
+/* Define the minimum size for a transaction which modifies data. This
+ * needs to take into account the fact that we may end up modifying two
+ * quota files too (one for the group, one for the user quota). The
+ * superblock only gets updated once, of course, so don't bother
+ * counting that again for the quota updates. */
+
+#define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \
+ EXT3_XATTR_TRANS_BLOCKS - 2 + \
+ EXT3_MAXQUOTAS_TRANS_BLOCKS(sb))
+
+/* Delete operations potentially hit one directory's namespace plus an
+ * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
+ * generous. We can grow the delete transaction later if necessary. */
+
+#define EXT3_DELETE_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64)
+
+/* Define an arbitrary limit for the amount of data we will anticipate
+ * writing to any given transaction. For unbounded transactions such as
+ * write(2) and truncate(2) we can write more than this, but we always
+ * start off at the maximum transaction size and grow the transaction
+ * optimistically as we go. */
+
+#define EXT3_MAX_TRANS_DATA 64U
+
+/* We break up a large truncate or write transaction once the handle's
+ * buffer credits gets this low, we need either to extend the
+ * transaction or to start a new one. Reserve enough space here for
+ * inode, bitmap, superblock, group and indirection updates for at least
+ * one block, plus two quota updates. Quota allocations are not
+ * needed. */
+
+#define EXT3_RESERVE_TRANS_BLOCKS 12U
+
+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8
+
+#ifdef CONFIG_QUOTA
+/* Amount of blocks needed for quota update - we know that the structure was
+ * allocated so we need to update only inode+data */
+#define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
+/* Amount of blocks needed for quota insert/delete - we do some block writes
+ * but inode, sb and group updates are done only once */
+#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+ (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
+#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+ (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
+#else
+#define EXT3_QUOTA_TRANS_BLOCKS(sb) 0
+#define EXT3_QUOTA_INIT_BLOCKS(sb) 0
+#define EXT3_QUOTA_DEL_BLOCKS(sb) 0
+#endif
+#define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb))
+#define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb))
+
+int
+ext3_mark_iloc_dirty(handle_t *handle,
+ struct inode *inode,
+ struct ext3_iloc *iloc);
+
+/*
+ * On success, We end up with an outstanding reference count against
+ * iloc->bh. This _must_ be cleaned up later.
+ */
+
+int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
+ struct ext3_iloc *iloc);
+
+int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode);
+
+/*
+ * Wrapper functions with which ext3 calls into JBD. The intent here is
+ * to allow these to be turned into appropriate stubs so ext3 can control
+ * ext2 filesystems, so ext2+ext3 systems only nee one fs. This work hasn't
+ * been done yet.
+ */
+
+static inline void ext3_journal_release_buffer(handle_t *handle,
+ struct buffer_head *bh)
+{
+ journal_release_buffer(handle, bh);
+}
+
+void ext3_journal_abort_handle(const char *caller, const char *err_fn,
+ struct buffer_head *bh, handle_t *handle, int err);
+
+int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
+ struct buffer_head *bh);
+
+int __ext3_journal_get_write_access(const char *where, handle_t *handle,
+ struct buffer_head *bh);
+
+int __ext3_journal_forget(const char *where, handle_t *handle,
+ struct buffer_head *bh);
+
+int __ext3_journal_revoke(const char *where, handle_t *handle,
+ unsigned long blocknr, struct buffer_head *bh);
+
+int __ext3_journal_get_create_access(const char *where,
+ handle_t *handle, struct buffer_head *bh);
+
+int __ext3_journal_dirty_metadata(const char *where,
+ handle_t *handle, struct buffer_head *bh);
+
+#define ext3_journal_get_undo_access(handle, bh) \
+ __ext3_journal_get_undo_access(__func__, (handle), (bh))
+#define ext3_journal_get_write_access(handle, bh) \
+ __ext3_journal_get_write_access(__func__, (handle), (bh))
+#define ext3_journal_revoke(handle, blocknr, bh) \
+ __ext3_journal_revoke(__func__, (handle), (blocknr), (bh))
+#define ext3_journal_get_create_access(handle, bh) \
+ __ext3_journal_get_create_access(__func__, (handle), (bh))
+#define ext3_journal_dirty_metadata(handle, bh) \
+ __ext3_journal_dirty_metadata(__func__, (handle), (bh))
+#define ext3_journal_forget(handle, bh) \
+ __ext3_journal_forget(__func__, (handle), (bh))
+
+int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
+
+handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks);
+int __ext3_journal_stop(const char *where, handle_t *handle);
+
+static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks)
+{
+ return ext3_journal_start_sb(inode->i_sb, nblocks);
+}
+
+#define ext3_journal_stop(handle) \
+ __ext3_journal_stop(__func__, (handle))
+
+static inline handle_t *ext3_journal_current_handle(void)
+{
+ return journal_current_handle();
+}
+
+static inline int ext3_journal_extend(handle_t *handle, int nblocks)
+{
+ return journal_extend(handle, nblocks);
+}
+
+static inline int ext3_journal_restart(handle_t *handle, int nblocks)
+{
+ return journal_restart(handle, nblocks);
+}
+
+static inline int ext3_journal_blocks_per_page(struct inode *inode)
+{
+ return journal_blocks_per_page(inode);
+}
+
+static inline int ext3_journal_force_commit(journal_t *journal)
+{
+ return journal_force_commit(journal);
+}
+
+/* super.c */
+int ext3_force_commit(struct super_block *sb);
+
+static inline int ext3_should_journal_data(struct inode *inode)
+{
+ if (!S_ISREG(inode->i_mode))
+ return 1;
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
+ return 1;
+ if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
+ return 1;
+ return 0;
+}
+
+static inline int ext3_should_order_data(struct inode *inode)
+{
+ if (!S_ISREG(inode->i_mode))
+ return 0;
+ if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
+ return 0;
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA)
+ return 1;
+ return 0;
+}
+
+static inline int ext3_should_writeback_data(struct inode *inode)
+{
+ if (!S_ISREG(inode->i_mode))
+ return 0;
+ if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
+ return 0;
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)
+ return 1;
+ return 0;
+}
+
+#include <trace/events/ext3.h>
diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c
index d401f148d74..785a3261a26 100644
--- a/fs/ext3/ext3_jbd.c
+++ b/fs/ext3/ext3_jbd.c
@@ -2,7 +2,7 @@
* Interface between ext3 and JBD
*/
-#include <linux/ext3_jbd.h>
+#include "ext3.h"
int __ext3_journal_get_undo_access(const char *where, handle_t *handle,
struct buffer_head *bh)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 724df69847d..a062fa1e1b1 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -18,12 +18,8 @@
* (jj@sunsite.ms.mff.cuni.cz)
*/
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/jbd.h>
#include <linux/quotaops.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
+#include "ext3.h"
#include "xattr.h"
#include "acl.h"
@@ -54,10 +50,10 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
const struct file_operations ext3_file_operations = {
.llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = generic_file_read_iter,
+ .write_iter = generic_file_write_iter,
.unlocked_ioctl = ext3_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext3_compat_ioctl,
@@ -67,7 +63,7 @@ const struct file_operations ext3_file_operations = {
.release = ext3_release_file,
.fsync = ext3_sync_file,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
const struct inode_operations ext3_file_inode_operations = {
@@ -79,6 +75,7 @@ const struct inode_operations ext3_file_inode_operations = {
.removexattr = generic_removexattr,
#endif
.get_acl = ext3_get_acl,
+ .set_acl = ext3_set_acl,
.fiemap = ext3_fiemap,
};
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 1860ed35632..1cb9c7e10c6 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -22,15 +22,9 @@
* we can depend on generic_block_fdatasync() to sync the data blocks.
*/
-#include <linux/time.h>
#include <linux/blkdev.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
#include <linux/writeback.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#include <trace/events/ext3.h>
+#include "ext3.h"
/*
* akpm: A new design for ext3_sync_file().
@@ -54,9 +48,13 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_ext3_sync_file_enter(file, datasync);
- if (inode->i_sb->s_flags & MS_RDONLY)
+ if (inode->i_sb->s_flags & MS_RDONLY) {
+ /* Make sure that we read updated state */
+ smp_rmb();
+ if (EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)
+ return -EROFS;
return 0;
-
+ }
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
goto out;
@@ -98,8 +96,13 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* disk caches manually so that data really is on persistent
* storage
*/
- if (needs_barrier)
- blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ if (needs_barrier) {
+ int err;
+
+ err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ if (!ret)
+ ret = err;
+ }
out:
trace_ext3_sync_file_exit(inode, ret);
return ret;
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index 7d215b4d4f2..ede315cdf12 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -9,9 +9,7 @@
* License.
*/
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
#include <linux/cryptohash.h>
#define DELTA 0x9E3779B9
@@ -200,8 +198,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
return -1;
}
hash = hash & ~1;
- if (hash == (EXT3_HTREE_EOF << 1))
- hash = (EXT3_HTREE_EOF-1) << 1;
+ if (hash == (EXT3_HTREE_EOF_32BIT << 1))
+ hash = (EXT3_HTREE_EOF_32BIT - 1) << 1;
hinfo->hash = hash;
hinfo->minor_hash = minor_hash;
return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 1cde2843801..a1b810230cc 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -12,21 +12,10 @@
* David S. Miller (davem@caip.rutgers.edu), 1995
*/
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/stat.h>
-#include <linux/string.h>
#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
#include <linux/random.h>
-#include <linux/bitops.h>
-#include <trace/events/ext3.h>
-
-#include <asm/byteorder.h>
+#include "ext3.h"
#include "xattr.h"
#include "acl.h"
@@ -191,8 +180,7 @@ error_return:
* It's OK to put directory into a group unless
* it has too many directories already (max_dirs) or
* it has too few free inodes left (min_inodes) or
- * it has too few free blocks left (min_blocks) or
- * it's already running too large debt (max_debt).
+ * it has too few free blocks left (min_blocks).
* Parent's group is preferred, if it doesn't satisfy these
* conditions we search cyclically through the rest. If none
* of the groups look good we just look for a group with more
@@ -202,21 +190,16 @@ error_return:
* when we allocate an inode, within 0--255.
*/
-#define INODE_COST 64
-#define BLOCK_COST 256
-
static int find_group_orlov(struct super_block *sb, struct inode *parent)
{
int parent_group = EXT3_I(parent)->i_block_group;
struct ext3_sb_info *sbi = EXT3_SB(sb);
- struct ext3_super_block *es = sbi->s_es;
int ngroups = sbi->s_groups_count;
int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
unsigned int freei, avefreei;
ext3_fsblk_t freeb, avefreeb;
- ext3_fsblk_t blocks_per_dir;
unsigned int ndirs;
- int max_debt, max_dirs, min_inodes;
+ int max_dirs, min_inodes;
ext3_grpblk_t min_blocks;
int group = -1, i;
struct ext3_group_desc *desc;
@@ -232,7 +215,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int best_ndir = inodes_per_group;
int best_group = -1;
- get_random_bytes(&group, sizeof(group));
+ group = prandom_u32();
parent_group = (unsigned)group % ngroups;
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
@@ -253,20 +236,10 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
goto fallback;
}
- blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;
-
max_dirs = ndirs / ngroups + inodes_per_group / 16;
min_inodes = avefreei - inodes_per_group / 4;
min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
- max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
- if (max_debt * INODE_COST > inodes_per_group)
- max_debt = inodes_per_group / INODE_COST;
- if (max_debt > 255)
- max_debt = 255;
- if (max_debt == 0)
- max_debt = 1;
-
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;
desc = ext3_get_group_desc (sb, group, NULL);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2d0afeca0b4..2c6ccc49ba2 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -22,22 +22,13 @@
* Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
*/
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/ext3_jbd.h>
-#include <linux/jbd.h>
#include <linux/highuid.h>
-#include <linux/pagemap.h>
#include <linux/quotaops.h>
-#include <linux/string.h>
-#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
-#include <linux/uio.h>
-#include <linux/bio.h>
-#include <linux/fiemap.h>
#include <linux/namei.h>
-#include <trace/events/ext3.h>
+#include <linux/aio.h>
+#include "ext3.h"
#include "xattr.h"
#include "acl.h"
@@ -228,7 +219,8 @@ void ext3_evict_inode (struct inode *inode)
*/
if (inode->i_nlink && ext3_should_journal_data(inode) &&
EXT3_SB(inode->i_sb)->s_journal &&
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+ inode->i_ino != EXT3_JOURNAL_INO) {
tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
@@ -236,7 +228,7 @@ void ext3_evict_inode (struct inode *inode)
log_wait_commit(journal, commit_tid);
filemap_write_and_wait(&inode->i_data);
}
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
ext3_discard_reservation(inode);
rsv = ei->i_block_alloc_info;
@@ -282,18 +274,18 @@ void ext3_evict_inode (struct inode *inode)
if (ext3_mark_inode_dirty(handle, inode)) {
/* If that failed, just dquot_drop() and be done with that */
dquot_drop(inode);
- end_writeback(inode);
+ clear_inode(inode);
} else {
ext3_xattr_delete_inode(handle, inode);
dquot_free_inode(inode);
dquot_drop(inode);
- end_writeback(inode);
+ clear_inode(inode);
ext3_free_inode(handle, inode);
}
ext3_journal_stop(handle);
return;
no_delete:
- end_writeback(inode);
+ clear_inode(inode);
dquot_drop(inode);
}
@@ -686,6 +678,10 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
* parent to disk.
*/
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+ if (unlikely(!bh)) {
+ err = -ENOMEM;
+ goto failed;
+ }
branch[n].bh = bh;
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
@@ -727,7 +723,7 @@ failed:
BUFFER_TRACE(branch[i].bh, "call journal_forget");
ext3_journal_forget(handle, branch[i].bh);
}
- for (i = 0; i <indirect_blks; i++)
+ for (i = 0; i < indirect_blks; i++)
ext3_free_blocks(handle, inode, new_blocks[i], 1);
ext3_free_blocks(handle, inode, new_blocks[i], num);
@@ -756,6 +752,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
struct ext3_block_alloc_info *block_i;
ext3_fsblk_t current_block;
struct ext3_inode_info *ei = EXT3_I(inode);
+ struct timespec now;
block_i = ei->i_block_alloc_info;
/*
@@ -795,9 +792,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode,
}
/* We are done with atomic stuff, now do the rest of housekeeping */
-
- inode->i_ctime = CURRENT_TIME_SEC;
- ext3_mark_inode_dirty(handle, inode);
+ now = CURRENT_TIME_SEC;
+ if (!timespec_equal(&inode->i_ctime, &now) || !where->bh) {
+ inode->i_ctime = now;
+ ext3_mark_inode_dirty(handle, inode);
+ }
/* ext3_mark_inode_dirty already updated i_sync_tid */
atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
@@ -1078,16 +1077,15 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode,
* mapped. 0 in case of a HOLE.
*/
if (err > 0) {
- if (err > 1)
- WARN_ON(1);
+ WARN_ON(err > 1);
err = 0;
}
*errp = err;
if (!err && buffer_mapped(&dummy)) {
struct buffer_head *bh;
bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
- if (!bh) {
- *errp = -EIO;
+ if (unlikely(!bh)) {
+ *errp = -ENOMEM;
goto err;
}
if (buffer_new(&dummy)) {
@@ -1561,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
}
/*
- * Note that we always start a transaction even if we're not journalling
- * data. This is to preserve ordering: any hole instantiation within
- * __block_write_full_page -> ext3_get_block() should be journalled
- * along with the data so we don't crash and then get metadata which
+ * Note that whenever we need to map blocks we start a transaction even if
+ * we're not journalling data. This is to preserve ordering: any hole
+ * instantiation within __block_write_full_page -> ext3_get_block() should be
+ * journalled along with the data so we don't crash and then get metadata which
* refers to old data.
*
* In all journalling modes block_write_full_page() will start the I/O.
*
- * Problem:
- *
- * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
- * ext3_writepage()
- *
- * Similar for:
- *
- * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
- *
- * Same applies to ext3_get_block(). We will deadlock on various things like
- * lock_journal and i_truncate_mutex.
- *
- * Setting PF_MEMALLOC here doesn't work - too many internal memory
- * allocations fail.
- *
- * 16May01: If we're reentered then journal_current_handle() will be
- * non-zero. We simply *return*.
- *
- * 1 July 2001: @@@ FIXME:
- * In journalled data mode, a data buffer may be metadata against the
- * current transaction. But the same file is part of a shared mapping
- * and someone does a writepage() on it.
- *
- * We will move the buffer onto the async_data list, but *after* it has
- * been dirtied. So there's a small window where we have dirty data on
- * BJ_Metadata.
- *
- * Note that this only applies to the last partial page in the file. The
- * bit which block_write_full_page() uses prepare/commit for. (That's
- * broken code anyway: it's wrong for msync()).
- *
- * It's a rare case: affects the final partial page, for journalled data
- * where the file is subject to bith write() and writepage() in the same
- * transction. To fix it we'll need a custom block_write_full_page().
- * We'll probably need that anyway for journalling writepage() output.
- *
* We don't honour synchronous mounts for writepage(). That would be
* disastrous. Any write() or metadata operation will sync the fs for
* us.
- *
- * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
- * we don't need to open a transaction here.
*/
static int ext3_ordered_writepage(struct page *page,
struct writeback_control *wbc)
@@ -1675,12 +1634,9 @@ static int ext3_ordered_writepage(struct page *page,
* block_write_full_page() succeeded. Otherwise they are unmapped,
* and generally junk.
*/
- if (ret == 0) {
- err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
+ if (ret == 0)
+ ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
NULL, journal_dirty_data_fn);
- if (!ret)
- ret = err;
- }
walk_page_buffers(handle, page_bufs, 0,
PAGE_CACHE_SIZE, NULL, bput_one);
err = ext3_journal_stop(handle);
@@ -1760,17 +1716,17 @@ static int ext3_journalled_writepage(struct page *page,
WARN_ON_ONCE(IS_RDONLY(inode) &&
!(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
- if (ext3_journal_current_handle())
- goto no_write;
-
trace_ext3_journalled_writepage(page);
- handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- goto no_write;
- }
-
if (!page_has_buffers(page) || PageChecked(page)) {
+ if (ext3_journal_current_handle())
+ goto no_write;
+
+ handle = ext3_journal_start(inode,
+ ext3_writepage_trans_blocks(inode));
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto no_write;
+ }
/*
* It's mmapped pagecache. Add buffers and journal it. There
* doesn't seem much point in redirtying the page here.
@@ -1793,17 +1749,18 @@ static int ext3_journalled_writepage(struct page *page,
atomic_set(&EXT3_I(inode)->i_datasync_tid,
handle->h_transaction->t_tid);
unlock_page(page);
+ err = ext3_journal_stop(handle);
+ if (!ret)
+ ret = err;
} else {
/*
- * It may be a page full of checkpoint-mode buffers. We don't
- * really know unless we go poke around in the buffer_heads.
- * But block_write_full_page will do the right thing.
+ * It is a page full of checkpoint-mode buffers. Go and write
+ * them. They should have been already mapped when they went
+ * to the journal so provide NULL get_block function to catch
+ * errors.
*/
- ret = block_write_full_page(page, ext3_get_block, wbc);
+ ret = block_write_full_page(page, NULL, wbc);
}
- err = ext3_journal_stop(handle);
- if (!ret)
- ret = err;
out:
return ret;
@@ -1827,19 +1784,20 @@ ext3_readpages(struct file *file, struct address_space *mapping,
return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);
}
-static void ext3_invalidatepage(struct page *page, unsigned long offset)
+static void ext3_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
- trace_ext3_invalidatepage(page, offset);
+ trace_ext3_invalidatepage(page, offset, length);
/*
* If it's a full truncate we just forget about the pending dirtying
*/
- if (offset == 0)
+ if (offset == 0 && length == PAGE_CACHE_SIZE)
ClearPageChecked(page);
- journal_invalidatepage(journal, page, offset);
+ journal_invalidatepage(journal, page, offset, length);
}
static int ext3_releasepage(struct page *page, gfp_t wait)
@@ -1863,8 +1821,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
* VFS code falls back into buffered path in that case so we are safe.
*/
static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
+ struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -1872,10 +1829,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
handle_t *handle;
ssize_t ret;
int orphan = 0;
- size_t count = iov_length(iov, nr_segs);
+ size_t count = iov_iter_count(iter);
int retries = 0;
- trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+ trace_ext3_direct_IO_enter(inode, offset, count, rw);
if (rw == WRITE) {
loff_t final_size = offset + count;
@@ -1899,15 +1856,14 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
}
retry:
- ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
- ext3_get_block);
+ ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
*/
if (unlikely((rw & WRITE) && ret < 0)) {
loff_t isize = i_size_read(inode);
- loff_t end = offset + iov_length(iov, nr_segs);
+ loff_t end = offset + count;
if (end > isize)
ext3_truncate_failed_direct_write(inode);
@@ -1926,6 +1882,8 @@ retry:
* and pretend the write failed... */
ext3_truncate_failed_direct_write(inode);
ret = PTR_ERR(handle);
+ if (inode->i_nlink)
+ ext3_orphan_del(NULL, inode);
goto out;
}
if (inode->i_nlink)
@@ -1950,8 +1908,7 @@ retry:
ret = err;
}
out:
- trace_ext3_direct_IO_exit(inode, offset,
- iov_length(iov, nr_segs), rw, ret);
+ trace_ext3_direct_IO_exit(inode, offset, count, rw, ret);
return ret;
}
@@ -1986,6 +1943,7 @@ static const struct address_space_operations ext3_ordered_aops = {
.direct_IO = ext3_direct_IO,
.migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate,
+ .is_dirty_writeback = buffer_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
};
@@ -2737,12 +2695,12 @@ static int __ext3_get_inode_loc(struct inode *inode,
return -EIO;
bh = sb_getblk(inode->i_sb, block);
- if (!bh) {
+ if (unlikely(!bh)) {
ext3_error (inode->i_sb, "ext3_get_inode_loc",
"unable to read inode block - "
"inode=%lu, block="E3FSBLK,
inode->i_ino, block);
- return -EIO;
+ return -ENOMEM;
}
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
@@ -2791,7 +2749,7 @@ static int __ext3_get_inode_loc(struct inode *inode,
bitmap_bh = sb_getblk(inode->i_sb,
le32_to_cpu(desc->bg_inode_bitmap));
- if (!bitmap_bh)
+ if (unlikely(!bitmap_bh))
goto make_io;
/*
@@ -2898,6 +2856,8 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
transaction_t *transaction;
long ret;
int block;
+ uid_t i_uid;
+ gid_t i_gid;
inode = iget_locked(sb, ino);
if (!inode)
@@ -2914,12 +2874,14 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
bh = iloc.bh;
raw_inode = ext3_raw_inode(&iloc);
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
- inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
- inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+ i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
if(!(test_opt (inode->i_sb, NO_UID32))) {
- inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
- inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
+ i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
+ i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
+ i_uid_write(inode, i_uid);
+ i_gid_write(inode, i_gid);
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
@@ -3075,6 +3037,10 @@ static int ext3_do_update_inode(handle_t *handle,
struct ext3_inode_info *ei = EXT3_I(inode);
struct buffer_head *bh = iloc->bh;
int err = 0, rc, block;
+ int need_datasync = 0;
+ __le32 disksize;
+ uid_t i_uid;
+ gid_t i_gid;
again:
/* we can't allow multiple procs in here at once, its a bit racey */
@@ -3087,32 +3053,38 @@ again:
ext3_get_inode_flags(ei);
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
+ i_uid = i_uid_read(inode);
+ i_gid = i_gid_read(inode);
if(!(test_opt(inode->i_sb, NO_UID32))) {
- raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid));
- raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid));
+ raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
+ raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
/*
* Fix up interoperability with old kernels. Otherwise, old inodes get
* re-used with the upper 16 bits of the uid/gid intact
*/
if(!ei->i_dtime) {
raw_inode->i_uid_high =
- cpu_to_le16(high_16_bits(inode->i_uid));
+ cpu_to_le16(high_16_bits(i_uid));
raw_inode->i_gid_high =
- cpu_to_le16(high_16_bits(inode->i_gid));
+ cpu_to_le16(high_16_bits(i_gid));
} else {
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
}
} else {
raw_inode->i_uid_low =
- cpu_to_le16(fs_high2lowuid(inode->i_uid));
+ cpu_to_le16(fs_high2lowuid(i_uid));
raw_inode->i_gid_low =
- cpu_to_le16(fs_high2lowgid(inode->i_gid));
+ cpu_to_le16(fs_high2lowgid(i_gid));
raw_inode->i_uid_high = 0;
raw_inode->i_gid_high = 0;
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
- raw_inode->i_size = cpu_to_le32(ei->i_disksize);
+ disksize = cpu_to_le32(ei->i_disksize);
+ if (disksize != raw_inode->i_size) {
+ need_datasync = 1;
+ raw_inode->i_size = disksize;
+ }
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
@@ -3128,8 +3100,11 @@ again:
if (!S_ISREG(inode->i_mode)) {
raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
} else {
- raw_inode->i_size_high =
- cpu_to_le32(ei->i_disksize >> 32);
+ disksize = cpu_to_le32(ei->i_disksize >> 32);
+ if (disksize != raw_inode->i_size_high) {
+ raw_inode->i_size_high = disksize;
+ need_datasync = 1;
+ }
if (ei->i_disksize > 0x7fffffffULL) {
struct super_block *sb = inode->i_sb;
if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
@@ -3182,6 +3157,8 @@ again:
ext3_clear_inode_state(inode, EXT3_STATE_NEW);
atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid);
+ if (need_datasync)
+ atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
out_brelse:
brelse (bh);
ext3_std_error(inode->i_sb, err);
@@ -3193,21 +3170,20 @@ out_brelse:
*
* We are called from a few places:
*
- * - Within generic_file_write() for O_SYNC files.
+ * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
* Here, there will be no transaction running. We wait for any running
- * trasnaction to commit.
+ * transaction to commit.
*
- * - Within sys_sync(), kupdate and such.
- * We wait on commit, if tol to.
+ * - Within flush work (for sys_sync(), kupdate and such).
+ * We wait on commit, if told to.
*
- * - Within prune_icache() (PF_MEMALLOC == true)
- * Here we simply return. We can't afford to block kswapd on the
- * journal commit.
+ * - Within iput_final() -> write_inode_now()
+ * We wait on commit, if told to.
*
* In all cases it is actually safe for us to return without doing anything,
* because the inode has been copied into a raw inode buffer in
- * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for
- * knfsd.
+ * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
+ * writeback.
*
* Note that we are absolutely dependent upon all inode dirtiers doing the
* right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -3219,13 +3195,13 @@ out_brelse:
* stuff();
* inode->i_size = expr;
*
- * is in error because a kswapd-driven write_inode() could occur while
- * `stuff()' is running, and the new i_size will be lost. Plus the inode
- * will no longer be on the superblock's dirty inode list.
+ * is in error because write_inode() could occur while `stuff()' is running,
+ * and the new i_size will be lost. Plus the inode will no longer be on the
+ * superblock's dirty inode list.
*/
int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
{
- if (current->flags & PF_MEMALLOC)
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
return 0;
if (ext3_journal_current_handle()) {
@@ -3234,7 +3210,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
return -EIO;
}
- if (wbc->sync_mode != WB_SYNC_ALL)
+ /*
+ * No need to force transaction in WB_SYNC_NONE mode. Also
+ * ext3_sync_fs() will force the commit after everything is
+ * written.
+ */
+ if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
return 0;
return ext3_force_commit(inode->i_sb);
@@ -3269,8 +3250,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
if (is_quota_modification(inode, attr))
dquot_initialize(inode);
- if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
- (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+ if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
+ (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
handle_t *handle;
/* (user+group)*(old+new) structure, inode write (sb,
@@ -3346,7 +3327,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
mark_inode_dirty(inode);
if (ia_valid & ATTR_MODE)
- rc = ext3_acl_chmod(inode);
+ rc = posix_acl_chmod(inode, inode->i_mode);
err_out:
ext3_std_error(inode->i_sb, error);
@@ -3458,14 +3439,6 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
* inode out, but prune_icache isn't a user-visible syncing function.
* Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
* we start and wait on commits.
- *
- * Is this efficient/effective? Well, we're being nice to the system
- * by cleaning up our inodes proactively so they can be reaped
- * without I/O. But we are potentially leaving up to five seconds'
- * worth of inodes floating about which prune_icache wants us to
- * write out. One way to fix that would be to get prune_icache()
- * to do a write_super() to free up some memory. It has the desired
- * effect.
*/
int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
{
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 4af574ce4a4..4d96e9a6453 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -7,19 +7,14 @@
* Universite Pierre et Marie Curie (Paris VI)
*/
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/capability.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
#include <linux/mount.h>
-#include <linux/time.h>
#include <linux/compat.h>
#include <asm/uaccess.h>
+#include "ext3.h"
long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct ext3_inode_info *ei = EXT3_I(inode);
unsigned int flags;
unsigned short rsv_window_size;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index e8e211795e9..f197736dccf 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -24,20 +24,8 @@
* Theodore Ts'o, 2002
*/
-#include <linux/fs.h>
-#include <linux/pagemap.h>
-#include <linux/jbd.h>
-#include <linux/time.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/string.h>
#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/bio.h>
-#include <trace/events/ext3.h>
-
+#include "ext3.h"
#include "namei.h"
#include "xattr.h"
#include "acl.h"
@@ -48,7 +36,6 @@
#define NAMEI_RA_CHUNKS 2
#define NAMEI_RA_BLOCKS 4
#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
-#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
static struct buffer_head *ext3_append(handle_t *handle,
struct inode *inode,
@@ -58,8 +45,7 @@ static struct buffer_head *ext3_append(handle_t *handle,
*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
- bh = ext3_bread(handle, inode, *block, 1, err);
- if (bh) {
+ if ((bh = ext3_dir_bread(handle, inode, *block, 1, err))) {
inode->i_size += inode->i_sb->s_blocksize;
EXT3_I(inode)->i_disksize = inode->i_size;
*err = ext3_journal_get_write_access(handle, bh);
@@ -351,8 +337,10 @@ dx_probe(struct qstr *entry, struct inode *dir,
u32 hash;
frame->bh = NULL;
- if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
+ if (!(bh = ext3_dir_bread(NULL, dir, 0, 0, err))) {
+ *err = ERR_BAD_DX_DIR;
goto fail;
+ }
root = (struct dx_root *) bh->b_data;
if (root->info.hash_version != DX_HASH_TEA &&
root->info.hash_version != DX_HASH_HALF_MD4 &&
@@ -448,8 +436,10 @@ dx_probe(struct qstr *entry, struct inode *dir,
frame->entries = entries;
frame->at = at;
if (!indirect--) return frame;
- if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
+ if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(at), 0, err))) {
+ *err = ERR_BAD_DX_DIR;
goto fail2;
+ }
at = entries = ((struct dx_node *) bh->b_data)->entries;
if (dx_get_limit(entries) != dx_node_limit (dir)) {
ext3_warning(dir->i_sb, __func__,
@@ -547,8 +537,8 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
* block so no check is necessary
*/
while (num_frames--) {
- if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
- 0, &err)))
+ if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(p->at),
+ 0, &err)))
return err; /* Failure */
p++;
brelse (p->bh);
@@ -571,10 +561,11 @@ static int htree_dirblock_to_tree(struct file *dir_file,
{
struct buffer_head *bh;
struct ext3_dir_entry_2 *de, *top;
- int err, count = 0;
+ int err = 0, count = 0;
dxtrace(printk("In htree dirblock_to_tree: block %d\n", block));
- if (!(bh = ext3_bread (NULL, dir, block, 0, &err)))
+
+ if (!(bh = ext3_dir_bread(NULL, dir, block, 0, &err)))
return err;
de = (struct ext3_dir_entry_2 *) bh->b_data;
@@ -585,11 +576,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
- /* On error, skip the f_pos to the next block. */
- dir_file->f_pos = (dir_file->f_pos |
- (dir->i_sb->s_blocksize - 1)) + 1;
- brelse (bh);
- return count;
+ /* silently ignore the rest of the block */
+ break;
}
ext3fs_dirhash(de->name, de->name_len, hinfo);
if ((hinfo->hash < start_hash) ||
@@ -632,7 +620,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
start_minor_hash));
- dir = dir_file->f_path.dentry->d_inode;
+ dir = file_inode(dir_file);
if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
if (hinfo.hash_version <= DX_HASH_TEA)
@@ -646,7 +634,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
}
hinfo.hash = start_hash;
hinfo.minor_hash = 0;
- frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err);
+ frame = dx_probe(NULL, file_inode(dir_file), &hinfo, frames, &err);
if (!frame)
return err;
@@ -988,7 +976,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
return NULL;
do {
block = dx_get_block(frame->at);
- if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+ if (!(bh = ext3_dir_bread (NULL, dir, block, 0, err)))
goto errout;
retval = search_dirblock(bh, dir, entry,
@@ -1023,7 +1011,7 @@ errout:
return NULL;
}
-static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
{
struct inode * inode;
struct ext3_dir_entry_2 * de;
@@ -1057,7 +1045,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
struct dentry *ext3_get_parent(struct dentry *child)
{
unsigned long ino;
- struct qstr dotdot = {.name = "..", .len = 2};
+ struct qstr dotdot = QSTR_INIT("..", 2);
struct ext3_dir_entry_2 * de;
struct buffer_head *bh;
@@ -1470,9 +1458,9 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
}
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0; block < blocks; block++) {
- bh = ext3_bread(handle, dir, block, 0, &retval);
- if(!bh)
+ if (!(bh = ext3_dir_bread(handle, dir, block, 0, &retval)))
return retval;
+
retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
if (retval != -ENOSPC)
return retval;
@@ -1512,7 +1500,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
entries = frame->entries;
at = frame->at;
- if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
+ if (!(bh = ext3_dir_bread(handle, dir, dx_get_block(frame->at), 0, &err)))
goto cleanup;
BUFFER_TRACE(bh, "get_write_access");
@@ -1683,8 +1671,8 @@ static int ext3_add_nondir(handle_t *handle,
int err = ext3_add_entry(handle, dentry, inode);
if (!err) {
ext3_mark_inode_dirty(handle, inode);
- d_instantiate(dentry, inode);
unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
return 0;
}
drop_nlink(inode);
@@ -1702,7 +1690,7 @@ static int ext3_add_nondir(handle_t *handle,
* with d_instantiate().
*/
static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
- struct nameidata *nd)
+ bool excl)
{
handle_t *handle;
struct inode * inode;
@@ -1771,6 +1759,44 @@ retry:
return err;
}
+static int ext3_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ handle_t *handle;
+ struct inode *inode;
+ int err, retries = 0;
+
+ dquot_initialize(dir);
+
+retry:
+ handle = ext3_journal_start(dir, EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+ 4 + EXT3_XATTR_TRANS_BLOCKS);
+
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ inode = ext3_new_inode (handle, dir, NULL, mode);
+ err = PTR_ERR(inode);
+ if (!IS_ERR(inode)) {
+ inode->i_op = &ext3_file_inode_operations;
+ inode->i_fop = &ext3_file_operations;
+ ext3_set_aops(inode);
+ d_tmpfile(dentry, inode);
+ err = ext3_orphan_add(handle, inode);
+ if (err)
+ goto err_unlock_inode;
+ mark_inode_dirty(inode);
+ unlock_new_inode(inode);
+ }
+ ext3_journal_stop(handle);
+ if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+ goto retry;
+ return err;
+err_unlock_inode:
+ ext3_journal_stop(handle);
+ unlock_new_inode(inode);
+ return err;
+}
+
static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
{
handle_t *handle;
@@ -1802,8 +1828,7 @@ retry:
inode->i_op = &ext3_dir_inode_operations;
inode->i_fop = &ext3_dir_operations;
inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
- dir_block = ext3_bread (handle, inode, 0, 1, &err);
- if (!dir_block)
+ if (!(dir_block = ext3_dir_bread(handle, inode, 0, 1, &err)))
goto out_clear_inode;
BUFFER_TRACE(dir_block, "get_write_access");
@@ -1848,8 +1873,8 @@ out_clear_inode:
if (err)
goto out_clear_inode;
- d_instantiate(dentry, inode);
unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out_stop:
brelse(dir_block);
ext3_journal_stop(handle);
@@ -1871,7 +1896,7 @@ static int empty_dir (struct inode * inode)
sb = inode->i_sb;
if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) ||
- !(bh = ext3_bread (NULL, inode, 0, 0, &err))) {
+ !(bh = ext3_dir_bread(NULL, inode, 0, 0, &err))) {
if (err)
ext3_error(inode->i_sb, __func__,
"error %d reading directory #%lu offset 0",
@@ -1902,9 +1927,8 @@ static int empty_dir (struct inode * inode)
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
err = 0;
brelse (bh);
- bh = ext3_bread (NULL, inode,
- offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err);
- if (!bh) {
+ if (!(bh = ext3_dir_bread (NULL, inode,
+ offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err))) {
if (err)
ext3_error(sb, __func__,
"error %d reading directory"
@@ -2314,7 +2338,7 @@ static int ext3_link (struct dentry * old_dentry,
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
- EXT3_INDEX_EXTRA_TRANS_BLOCKS);
+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1);
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -2328,6 +2352,11 @@ retry:
err = ext3_add_entry(handle, dentry, inode);
if (!err) {
ext3_mark_inode_dirty(handle, inode);
+ /* this can happen only for tmpfile being
+ * linked the first time
+ */
+ if (inode->i_nlink == 1)
+ ext3_orphan_del(handle, inode);
d_instantiate(dentry, inode);
} else {
drop_nlink(inode);
@@ -2400,7 +2429,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
goto end_rename;
}
retval = -EIO;
- dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval);
+ dir_bh = ext3_dir_bread(handle, old_inode, 0, 0, &retval);
if (!dir_bh)
goto end_rename;
if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
@@ -2530,6 +2559,7 @@ const struct inode_operations ext3_dir_inode_operations = {
.mkdir = ext3_mkdir,
.rmdir = ext3_rmdir,
.mknod = ext3_mknod,
+ .tmpfile = ext3_tmpfile,
.rename = ext3_rename,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
@@ -2539,6 +2569,7 @@ const struct inode_operations ext3_dir_inode_operations = {
.removexattr = generic_removexattr,
#endif
.get_acl = ext3_get_acl,
+ .set_acl = ext3_set_acl,
};
const struct inode_operations ext3_special_inode_operations = {
@@ -2550,4 +2581,5 @@ const struct inode_operations ext3_special_inode_operations = {
.removexattr = generic_removexattr,
#endif
.get_acl = ext3_get_acl,
+ .set_acl = ext3_set_acl,
};
diff --git a/fs/ext3/namei.h b/fs/ext3/namei.h
index f2ce2b0065c..46304d8c9f0 100644
--- a/fs/ext3/namei.h
+++ b/fs/ext3/namei.h
@@ -6,3 +6,22 @@
*/
extern struct dentry *ext3_get_parent(struct dentry *child);
+
+static inline struct buffer_head *ext3_dir_bread(handle_t *handle,
+ struct inode *inode,
+ int block, int create,
+ int *err)
+{
+ struct buffer_head *bh;
+
+ bh = ext3_bread(handle, inode, block, create, err);
+
+ if (!bh && !(*err)) {
+ *err = -EIO;
+ ext3_error(inode->i_sb, __func__,
+ "Directory hole detected on inode %lu\n",
+ inode->i_ino);
+ return NULL;
+ }
+ return bh;
+}
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 7916e4ce166..27105655502 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -11,10 +11,7 @@
#define EXT3FS_DEBUG
-#include <linux/ext3_jbd.h>
-
-#include <linux/errno.h>
-#include <linux/slab.h>
+#include "ext3.h"
#define outside(b, first, last) ((b) < (first) || (b) >= (last))
@@ -119,8 +116,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
int err;
bh = sb_getblk(sb, blk);
- if (!bh)
- return ERR_PTR(-EIO);
+ if (unlikely(!bh))
+ return ERR_PTR(-ENOMEM);
if ((err = ext3_journal_get_write_access(handle, bh))) {
brelse(bh);
bh = ERR_PTR(err);
@@ -237,8 +234,8 @@ static int setup_new_group_blocks(struct super_block *sb,
goto exit_bh;
gdb = sb_getblk(sb, block);
- if (!gdb) {
- err = -EIO;
+ if (unlikely(!gdb)) {
+ err = -ENOMEM;
goto exit_bh;
}
if ((err = ext3_journal_get_write_access(handle, gdb))) {
@@ -725,8 +722,8 @@ static void update_backups(struct super_block *sb,
break;
bh = sb_getblk(sb, group * bpg + blk_off);
- if (!bh) {
- err = -EIO;
+ if (unlikely(!bh)) {
+ err = -ENOMEM;
break;
}
ext3_debug("update metadata backup %#04lx\n",
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 726c7ef6cdf..08cdfe5461e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -17,36 +17,27 @@
*/
#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/slab.h>
-#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
-#include <linux/buffer_head.h>
#include <linux/exportfs.h>
-#include <linux/vfs.h>
+#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/mount.h>
-#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
+#include <linux/namei.h>
#include <asm/uaccess.h>
+#define CREATE_TRACE_POINTS
+
+#include "ext3.h"
#include "xattr.h"
#include "acl.h"
#include "namei.h"
-#define CREATE_TRACE_POINTS
-#include <trace/events/ext3.h>
-
#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
#define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
#else
@@ -74,11 +65,6 @@ static int ext3_freeze(struct super_block *sb);
/*
* Wrappers for journal_start/end.
- *
- * The only special thing we need to do here is to make sure that all
- * journal_end calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
*/
handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
{
@@ -100,12 +86,6 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
return journal_start(journal, nblocks);
}
-/*
- * The only special thing we need to do here is to make sure that all
- * journal_stop calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
- */
int __ext3_journal_stop(const char *where, handle_t *handle)
{
struct super_block *sb;
@@ -195,6 +175,11 @@ static void ext3_handle_error(struct super_block *sb)
if (test_opt (sb, ERRORS_RO)) {
ext3_msg(sb, KERN_CRIT,
"error: remounting filesystem read-only");
+ /*
+ * Make sure updated value of ->s_mount_state will be visible
+ * before ->s_flags update.
+ */
+ smp_wmb();
sb->s_flags |= MS_RDONLY;
}
ext3_commit_super(sb, es, 1);
@@ -312,8 +297,14 @@ void ext3_abort(struct super_block *sb, const char *function,
ext3_msg(sb, KERN_CRIT,
"error: remounting filesystem read-only");
EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
- sb->s_flags |= MS_RDONLY;
set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
+ /*
+ * Make sure updated value of ->s_mount_state will be visible
+ * before ->s_flags update.
+ */
+ smp_wmb();
+ sb->s_flags |= MS_RDONLY;
+
if (EXT3_SB(sb)->s_journal)
journal_abort(EXT3_SB(sb)->s_journal, -EIO);
}
@@ -374,7 +365,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
return bdev;
fail:
- ext3_msg(sb, "error: failed to open journal device %s: %ld",
+ ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld",
__bdevname(dev, b), PTR_ERR(bdev));
return NULL;
@@ -383,22 +374,19 @@ fail:
/*
* Release the journal device
*/
-static int ext3_blkdev_put(struct block_device *bdev)
+static void ext3_blkdev_put(struct block_device *bdev)
{
- return blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
-static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
+static void ext3_blkdev_remove(struct ext3_sb_info *sbi)
{
struct block_device *bdev;
- int ret = -ENODEV;
-
bdev = sbi->journal_bdev;
if (bdev) {
- ret = ext3_blkdev_put(bdev);
+ ext3_blkdev_put(bdev);
sbi->journal_bdev = NULL;
}
- return ret;
}
static inline struct inode *orphan_list_entry(struct list_head *l)
@@ -539,7 +527,7 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
-static int init_inodecache(void)
+static int __init init_inodecache(void)
{
ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
sizeof(struct ext3_inode_info),
@@ -553,6 +541,11 @@ static int init_inodecache(void)
static void destroy_inodecache(void)
{
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
kmem_cache_destroy(ext3_inode_cachep);
}
@@ -627,13 +620,15 @@ static int ext3_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",grpid");
if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
seq_puts(seq, ",nogrpid");
- if (sbi->s_resuid != EXT3_DEF_RESUID ||
+ if (!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT3_DEF_RESUID)) ||
le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
- seq_printf(seq, ",resuid=%u", sbi->s_resuid);
+ seq_printf(seq, ",resuid=%u",
+ from_kuid_munged(&init_user_ns, sbi->s_resuid));
}
- if (sbi->s_resgid != EXT3_DEF_RESGID ||
+ if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT3_DEF_RESGID)) ||
le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
- seq_printf(seq, ",resgid=%u", sbi->s_resgid);
+ seq_printf(seq, ",resgid=%u",
+ from_kgid_munged(&init_user_ns, sbi->s_resgid));
}
if (test_opt(sb, ERRORS_RO)) {
int def_errors = le16_to_cpu(es->s_errors);
@@ -825,6 +820,7 @@ enum {
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
+ Opt_journal_path,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
@@ -866,6 +862,7 @@ static const match_table_t tokens = {
{Opt_journal_update, "journal=update"},
{Opt_journal_inum, "journal=%u"},
{Opt_journal_dev, "journal_dev=%u"},
+ {Opt_journal_path, "journal_path=%s"},
{Opt_abort, "abort"},
{Opt_data_journal, "data=journal"},
{Opt_data_ordered, "data=ordered"},
@@ -901,7 +898,7 @@ static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
/*todo: use simple_strtoll with >32bit ext3 */
sb_block = simple_strtoul(options, &options, 0);
if (*options && *options != ',') {
- ext3_msg(sb, "error: invalid sb specification: %s",
+ ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s",
(char *) *data);
return 1;
}
@@ -930,21 +927,24 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
"Not enough memory for storing quotafile name");
return 0;
}
- if (sbi->s_qf_names[qtype] &&
- strcmp(sbi->s_qf_names[qtype], qname)) {
- ext3_msg(sb, KERN_ERR,
- "%s quota file already specified", QTYPE2NAME(qtype));
+ if (sbi->s_qf_names[qtype]) {
+ int same = !strcmp(sbi->s_qf_names[qtype], qname);
+
kfree(qname);
- return 0;
+ if (!same) {
+ ext3_msg(sb, KERN_ERR,
+ "%s quota file already specified",
+ QTYPE2NAME(qtype));
+ }
+ return same;
}
- sbi->s_qf_names[qtype] = qname;
- if (strchr(sbi->s_qf_names[qtype], '/')) {
+ if (strchr(qname, '/')) {
ext3_msg(sb, KERN_ERR,
"quotafile must be on filesystem root");
- kfree(sbi->s_qf_names[qtype]);
- sbi->s_qf_names[qtype] = NULL;
+ kfree(qname);
return 0;
}
+ sbi->s_qf_names[qtype] = qname;
set_opt(sbi->s_mount_opt, QUOTA);
return 1;
}
@@ -959,11 +959,10 @@ static int clear_qf_name(struct super_block *sb, int qtype) {
" when quota turned on");
return 0;
}
- /*
- * The space will be released later when all options are confirmed
- * to be correct
- */
- sbi->s_qf_names[qtype] = NULL;
+ if (sbi->s_qf_names[qtype]) {
+ kfree(sbi->s_qf_names[qtype]);
+ sbi->s_qf_names[qtype] = NULL;
+ }
return 1;
}
#endif
@@ -977,6 +976,13 @@ static int parse_options (char *options, struct super_block *sb,
substring_t args[MAX_OPT_ARGS];
int data_opt = 0;
int option;
+ kuid_t uid;
+ kgid_t gid;
+ char *journal_path;
+ struct inode *journal_inode;
+ struct path path;
+ int error;
+
#ifdef CONFIG_QUOTA
int qfmt;
#endif
@@ -992,7 +998,7 @@ static int parse_options (char *options, struct super_block *sb,
* Initialize args struct so we know whether arg was
* found; some options take optional arguments.
*/
- args[0].to = args[0].from = 0;
+ args[0].to = args[0].from = NULL;
token = match_token(p, tokens, args);
switch (token) {
case Opt_bsd_df:
@@ -1010,12 +1016,23 @@ static int parse_options (char *options, struct super_block *sb,
case Opt_resuid:
if (match_int(&args[0], &option))
return 0;
- sbi->s_resuid = option;
+ uid = make_kuid(current_user_ns(), option);
+ if (!uid_valid(uid)) {
+ ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option);
+ return 0;
+
+ }
+ sbi->s_resuid = uid;
break;
case Opt_resgid:
if (match_int(&args[0], &option))
return 0;
- sbi->s_resgid = option;
+ gid = make_kgid(current_user_ns(), option);
+ if (!gid_valid(gid)) {
+ ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option);
+ return 0;
+ }
+ sbi->s_resgid = gid;
break;
case Opt_sb:
/* handled by get_sb_block() instead of here */
@@ -1120,6 +1137,41 @@ static int parse_options (char *options, struct super_block *sb,
return 0;
*journal_devnum = option;
break;
+ case Opt_journal_path:
+ if (is_remount) {
+ ext3_msg(sb, KERN_ERR, "error: cannot specify "
+ "journal on remount");
+ return 0;
+ }
+
+ journal_path = match_strdup(&args[0]);
+ if (!journal_path) {
+ ext3_msg(sb, KERN_ERR, "error: could not dup "
+ "journal device string");
+ return 0;
+ }
+
+ error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
+ if (error) {
+ ext3_msg(sb, KERN_ERR, "error: could not find "
+ "journal device path: error %d", error);
+ kfree(journal_path);
+ return 0;
+ }
+
+ journal_inode = path.dentry->d_inode;
+ if (!S_ISBLK(journal_inode->i_mode)) {
+ ext3_msg(sb, KERN_ERR, "error: journal path %s "
+ "is not a block device", journal_path);
+ path_put(&path);
+ kfree(journal_path);
+ return 0;
+ }
+
+ *journal_devnum = new_encode_dev(journal_inode->i_rdev);
+ path_put(&path);
+ kfree(journal_path);
+ break;
case Opt_noload:
set_opt (sbi->s_mount_opt, NOLOAD);
break;
@@ -1485,10 +1537,12 @@ static void ext3_orphan_cleanup (struct super_block * sb,
}
if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
- if (es->s_last_orphan)
+ /* don't clear list on RO mount w/ errors */
+ if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
jbd_debug(1, "Errors on filesystem, "
"clearing orphan list.\n");
- es->s_last_orphan = 0;
+ es->s_last_orphan = 0;
+ }
jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
return;
}
@@ -1660,9 +1714,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
return -ENOMEM;
}
sb->s_fs_info = sbi;
- sbi->s_mount_opt = 0;
- sbi->s_resuid = EXT3_DEF_RESUID;
- sbi->s_resgid = EXT3_DEF_RESGID;
sbi->s_sb_block = sb_block;
blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
@@ -1726,8 +1777,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
else
set_opt(sbi->s_mount_opt, ERRORS_RO);
- sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
- sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+ sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
+ sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
/* enable barriers by default */
set_opt(sbi->s_mount_opt, BARRIER);
@@ -2046,15 +2097,15 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
goto failed_mount3;
}
- sb->s_root = d_alloc_root(root);
+ sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
- iput(root);
ret = -ENOMEM;
goto failed_mount3;
}
- ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
+ if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY))
+ sb->s_flags |= MS_RDONLY;
EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
ext3_orphan_cleanup(sb, es);
@@ -2522,6 +2573,11 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
tid_t target;
trace_ext3_sync_fs(sb, wait);
+ /*
+ * Writeback quota in non-journalled quota case - journalled quota has
+ * no dirty dquots
+ */
+ dquot_writeback_dquots(sb, -1);
if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
if (wait)
log_wait_commit(EXT3_SB(sb)->s_journal, target);
@@ -2572,11 +2628,9 @@ out:
static int ext3_unfreeze(struct super_block *sb)
{
if (!(sb->s_flags & MS_RDONLY)) {
- lock_super(sb);
/* Reser the needs_recovery flag before the fs is unlocked. */
EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
- unlock_super(sb);
journal_unlock_updates(EXT3_SB(sb)->s_journal);
}
return 0;
@@ -2595,8 +2649,9 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
int i;
#endif
+ sync_filesystem(sb);
+
/* Store the original options */
- lock_super(sb);
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
old_opts.s_resuid = sbi->s_resuid;
@@ -2605,7 +2660,18 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
#ifdef CONFIG_QUOTA
old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++)
- old_opts.s_qf_names[i] = sbi->s_qf_names[i];
+ if (sbi->s_qf_names[i]) {
+ old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
+ GFP_KERNEL);
+ if (!old_opts.s_qf_names[i]) {
+ int j;
+
+ for (j = 0; j < i; j++)
+ kfree(old_opts.s_qf_names[j]);
+ return -ENOMEM;
+ }
+ } else
+ old_opts.s_qf_names[i] = NULL;
#endif
/*
@@ -2698,12 +2764,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
- if (old_opts.s_qf_names[i] &&
- old_opts.s_qf_names[i] != sbi->s_qf_names[i])
- kfree(old_opts.s_qf_names[i]);
+ kfree(old_opts.s_qf_names[i]);
#endif
- unlock_super(sb);
-
if (enable_quota)
dquot_resume(sb, -1);
return 0;
@@ -2716,13 +2778,10 @@ restore_opts:
#ifdef CONFIG_QUOTA
sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++) {
- if (sbi->s_qf_names[i] &&
- old_opts.s_qf_names[i] != sbi->s_qf_names[i])
- kfree(sbi->s_qf_names[i]);
+ kfree(sbi->s_qf_names[i]);
sbi->s_qf_names[i] = old_opts.s_qf_names[i];
}
#endif
- unlock_super(sb);
return err;
}
@@ -2768,6 +2827,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
* bitmap, and an inode table.
*/
overhead += ngroups * (2 + sbi->s_itb_per_group);
+
+ /* Add the journal blocks as well */
+ overhead += sbi->s_journal->j_maxlen;
+
sbi->s_overhead_last = overhead;
smp_wmb();
sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
@@ -2804,7 +2867,7 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
- return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
+ return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
}
static int ext3_write_dquot(struct dquot *dquot)
@@ -3011,7 +3074,6 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
(unsigned long long)off, (unsigned long long)len);
return -EIO;
}
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
bh = ext3_bread(handle, inode, blk, 1, &err);
if (!bh)
goto out;
@@ -3035,10 +3097,8 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type,
}
brelse(bh);
out:
- if (err) {
- mutex_unlock(&inode->i_mutex);
+ if (err)
return err;
- }
if (inode->i_size < off + len) {
i_size_write(inode, off + len);
EXT3_I(inode)->i_disksize = inode->i_size;
@@ -3046,7 +3106,6 @@ out:
inode->i_version++;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
ext3_mark_inode_dirty(handle, inode);
- mutex_unlock(&inode->i_mutex);
return len;
}
@@ -3065,6 +3124,7 @@ static struct file_system_type ext3_fs_type = {
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
+MODULE_ALIAS_FS("ext3");
static int __init init_ext3_fs(void)
{
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index 7c489820777..6b01c3eab1f 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -17,10 +17,8 @@
* ext3 symlink handling code
*/
-#include <linux/fs.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
#include <linux/namei.h>
+#include "ext3.h"
#include "xattr.h"
static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index d565759d82e..c6874be6d58 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -50,14 +50,9 @@
* by the buffer lock.
*/
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
#include <linux/mbcache.h>
#include <linux/quotaops.h>
-#include <linux/rwsem.h>
#include "xattr.h"
#include "acl.h"
@@ -107,8 +102,8 @@ static struct mb_cache *ext3_xattr_cache;
static const struct xattr_handler *ext3_xattr_handler_map[] = {
[EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler,
#ifdef CONFIG_EXT3_FS_POSIX_ACL
- [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler,
- [EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3_xattr_acl_default_handler,
+ [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler,
+ [EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
#endif
[EXT3_XATTR_INDEX_TRUSTED] = &ext3_xattr_trusted_handler,
#ifdef CONFIG_EXT3_FS_SECURITY
@@ -120,8 +115,8 @@ const struct xattr_handler *ext3_xattr_handlers[] = {
&ext3_xattr_user_handler,
&ext3_xattr_trusted_handler,
#ifdef CONFIG_EXT3_FS_POSIX_ACL
- &ext3_xattr_acl_access_handler,
- &ext3_xattr_acl_default_handler,
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
#endif
#ifdef CONFIG_EXT3_FS_SECURITY
&ext3_xattr_security_handler,
@@ -818,10 +813,10 @@ inserted:
ea_idebug(inode, "creating block %d", block);
new_bh = sb_getblk(sb, block);
- if (!new_bh) {
+ if (unlikely(!new_bh)) {
getblk_failed:
ext3_free_blocks(handle, inode, block, 1);
- error = -EIO;
+ error = -ENOMEM;
goto cleanup;
}
lock_buffer(new_bh);
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 2be4f69bfa6..32e93ebf803 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -60,8 +60,6 @@ struct ext3_xattr_entry {
extern const struct xattr_handler ext3_xattr_user_handler;
extern const struct xattr_handler ext3_xattr_trusted_handler;
-extern const struct xattr_handler ext3_xattr_acl_access_handler;
-extern const struct xattr_handler ext3_xattr_acl_default_handler;
extern const struct xattr_handler ext3_xattr_security_handler;
extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index ea26f2acab9..722c2bf9645 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -3,12 +3,8 @@
* Handler for storing security labels as extended attributes.
*/
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
#include <linux/security.h>
+#include "ext3.h"
#include "xattr.h"
static size_t
@@ -47,8 +43,9 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
name, value, size, flags);
}
-int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array,
- void *fs_info)
+static int ext3_initxattrs(struct inode *inode,
+ const struct xattr *xattr_array,
+ void *fs_info)
{
const struct xattr *xattr;
handle_t *handle = fs_info;
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 2526a8829de..d75727cc67f 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -5,11 +5,7 @@
* Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
*/
-#include <linux/string.h>
-#include <linux/capability.h>
-#include <linux/fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
#include "xattr.h"
static size_t
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index b32e473a1e3..5612af3567e 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -5,10 +5,7 @@
* Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
*/
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/ext3_jbd.h>
-#include <linux/ext3_fs.h>
+#include "ext3.h"
#include "xattr.h"
static size_t