aboutsummaryrefslogtreecommitdiff
path: root/fs/ext3
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/ext3
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/ext3')
-rw-r--r--fs/ext3/Makefile12
-rw-r--r--fs/ext3/acl.c547
-rw-r--r--fs/ext3/acl.h84
-rw-r--r--fs/ext3/balloc.c1600
-rw-r--r--fs/ext3/bitmap.c26
-rw-r--r--fs/ext3/dir.c519
-rw-r--r--fs/ext3/file.c131
-rw-r--r--fs/ext3/fsync.c88
-rw-r--r--fs/ext3/hash.c152
-rw-r--r--fs/ext3/ialloc.c794
-rw-r--r--fs/ext3/inode.c3132
-rw-r--r--fs/ext3/ioctl.c243
-rw-r--r--fs/ext3/namei.c2378
-rw-r--r--fs/ext3/resize.c996
-rw-r--r--fs/ext3/super.c2539
-rw-r--r--fs/ext3/symlink.c54
-rw-r--r--fs/ext3/xattr.c1320
-rw-r--r--fs/ext3/xattr.h135
-rw-r--r--fs/ext3/xattr_security.c55
-rw-r--r--fs/ext3/xattr_trusted.c65
-rw-r--r--fs/ext3/xattr_user.c79
21 files changed, 14949 insertions, 0 deletions
diff --git a/fs/ext3/Makefile b/fs/ext3/Makefile
new file mode 100644
index 00000000000..704cd44a40c
--- /dev/null
+++ b/fs/ext3/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the linux ext3-filesystem routines.
+#
+
+obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ ioctl.o namei.o super.o symlink.o hash.o resize.o
+
+ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+ext3-$(CONFIG_EXT3_FS_SECURITY) += xattr_security.o
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
new file mode 100644
index 00000000000..328592c3a95
--- /dev/null
+++ b/fs/ext3/acl.c
@@ -0,0 +1,547 @@
+/*
+ * linux/fs/ext3/acl.c
+ *
+ * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
+ */
+
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/ext3_fs.h>
+#include "xattr.h"
+#include "acl.h"
+
+/*
+ * Convert from filesystem to in-memory representation.
+ */
+static struct posix_acl *
+ext3_acl_from_disk(const void *value, size_t size)
+{
+ const char *end = (char *)value + size;
+ int n, count;
+ struct posix_acl *acl;
+
+ if (!value)
+ return NULL;
+ if (size < sizeof(ext3_acl_header))
+ return ERR_PTR(-EINVAL);
+ if (((ext3_acl_header *)value)->a_version !=
+ cpu_to_le32(EXT3_ACL_VERSION))
+ return ERR_PTR(-EINVAL);
+ value = (char *)value + sizeof(ext3_acl_header);
+ count = ext3_acl_count(size);
+ if (count < 0)
+ return ERR_PTR(-EINVAL);
+ if (count == 0)
+ return NULL;
+ acl = posix_acl_alloc(count, GFP_KERNEL);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+ for (n=0; n < count; n++) {
+ ext3_acl_entry *entry =
+ (ext3_acl_entry *)value;
+ if ((char *)value + sizeof(ext3_acl_entry_short) > end)
+ goto fail;
+ acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
+ acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
+ switch(acl->a_entries[n].e_tag) {
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ value = (char *)value +
+ sizeof(ext3_acl_entry_short);
+ acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
+ break;
+
+ case ACL_USER:
+ case ACL_GROUP:
+ value = (char *)value + sizeof(ext3_acl_entry);
+ if ((char *)value > end)
+ goto fail;
+ acl->a_entries[n].e_id =
+ le32_to_cpu(entry->e_id);
+ break;
+
+ default:
+ goto fail;
+ }
+ }
+ if (value != end)
+ goto fail;
+ return acl;
+
+fail:
+ posix_acl_release(acl);
+ return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Convert from in-memory to filesystem representation.
+ */
+static void *
+ext3_acl_to_disk(const struct posix_acl *acl, size_t *size)
+{
+ ext3_acl_header *ext_acl;
+ char *e;
+ size_t n;
+
+ *size = ext3_acl_size(acl->a_count);
+ ext_acl = (ext3_acl_header *)kmalloc(sizeof(ext3_acl_header) +
+ acl->a_count * sizeof(ext3_acl_entry), GFP_KERNEL);
+ if (!ext_acl)
+ return ERR_PTR(-ENOMEM);
+ ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION);
+ e = (char *)ext_acl + sizeof(ext3_acl_header);
+ for (n=0; n < acl->a_count; n++) {
+ ext3_acl_entry *entry = (ext3_acl_entry *)e;
+ entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
+ entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
+ switch(acl->a_entries[n].e_tag) {
+ case ACL_USER:
+ case ACL_GROUP:
+ entry->e_id =
+ cpu_to_le32(acl->a_entries[n].e_id);
+ e += sizeof(ext3_acl_entry);
+ break;
+
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ e += sizeof(ext3_acl_entry_short);
+ break;
+
+ default:
+ goto fail;
+ }
+ }
+ return (char *)ext_acl;
+
+fail:
+ kfree(ext_acl);
+ return ERR_PTR(-EINVAL);
+}
+
+static inline struct posix_acl *
+ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl)
+{
+ struct posix_acl *acl = EXT3_ACL_NOT_CACHED;
+
+ spin_lock(&inode->i_lock);
+ if (*i_acl != EXT3_ACL_NOT_CACHED)
+ acl = posix_acl_dup(*i_acl);
+ spin_unlock(&inode->i_lock);
+
+ return acl;
+}
+
+static inline void
+ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl,
+ struct posix_acl *acl)
+{
+ spin_lock(&inode->i_lock);
+ if (*i_acl != EXT3_ACL_NOT_CACHED)
+ posix_acl_release(*i_acl);
+ *i_acl = posix_acl_dup(acl);
+ spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Inode operation get_posix_acl().
+ *
+ * inode->i_sem: don't care
+ */
+static struct posix_acl *
+ext3_get_acl(struct inode *inode, int type)
+{
+ struct ext3_inode_info *ei = EXT3_I(inode);
+ int name_index;
+ char *value = NULL;
+ struct posix_acl *acl;
+ int retval;
+
+ if (!test_opt(inode->i_sb, POSIX_ACL))
+ return NULL;
+
+ switch(type) {
+ case ACL_TYPE_ACCESS:
+ acl = ext3_iget_acl(inode, &ei->i_acl);
+ if (acl != EXT3_ACL_NOT_CACHED)
+ return acl;
+ name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ acl = ext3_iget_acl(inode, &ei->i_default_acl);
+ if (acl != EXT3_ACL_NOT_CACHED)
+ return acl;
+ name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ break;
+
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+ retval = ext3_xattr_get(inode, name_index, "", NULL, 0);
+ if (retval > 0) {
+ value = kmalloc(retval, GFP_KERNEL);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ retval = ext3_xattr_get(inode, name_index, "", value, retval);
+ }
+ if (retval > 0)
+ acl = ext3_acl_from_disk(value, retval);
+ else if (retval == -ENODATA || retval == -ENOSYS)
+ acl = NULL;
+ else
+ acl = ERR_PTR(retval);
+ kfree(value);
+
+ if (!IS_ERR(acl)) {
+ switch(type) {
+ case ACL_TYPE_ACCESS:
+ ext3_iset_acl(inode, &ei->i_acl, acl);
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ ext3_iset_acl(inode, &ei->i_default_acl, acl);
+ break;
+ }
+ }
+ return acl;
+}
+
+/*
+ * Set the access or default ACL of an inode.
+ *
+ * inode->i_sem: down unless called from ext3_new_inode
+ */
+static int
+ext3_set_acl(handle_t *handle, struct inode *inode, int type,
+ struct posix_acl *acl)
+{
+ struct ext3_inode_info *ei = EXT3_I(inode);
+ int name_index;
+ void *value = NULL;
+ size_t size;
+ int error;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ switch(type) {
+ case ACL_TYPE_ACCESS:
+ name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
+ if (acl) {
+ mode_t mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
+ if (error < 0)
+ return error;
+ else {
+ inode->i_mode = mode;
+ ext3_mark_inode_dirty(handle, inode);
+ if (error == 0)
+ acl = NULL;
+ }
+ }
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT;
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+ if (acl) {
+ value = ext3_acl_to_disk(acl, &size);
+ if (IS_ERR(value))
+ return (int)PTR_ERR(value);
+ }
+
+ error = ext3_xattr_set_handle(handle, inode, name_index, "",
+ value, size, 0);
+
+ kfree(value);
+ if (!error) {
+ switch(type) {
+ case ACL_TYPE_ACCESS:
+ ext3_iset_acl(inode, &ei->i_acl, acl);
+ break;
+
+ case ACL_TYPE_DEFAULT:
+ ext3_iset_acl(inode, &ei->i_default_acl, acl);
+ break;
+ }
+ }
+ return error;
+}
+
+static int
+ext3_check_acl(struct inode *inode, int mask)
+{
+ struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
+
+ if (acl) {
+ int error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ return error;
+ }
+
+ return -EAGAIN;
+}
+
+int
+ext3_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+ return generic_permission(inode, mask, ext3_check_acl);
+}
+
+/*
+ * Initialize the ACLs of a new inode. Called from ext3_new_inode.
+ *
+ * dir->i_sem: down
+ * inode->i_sem: up (access to inode is still exclusive)
+ */
+int
+ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+ struct posix_acl *acl = NULL;
+ int error = 0;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (test_opt(dir->i_sb, POSIX_ACL)) {
+ acl = ext3_get_acl(dir, ACL_TYPE_DEFAULT);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+ if (!acl)
+ inode->i_mode &= ~current->fs->umask;
+ }
+ if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
+ struct posix_acl *clone;
+ mode_t mode;
+
+ if (S_ISDIR(inode->i_mode)) {
+ error = ext3_set_acl(handle, inode,
+ ACL_TYPE_DEFAULT, acl);
+ if (error)
+ goto cleanup;
+ }
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ error = -ENOMEM;
+ if (!clone)
+ goto cleanup;
+
+ mode = inode->i_mode;
+ error = posix_acl_create_masq(clone, &mode);
+ if (error >= 0) {
+ inode->i_mode = mode;
+ if (error > 0) {
+ /* This is an extended ACL */
+ error = ext3_set_acl(handle, inode,
+ ACL_TYPE_ACCESS, clone);
+ }
+ }
+ posix_acl_release(clone);
+ }
+cleanup:
+ posix_acl_release(acl);
+ return error;
+}
+
+/*
+ * Does chmod for an inode that may have an Access Control List. The
+ * inode->i_mode field must be updated to the desired value by the caller
+ * before calling this function.
+ * Returns 0 on success, or a negative error number.
+ *
+ * We change the ACL rather than storing some ACL entries in the file
+ * mode permission bits (which would be more efficient), because that
+ * would break once additional permissions (like ACL_APPEND, ACL_DELETE
+ * for directories) are added. There are no more bits available in the
+ * file mode.
+ *
+ * inode->i_sem: down
+ */
+int
+ext3_acl_chmod(struct inode *inode)
+{
+ struct posix_acl *acl, *clone;
+ int error;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+ if (!test_opt(inode->i_sb, POSIX_ACL))
+ return 0;
+ acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+ error = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!error) {
+ handle_t *handle;
+ int retries = 0;
+
+ retry:
+ handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+ if (IS_ERR(handle)) {
+ error = PTR_ERR(handle);
+ ext3_std_error(inode->i_sb, error);
+ goto out;
+ }
+ error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
+ ext3_journal_stop(handle);
+ if (error == -ENOSPC &&
+ ext3_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
+ }
+out:
+ posix_acl_release(clone);
+ return error;
+}
+
+/*
+ * Extended attribute handlers
+ */
+static size_t
+ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
+ const char *name, size_t name_len)
+{
+ const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
+
+ if (!test_opt(inode->i_sb, POSIX_ACL))
+ return 0;
+ if (list && size <= list_len)
+ memcpy(list, XATTR_NAME_ACL_ACCESS, size);
+ return size;
+}
+
+static size_t
+ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
+ const char *name, size_t name_len)
+{
+ const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
+
+ if (!test_opt(inode->i_sb, POSIX_ACL))
+ return 0;
+ if (list && size <= list_len)
+ memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
+ return size;
+}
+
+static int
+ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+{
+ struct posix_acl *acl;
+ int error;
+
+ if (!test_opt(inode->i_sb, POSIX_ACL))
+ return -EOPNOTSUPP;
+
+ acl = ext3_get_acl(inode, type);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+ error = posix_acl_to_xattr(acl, buffer, size);
+ posix_acl_release(acl);
+
+ return error;
+}
+
+static int
+ext3_xattr_get_acl_access(struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
+}
+
+static int
+ext3_xattr_get_acl_default(struct inode *inode, const char *name,
+ void *buffer, size_t size)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
+}
+
+static int
+ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
+ size_t size)
+{
+ handle_t *handle;
+ struct posix_acl *acl;
+ int error, retries = 0;
+
+ if (!test_opt(inode->i_sb, POSIX_ACL))
+ return -EOPNOTSUPP;
+ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+ return -EPERM;
+
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ else if (acl) {
+ error = posix_acl_valid(acl);
+ if (error)
+ goto release_and_out;
+ }
+ } else
+ acl = NULL;
+
+retry:
+ handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ error = ext3_set_acl(handle, inode, type, acl);
+ ext3_journal_stop(handle);
+ if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
+
+release_and_out:
+ posix_acl_release(acl);
+ return error;
+}
+
+static int
+ext3_xattr_set_acl_access(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int
+ext3_xattr_set_acl_default(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+struct xattr_handler ext3_xattr_acl_access_handler = {
+ .prefix = XATTR_NAME_ACL_ACCESS,
+ .list = ext3_xattr_list_acl_access,
+ .get = ext3_xattr_get_acl_access,
+ .set = ext3_xattr_set_acl_access,
+};
+
+struct xattr_handler ext3_xattr_acl_default_handler = {
+ .prefix = XATTR_NAME_ACL_DEFAULT,
+ .list = ext3_xattr_list_acl_default,
+ .get = ext3_xattr_get_acl_default,
+ .set = ext3_xattr_set_acl_default,
+};
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
new file mode 100644
index 00000000000..98af0c0d0ba
--- /dev/null
+++ b/fs/ext3/acl.h
@@ -0,0 +1,84 @@
+/*
+ File: fs/ext3/acl.h
+
+ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
+*/
+
+#include <linux/xattr_acl.h>
+
+#define EXT3_ACL_VERSION 0x0001
+
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+ __le32 e_id;
+} ext3_acl_entry;
+
+typedef struct {
+ __le16 e_tag;
+ __le16 e_perm;
+} ext3_acl_entry_short;
+
+typedef struct {
+ __le32 a_version;
+} ext3_acl_header;
+
+static inline size_t ext3_acl_size(int count)
+{
+ if (count <= 4) {
+ return sizeof(ext3_acl_header) +
+ count * sizeof(ext3_acl_entry_short);
+ } else {
+ return sizeof(ext3_acl_header) +
+ 4 * sizeof(ext3_acl_entry_short) +
+ (count - 4) * sizeof(ext3_acl_entry);
+ }
+}
+
+static inline int ext3_acl_count(size_t size)
+{
+ ssize_t s;
+ size -= sizeof(ext3_acl_header);
+ s = size - 4 * sizeof(ext3_acl_entry_short);
+ if (s < 0) {
+ if (size % sizeof(ext3_acl_entry_short))
+ return -1;
+ return size / sizeof(ext3_acl_entry_short);
+ } else {
+ if (s % sizeof(ext3_acl_entry))
+ return -1;
+ return s / sizeof(ext3_acl_entry) + 4;
+ }
+}
+
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
+
+/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl
+ if the ACL has not been cached */
+#define EXT3_ACL_NOT_CACHED ((void *)-1)
+
+/* acl.c */
+extern int ext3_permission (struct inode *, int, struct nameidata *);
+extern int ext3_acl_chmod (struct inode *);
+extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
+
+extern int init_ext3_acl(void);
+extern void exit_ext3_acl(void);
+
+#else /* CONFIG_EXT3_FS_POSIX_ACL */
+#include <linux/sched.h>
+#define ext3_permission NULL
+
+static inline int
+ext3_acl_chmod(struct inode *inode)
+{
+ return 0;
+}
+
+static inline int
+ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
+{
+ return 0;
+}
+#endif /* CONFIG_EXT3_FS_POSIX_ACL */
+
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
new file mode 100644
index 00000000000..ccd632fcc6d
--- /dev/null
+++ b/fs/ext3/balloc.c
@@ -0,0 +1,1600 @@
+/*
+ * linux/fs/ext3/balloc.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/config.h>
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+
+/*
+ * balloc.c contains the blocks allocation and deallocation routines
+ */
+
+/*
+ * The free blocks are managed by bitmaps. A file system contains several
+ * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
+ * block for inodes, N blocks for the inode table and data blocks.
+ *
+ * The file system contains group descriptors which are located after the
+ * super block. Each descriptor contains the number of the bitmap block and
+ * the free blocks count in the block. The descriptors are loaded in memory
+ * when a file system is mounted (see ext3_read_super).
+ */
+
+
+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+
+struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+ unsigned int block_group,
+ struct buffer_head ** bh)
+{
+ unsigned long group_desc;
+ unsigned long offset;
+ struct ext3_group_desc * desc;
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+
+ if (block_group >= sbi->s_groups_count) {
+ ext3_error (sb, "ext3_get_group_desc",
+ "block_group >= groups_count - "
+ "block_group = %d, groups_count = %lu",
+ block_group, sbi->s_groups_count);
+
+ return NULL;
+ }
+ smp_rmb();
+
+ group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
+ offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
+ if (!sbi->s_group_desc[group_desc]) {
+ ext3_error (sb, "ext3_get_group_desc",
+ "Group descriptor not loaded - "
+ "block_group = %d, group_desc = %lu, desc = %lu",
+ block_group, group_desc, offset);
+ return NULL;
+ }
+
+ desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data;
+ if (bh)
+ *bh = sbi->s_group_desc[group_desc];
+ return desc + offset;
+}
+
+/*
+ * Read the bitmap for a given block_group, reading into the specified
+ * slot in the superblock's bitmap cache.
+ *
+ * Return buffer_head on success or NULL in case of failure.
+ */
+static struct buffer_head *
+read_block_bitmap(struct super_block *sb, unsigned int block_group)
+{
+ struct ext3_group_desc * desc;
+ struct buffer_head * bh = NULL;
+
+ desc = ext3_get_group_desc (sb, block_group, NULL);
+ if (!desc)
+ goto error_out;
+ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
+ if (!bh)
+ ext3_error (sb, "read_block_bitmap",
+ "Cannot read block bitmap - "
+ "block_group = %d, block_bitmap = %u",
+ block_group, le32_to_cpu(desc->bg_block_bitmap));
+error_out:
+ return bh;
+}
+/*
+ * The reservation window structure operations
+ * --------------------------------------------
+ * Operations include:
+ * dump, find, add, remove, is_empty, find_next_reservable_window, etc.
+ *
+ * We use sorted double linked list for the per-filesystem reservation
+ * window list. (like in vm_region).
+ *
+ * Initially, we keep those small operations in the abstract functions,
+ * so later if we need a better searching tree than double linked-list,
+ * we could easily switch to that without changing too much
+ * code.
+ */
+#if 0
+static void __rsv_window_dump(struct rb_root *root, int verbose,
+ const char *fn)
+{
+ struct rb_node *n;
+ struct ext3_reserve_window_node *rsv, *prev;
+ int bad;
+
+restart:
+ n = rb_first(root);
+ bad = 0;
+ prev = NULL;
+
+ printk("Block Allocation Reservation Windows Map (%s):\n", fn);
+ while (n) {
+ rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node);
+ if (verbose)
+ printk("reservation window 0x%p "
+ "start: %d, end: %d\n",
+ rsv, rsv->rsv_start, rsv->rsv_end);
+ if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) {
+ printk("Bad reservation %p (start >= end)\n",
+ rsv);
+ bad = 1;
+ }
+ if (prev && prev->rsv_end >= rsv->rsv_start) {
+ printk("Bad reservation %p (prev->end >= start)\n",
+ rsv);
+ bad = 1;
+ }
+ if (bad) {
+ if (!verbose) {
+ printk("Restarting reservation walk in verbose mode\n");
+ verbose = 1;
+ goto restart;
+ }
+ }
+ n = rb_next(n);
+ prev = rsv;
+ }
+ printk("Window map complete.\n");
+ if (bad)
+ BUG();
+}
+#define rsv_window_dump(root, verbose) \
+ __rsv_window_dump((root), (verbose), __FUNCTION__)
+#else
+#define rsv_window_dump(root, verbose) do {} while (0)
+#endif
+
+static int
+goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal,
+ unsigned int group, struct super_block * sb)
+{
+ unsigned long group_first_block, group_last_block;
+
+ group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
+ group * EXT3_BLOCKS_PER_GROUP(sb);
+ group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1;
+
+ if ((rsv->_rsv_start > group_last_block) ||
+ (rsv->_rsv_end < group_first_block))
+ return 0;
+ if ((goal >= 0) && ((goal + group_first_block < rsv->_rsv_start)
+ || (goal + group_first_block > rsv->_rsv_end)))
+ return 0;
+ return 1;
+}
+
+/*
+ * Find the reserved window which includes the goal, or the previous one
+ * if the goal is not in any window.
+ * Returns NULL if there are no windows or if all windows start after the goal.
+ */
+static struct ext3_reserve_window_node *
+search_reserve_window(struct rb_root *root, unsigned long goal)
+{
+ struct rb_node *n = root->rb_node;
+ struct ext3_reserve_window_node *rsv;
+
+ if (!n)
+ return NULL;
+
+ do {
+ rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
+
+ if (goal < rsv->rsv_start)
+ n = n->rb_left;
+ else if (goal > rsv->rsv_end)
+ n = n->rb_right;
+ else
+ return rsv;
+ } while (n);
+ /*
+ * We've fallen off the end of the tree: the goal wasn't inside
+ * any particular node. OK, the previous node must be to one
+ * side of the interval containing the goal. If it's the RHS,
+ * we need to back up one.
+ */
+ if (rsv->rsv_start > goal) {
+ n = rb_prev(&rsv->rsv_node);
+ rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
+ }
+ return rsv;
+}
+
+void ext3_rsv_window_add(struct super_block *sb,
+ struct ext3_reserve_window_node *rsv)
+{
+ struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root;
+ struct rb_node *node = &rsv->rsv_node;
+ unsigned int start = rsv->rsv_start;
+
+ struct rb_node ** p = &root->rb_node;
+ struct rb_node * parent = NULL;
+ struct ext3_reserve_window_node *this;
+
+ while (*p)
+ {
+ parent = *p;
+ this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node);
+
+ if (start < this->rsv_start)
+ p = &(*p)->rb_left;
+ else if (start > this->rsv_end)
+ p = &(*p)->rb_right;
+ else
+ BUG();
+ }
+
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+}
+
+static void rsv_window_remove(struct super_block *sb,
+ struct ext3_reserve_window_node *rsv)
+{
+ rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+ rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+ rsv->rsv_alloc_hit = 0;
+ rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
+}
+
+static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
+{
+ /* a valid reservation end block could not be 0 */
+ return (rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED);
+}
+void ext3_init_block_alloc_info(struct inode *inode)
+{
+ struct ext3_inode_info *ei = EXT3_I(inode);
+ struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+ struct super_block *sb = inode->i_sb;
+
+ block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
+ if (block_i) {
+ struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node;
+
+ rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+ rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+
+ /*
+ * if filesystem is mounted with NORESERVATION, the goal
+ * reservation window size is set to zero to indicate
+ * block reservation is off
+ */
+ if (!test_opt(sb, RESERVATION))
+ rsv->rsv_goal_size = 0;
+ else
+ rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
+ rsv->rsv_alloc_hit = 0;
+ block_i->last_alloc_logical_block = 0;
+ block_i->last_alloc_physical_block = 0;
+ }
+ ei->i_block_alloc_info = block_i;
+}
+
+void ext3_discard_reservation(struct inode *inode)
+{
+ struct ext3_inode_info *ei = EXT3_I(inode);
+ struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+ struct ext3_reserve_window_node *rsv;
+ spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
+
+ if (!block_i)
+ return;
+
+ rsv = &block_i->rsv_window_node;
+ if (!rsv_is_empty(&rsv->rsv_window)) {
+ spin_lock(rsv_lock);
+ if (!rsv_is_empty(&rsv->rsv_window))
+ rsv_window_remove(inode->i_sb, rsv);
+ spin_unlock(rsv_lock);
+ }
+}
+
+/* Free given blocks, update quota and i_blocks field */
+void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
+ unsigned long block, unsigned long count,
+ int *pdquot_freed_blocks)
+{
+ struct buffer_head *bitmap_bh = NULL;
+ struct buffer_head *gd_bh;
+ unsigned long block_group;
+ unsigned long bit;
+ unsigned long i;
+ unsigned long overflow;
+ struct ext3_group_desc * desc;
+ struct ext3_super_block * es;
+ struct ext3_sb_info *sbi;
+ int err = 0, ret;
+ unsigned group_freed;
+
+ *pdquot_freed_blocks = 0;
+ sbi = EXT3_SB(sb);
+ es = sbi->s_es;
+ if (block < le32_to_cpu(es->s_first_data_block) ||
+ block + count < block ||
+ block + count > le32_to_cpu(es->s_blocks_count)) {
+ ext3_error (sb, "ext3_free_blocks",
+ "Freeing blocks not in datazone - "
+ "block = %lu, count = %lu", block, count);
+ goto error_return;
+ }
+
+ ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
+
+do_more:
+ overflow = 0;
+ block_group = (block - le32_to_cpu(es->s_first_data_block)) /
+ EXT3_BLOCKS_PER_GROUP(sb);
+ bit = (block - le32_to_cpu(es->s_first_data_block)) %
+ EXT3_BLOCKS_PER_GROUP(sb);
+ /*
+ * Check to see if we are freeing blocks across a group
+ * boundary.
+ */
+ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
+ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
+ count -= overflow;
+ }
+ brelse(bitmap_bh);
+ bitmap_bh = read_block_bitmap(sb, block_group);
+ if (!bitmap_bh)
+ goto error_return;
+ desc = ext3_get_group_desc (sb, block_group, &gd_bh);
+ if (!desc)
+ goto error_return;
+
+ if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
+ in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
+ in_range (block, le32_to_cpu(desc->bg_inode_table),
+ sbi->s_itb_per_group) ||
+ in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
+ sbi->s_itb_per_group))
+ ext3_error (sb, "ext3_free_blocks",
+ "Freeing blocks in system zones - "
+ "Block = %lu, count = %lu",
+ block, count);
+
+ /*
+ * We are about to start releasing blocks in the bitmap,