aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-01-09 13:01:38 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-09 13:01:38 -0800
commit73d59314e6ed268d6f322ae1bdd723b23fa5a4ed (patch)
treeec7159b13dfd57739ed840e88a436d8d6f4eee5f
parent6ddaab20c32af03d68de00e7c97ae8d9820e4dab (diff)
parente293e97e363e419d8a3628a927321e3f75206a0b (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (864 commits) Btrfs: explicitly mark the tree log root for writeback Btrfs: Drop the hardware crc32c asm code Btrfs: Add Documentation/filesystem/btrfs.txt, remove old COPYING Btrfs: kmap_atomic(KM_USER0) is safe for btrfs_readpage_end_io_hook Btrfs: Don't use kmap_atomic(..., KM_IRQ0) during checksum verifies Btrfs: tree logging checksum fixes Btrfs: don't change file extent's ram_bytes in btrfs_drop_extents Btrfs: Use btrfs_join_transaction to avoid deadlocks during snapshot creation Btrfs: drop remaining LINUX_KERNEL_VERSION checks and compat code Btrfs: drop EXPORT symbols from extent_io.c Btrfs: Fix checkpatch.pl warnings Btrfs: Fix free block discard calls down to the block layer Btrfs: avoid orphan inode caused by log replay Btrfs: avoid potential super block corruption Btrfs: do not call kfree if kmalloc failed in btrfs_sysfs_add_super Btrfs: fix a memory leak in btrfs_get_sb Btrfs: Fix typo in clear_state_cb Btrfs: Fix memset length in btrfs_file_write Btrfs: update directory's size when creating subvol/snapshot Btrfs: add permission checks to the ioctls ...
-rw-r--r--Documentation/filesystems/btrfs.txt91
-rw-r--r--fs/Kconfig19
-rw-r--r--fs/Makefile1
-rw-r--r--fs/btrfs/Makefile25
-rw-r--r--fs/btrfs/acl.c351
-rw-r--r--fs/btrfs/async-thread.c419
-rw-r--r--fs/btrfs/async-thread.h101
-rw-r--r--fs/btrfs/btrfs_inode.h131
-rw-r--r--fs/btrfs/compat.h7
-rw-r--r--fs/btrfs/compression.c709
-rw-r--r--fs/btrfs/compression.h47
-rw-r--r--fs/btrfs/crc32c.h29
-rw-r--r--fs/btrfs/ctree.c3953
-rw-r--r--fs/btrfs/ctree.h2129
-rw-r--r--fs/btrfs/dir-item.c386
-rw-r--r--fs/btrfs/disk-io.c2343
-rw-r--r--fs/btrfs/disk-io.h102
-rw-r--r--fs/btrfs/export.c203
-rw-r--r--fs/btrfs/export.h19
-rw-r--r--fs/btrfs/extent-tree.c5986
-rw-r--r--fs/btrfs/extent_io.c3717
-rw-r--r--fs/btrfs/extent_io.h269
-rw-r--r--fs/btrfs/extent_map.c351
-rw-r--r--fs/btrfs/extent_map.h62
-rw-r--r--fs/btrfs/file-item.c831
-rw-r--r--fs/btrfs/file.c1288
-rw-r--r--fs/btrfs/free-space-cache.c495
-rw-r--r--fs/btrfs/hash.h27
-rw-r--r--fs/btrfs/inode-item.c206
-rw-r--r--fs/btrfs/inode-map.c144
-rw-r--r--fs/btrfs/inode.c5035
-rw-r--r--fs/btrfs/ioctl.c1132
-rw-r--r--fs/btrfs/ioctl.h67
-rw-r--r--fs/btrfs/locking.c88
-rw-r--r--fs/btrfs/locking.h27
-rw-r--r--fs/btrfs/ordered-data.c730
-rw-r--r--fs/btrfs/ordered-data.h158
-rw-r--r--fs/btrfs/orphan.c67
-rw-r--r--fs/btrfs/print-tree.c216
-rw-r--r--fs/btrfs/print-tree.h23
-rw-r--r--fs/btrfs/ref-cache.c230
-rw-r--r--fs/btrfs/ref-cache.h77
-rw-r--r--fs/btrfs/root-tree.c366
-rw-r--r--fs/btrfs/struct-funcs.c139
-rw-r--r--fs/btrfs/super.c720
-rw-r--r--fs/btrfs/sysfs.c269
-rw-r--r--fs/btrfs/transaction.c1097
-rw-r--r--fs/btrfs/transaction.h106
-rw-r--r--fs/btrfs/tree-defrag.c147
-rw-r--r--fs/btrfs/tree-log.c2898
-rw-r--r--fs/btrfs/tree-log.h41
-rw-r--r--fs/btrfs/version.h4
-rw-r--r--fs/btrfs/version.sh43
-rw-r--r--fs/btrfs/volumes.c3218
-rw-r--r--fs/btrfs/volumes.h162
-rw-r--r--fs/btrfs/xattr.c322
-rw-r--r--fs/btrfs/xattr.h39
-rw-r--r--fs/btrfs/zlib.c632
58 files changed, 42494 insertions, 0 deletions
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt
new file mode 100644
index 00000000000..64087c34327
--- /dev/null
+++ b/Documentation/filesystems/btrfs.txt
@@ -0,0 +1,91 @@
+
+ BTRFS
+ =====
+
+Btrfs is a new copy on write filesystem for Linux aimed at
+implementing advanced features while focusing on fault tolerance,
+repair and easy administration. Initially developed by Oracle, Btrfs
+is licensed under the GPL and open for contribution from anyone.
+
+Linux has a wealth of filesystems to choose from, but we are facing a
+number of challenges with scaling to the large storage subsystems that
+are becoming common in today's data centers. Filesystems need to scale
+in their ability to address and manage large storage, and also in
+their ability to detect, repair and tolerate errors in the data stored
+on disk. Btrfs is under heavy development, and is not suitable for
+any uses other than benchmarking and review. The Btrfs disk format is
+not yet finalized.
+
+The main Btrfs features include:
+
+ * Extent based file storage (2^64 max file size)
+ * Space efficient packing of small files
+ * Space efficient indexed directories
+ * Dynamic inode allocation
+ * Writable snapshots
+ * Subvolumes (separate internal filesystem roots)
+ * Object level mirroring and striping
+ * Checksums on data and metadata (multiple algorithms available)
+ * Compression
+ * Integrated multiple device support, with several raid algorithms
+ * Online filesystem check (not yet implemented)
+ * Very fast offline filesystem check
+ * Efficient incremental backup and FS mirroring (not yet implemented)
+ * Online filesystem defragmentation
+
+
+
+ MAILING LIST
+ ============
+
+There is a Btrfs mailing list hosted on vger.kernel.org. You can
+find details on how to subscribe here:
+
+http://vger.kernel.org/vger-lists.html#linux-btrfs
+
+Mailing list archives are available from gmane:
+
+http://dir.gmane.org/gmane.comp.file-systems.btrfs
+
+
+
+ IRC
+ ===
+
+Discussion of Btrfs also occurs on the #btrfs channel of the Freenode
+IRC network.
+
+
+
+ UTILITIES
+ =========
+
+Userspace tools for creating and manipulating Btrfs file systems are
+available from the git repository at the following location:
+
+ http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-progs-unstable.git
+ git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs-unstable.git
+
+These include the following tools:
+
+mkfs.btrfs: create a filesystem
+
+btrfsctl: control program to create snapshots and subvolumes:
+
+ mount /dev/sda2 /mnt
+ btrfsctl -s new_subvol_name /mnt
+ btrfsctl -s snapshot_of_default /mnt/default
+ btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
+ btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
+ ls /mnt
+ default snapshot_of_a_snapshot snapshot_of_new_subvol
+ new_subvol_name snapshot_of_default
+
+ Snapshots and subvolumes cannot be deleted right now, but you can
+ rm -rf all the files and directories inside them.
+
+btrfsck: do a limited check of the FS extent trees.
+
+btrfs-debug-tree: print all of the FS metadata in text form. Example:
+
+ btrfs-debug-tree /dev/sda2 >& big_output_file
diff --git a/fs/Kconfig b/fs/Kconfig
index 32883589ee5..02cff86af1b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -269,6 +269,25 @@ config OCFS2_FS_POSIX_ACL
Posix Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
+config BTRFS_FS
+ tristate "Btrfs filesystem (EXPERIMENTAL) Unstable disk format"
+ depends on EXPERIMENTAL
+ select LIBCRC32C
+ select ZLIB_INFLATE
+ select ZLIB_DEFLATE
+ help
+ Btrfs is a new filesystem with extents, writable snapshotting,
+ support for multiple devices and many more features.
+
+ Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET
+ FINALIZED. You should say N here unless you are interested in
+ testing Btrfs with non-critical data.
+
+ To compile this file system support as a module, choose M here. The
+ module will be called btrfs.
+
+ If unsure, say N.
+
endif # BLOCK
source "fs/notify/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index c830611550d..bc4e14df108 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -119,4 +119,5 @@ obj-$(CONFIG_HOSTFS) += hostfs/
obj-$(CONFIG_HPPFS) += hppfs/
obj-$(CONFIG_DEBUG_FS) += debugfs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
+obj-$(CONFIG_BTRFS_FS) += btrfs/
obj-$(CONFIG_GFS2_FS) += gfs2/
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
new file mode 100644
index 00000000000..d2cf5a54a4b
--- /dev/null
+++ b/fs/btrfs/Makefile
@@ -0,0 +1,25 @@
+ifneq ($(KERNELRELEASE),)
+# kbuild part of makefile
+
+obj-$(CONFIG_BTRFS_FS) := btrfs.o
+btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
+ file-item.o inode-item.o inode-map.o disk-io.o \
+ transaction.o inode.o file.o tree-defrag.o \
+ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
+ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
+ ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
+ compression.o
+else
+
+# Normal Makefile
+
+KERNELDIR := /lib/modules/`uname -r`/build
+all:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
+
+modules_install:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
+clean:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` clean
+
+endif
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
new file mode 100644
index 00000000000..1d53b62dbba
--- /dev/null
+++ b/fs/btrfs/acl.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2007 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/sched.h>
+
+#include "ctree.h"
+#include "btrfs_inode.h"
+#include "xattr.h"
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+static void btrfs_update_cached_acl(struct inode *inode,
+ struct posix_acl **p_acl,
+ struct posix_acl *acl)
+{
+ spin_lock(&inode->i_lock);
+ if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
+ posix_acl_release(*p_acl);
+ *p_acl = posix_acl_dup(acl);
+ spin_unlock(&inode->i_lock);
+}
+
+static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+{
+ int size;
+ const char *name;
+ char *value = NULL;
+ struct posix_acl *acl = NULL, **p_acl;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ p_acl = &BTRFS_I(inode)->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ name = POSIX_ACL_XATTR_DEFAULT;
+ p_acl = &BTRFS_I(inode)->i_default_acl;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ spin_lock(&inode->i_lock);
+ if (*p_acl != BTRFS_ACL_NOT_CACHED)
+ acl = posix_acl_dup(*p_acl);
+ spin_unlock(&inode->i_lock);
+
+ if (acl)
+ return acl;
+
+
+ size = __btrfs_getxattr(inode, name, "", 0);
+ if (size > 0) {
+ value = kzalloc(size, GFP_NOFS);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ size = __btrfs_getxattr(inode, name, value, size);
+ if (size > 0) {
+ acl = posix_acl_from_xattr(value, size);
+ btrfs_update_cached_acl(inode, p_acl, acl);
+ }
+ kfree(value);
+ } else if (size == -ENOENT) {
+ acl = NULL;
+ btrfs_update_cached_acl(inode, p_acl, acl);
+ }
+
+ return acl;
+}
+
+static int btrfs_xattr_get_acl(struct inode *inode, int type,
+ void *value, size_t size)
+{
+ struct posix_acl *acl;
+ int ret = 0;
+
+ acl = btrfs_get_acl(inode, type);
+
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+ ret = posix_acl_to_xattr(acl, value, size);
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+/*
+ * Needs to be called with fs_mutex held
+ */
+static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ int ret, size = 0;
+ const char *name;
+ struct posix_acl **p_acl;
+ char *value = NULL;
+ mode_t mode;
+
+ if (acl) {
+ ret = posix_acl_valid(acl);
+ if (ret < 0)
+ return ret;
+ ret = 0;
+ }
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ mode = inode->i_mode;
+ ret = posix_acl_equiv_mode(acl, &mode);
+ if (ret < 0)
+ return ret;
+ ret = 0;
+ inode->i_mode = mode;
+ name = POSIX_ACL_XATTR_ACCESS;
+ p_acl = &BTRFS_I(inode)->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EINVAL : 0;
+ name = POSIX_ACL_XATTR_DEFAULT;
+ p_acl = &BTRFS_I(inode)->i_default_acl;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (acl) {
+ size = posix_acl_xattr_size(acl->a_count);
+ value = kmalloc(size, GFP_NOFS);
+ if (!value) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = posix_acl_to_xattr(acl, value, size);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = __btrfs_setxattr(inode, name, value, size, 0);
+
+out:
+ kfree(value);
+
+ if (!ret)
+ btrfs_update_cached_acl(inode, p_acl, acl);
+
+ return ret;
+}
+
+static int btrfs_xattr_set_acl(struct inode *inode, int type,
+ const void *value, size_t size)
+{
+ int ret = 0;
+ struct posix_acl *acl = NULL;
+
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (acl == NULL) {
+ value = NULL;
+ size = 0;
+ } else if (IS_ERR(acl)) {
+ return PTR_ERR(acl);
+ }
+ }
+
+ ret = btrfs_set_acl(inode, acl, type);
+
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+
+static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+ struct posix_acl *acl;
+ int error = -EAGAIN;
+
+ acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ }
+
+ return error;
+}
+
+/*
+ * btrfs_init_acl is already generally called under fs_mutex, so the locking
+ * stuff has been fixed to work with that. If the locking stuff changes, we
+ * need to re-evaluate the acl locking stuff.
+ */
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+ struct posix_acl *acl = NULL;
+ int ret = 0;
+
+ /* this happens with subvols */
+ if (!dir)
+ return 0;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (IS_POSIXACL(dir)) {
+ acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+
+ if (!acl)
+ inode->i_mode &= ~current->fs->umask;
+ }
+
+ if (IS_POSIXACL(dir) && acl) {
+ struct posix_acl *clone;
+ mode_t mode;
+
+ if (S_ISDIR(inode->i_mode)) {
+ ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
+ if (ret)
+ goto failed;
+ }
+ clone = posix_acl_clone(acl, GFP_NOFS);
+ ret = -ENOMEM;
+ if (!clone)
+ goto failed;
+
+ mode = inode->i_mode;
+ ret = posix_acl_create_masq(clone, &mode);
+ if (ret >= 0) {
+ inode->i_mode = mode;
+ if (ret > 0) {
+ /* we need an acl */
+ ret = btrfs_set_acl(inode, clone,
+ ACL_TYPE_ACCESS);
+ }
+ }
+ }
+failed:
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+ struct posix_acl *acl, *clone;
+ int ret = 0;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if (!IS_POSIXACL(inode))
+ return 0;
+
+ acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+
+ ret = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!ret)
+ ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
+
+ posix_acl_release(clone);
+
+ return ret;
+}
+
+struct xattr_handler btrfs_xattr_acl_default_handler = {
+ .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .get = btrfs_xattr_acl_default_get,
+ .set = btrfs_xattr_acl_default_set,
+};
+
+struct xattr_handler btrfs_xattr_acl_access_handler = {
+ .prefix = POSIX_ACL_XATTR_ACCESS,
+ .get = btrfs_xattr_acl_access_get,
+ .set = btrfs_xattr_acl_access_set,
+};
+
+#else /* CONFIG_FS_POSIX_ACL */
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+ return 0;
+}
+
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+ return 0;
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+ return 0;
+}
+
+#endif /* CONFIG_FS_POSIX_ACL */
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
new file mode 100644
index 00000000000..8e2fec05dbe
--- /dev/null
+++ b/fs/btrfs/async-thread.c
@@ -0,0 +1,419 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+# include <linux/freezer.h>
+#include "async-thread.h"
+
+#define WORK_QUEUED_BIT 0
+#define WORK_DONE_BIT 1
+#define WORK_ORDER_DONE_BIT 2
+
+/*
+ * container for the kthread task pointer and the list of pending work
+ * One of these is allocated per thread.
+ */
+struct btrfs_worker_thread {
+ /* pool we belong to */
+ struct btrfs_workers *workers;
+
+ /* list of struct btrfs_work that are waiting for service */
+ struct list_head pending;
+
+ /* list of worker threads from struct btrfs_workers */
+ struct list_head worker_list;
+
+ /* kthread */
+ struct task_struct *task;
+
+ /* number of things on the pending list */
+ atomic_t num_pending;
+
+ unsigned long sequence;
+
+ /* protects the pending list. */
+ spinlock_t lock;
+
+ /* set to non-zero when this thread is already awake and kicking */
+ int working;
+
+ /* are we currently idle */
+ int idle;
+};
+
+/*
+ * helper function to move a thread onto the idle list after it
+ * has finished some requests.
+ */
+static void check_idle_worker(struct btrfs_worker_thread *worker)
+{
+ if (!worker->idle && atomic_read(&worker->num_pending) <
+ worker->workers->idle_thresh / 2) {
+ unsigned long flags;
+ spin_lock_irqsave(&worker->workers->lock, flags);
+ worker->idle = 1;
+ list_move(&worker->worker_list, &worker->workers->idle_list);
+ spin_unlock_irqrestore(&worker->workers->lock, flags);
+ }
+}
+
+/*
+ * helper function to move a thread off the idle list after new
+ * pending work is added.
+ */
+static void check_busy_worker(struct btrfs_worker_thread *worker)
+{
+ if (worker->idle && atomic_read(&worker->num_pending) >=
+ worker->workers->idle_thresh) {
+ unsigned long flags;
+ spin_lock_irqsave(&worker->workers->lock, flags);
+ worker->idle = 0;
+ list_move_tail(&worker->worker_list,
+ &worker->workers->worker_list);
+ spin_unlock_irqrestore(&worker->workers->lock, flags);
+ }
+}
+
+static noinline int run_ordered_completions(struct btrfs_workers *workers,
+ struct btrfs_work *work)
+{
+ unsigned long flags;
+
+ if (!workers->ordered)
+ return 0;
+
+ set_bit(WORK_DONE_BIT, &work->flags);
+
+ spin_lock_irqsave(&workers->lock, flags);
+
+ while (!list_empty(&workers->order_list)) {
+ work = list_entry(workers->order_list.next,
+ struct btrfs_work, order_list);
+
+ if (!test_bit(WORK_DONE_BIT, &work->flags))
+ break;
+
+ /* we are going to call the ordered done function, but
+ * we leave the work item on the list as a barrier so
+ * that later work items that are done don't have their
+ * functions called before this one returns
+ */
+ if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
+ break;
+
+ spin_unlock_irqrestore(&workers->lock, flags);
+
+ work->ordered_func(work);
+
+ /* now take the lock again and call the freeing code */
+ spin_lock_irqsave(&workers->lock, flags);
+ list_del(&work->order_list);
+ work->ordered_free(work);
+ }
+
+ spin_unlock_irqrestore(&workers->lock, flags);
+ return 0;
+}
+
+/*
+ * main loop for servicing work items
+ */
+static int worker_loop(void *arg)
+{
+ struct btrfs_worker_thread *worker = arg;
+ struct list_head *cur;
+ struct btrfs_work *work;
+ do {
+ spin_lock_irq(&worker->lock);
+ while (!list_empty(&worker->pending)) {
+ cur = worker->pending.next;
+ work = list_entry(cur, struct btrfs_work, list);
+ list_del(&work->list);
+ clear_bit(WORK_QUEUED_BIT, &work->flags);
+
+ work->worker = worker;
+ spin_unlock_irq(&worker->lock);
+
+ work->func(work);
+
+ atomic_dec(&worker->num_pending);
+ /*
+ * unless this is an ordered work queue,
+ * 'work' was probably freed by func above.
+ */
+ run_ordered_completions(worker->workers, work);
+
+ spin_lock_irq(&worker->lock);
+ check_idle_worker(worker);
+
+ }
+ worker->working = 0;
+ if (freezin