aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-01-09 13:01:38 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-09 13:01:38 -0800
commit73d59314e6ed268d6f322ae1bdd723b23fa5a4ed (patch)
treeec7159b13dfd57739ed840e88a436d8d6f4eee5f /fs/btrfs
parent6ddaab20c32af03d68de00e7c97ae8d9820e4dab (diff)
parente293e97e363e419d8a3628a927321e3f75206a0b (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (864 commits) Btrfs: explicitly mark the tree log root for writeback Btrfs: Drop the hardware crc32c asm code Btrfs: Add Documentation/filesystem/btrfs.txt, remove old COPYING Btrfs: kmap_atomic(KM_USER0) is safe for btrfs_readpage_end_io_hook Btrfs: Don't use kmap_atomic(..., KM_IRQ0) during checksum verifies Btrfs: tree logging checksum fixes Btrfs: don't change file extent's ram_bytes in btrfs_drop_extents Btrfs: Use btrfs_join_transaction to avoid deadlocks during snapshot creation Btrfs: drop remaining LINUX_KERNEL_VERSION checks and compat code Btrfs: drop EXPORT symbols from extent_io.c Btrfs: Fix checkpatch.pl warnings Btrfs: Fix free block discard calls down to the block layer Btrfs: avoid orphan inode caused by log replay Btrfs: avoid potential super block corruption Btrfs: do not call kfree if kmalloc failed in btrfs_sysfs_add_super Btrfs: fix a memory leak in btrfs_get_sb Btrfs: Fix typo in clear_state_cb Btrfs: Fix memset length in btrfs_file_write Btrfs: update directory's size when creating subvol/snapshot Btrfs: add permission checks to the ioctls ...
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Makefile25
-rw-r--r--fs/btrfs/acl.c351
-rw-r--r--fs/btrfs/async-thread.c419
-rw-r--r--fs/btrfs/async-thread.h101
-rw-r--r--fs/btrfs/btrfs_inode.h131
-rw-r--r--fs/btrfs/compat.h7
-rw-r--r--fs/btrfs/compression.c709
-rw-r--r--fs/btrfs/compression.h47
-rw-r--r--fs/btrfs/crc32c.h29
-rw-r--r--fs/btrfs/ctree.c3953
-rw-r--r--fs/btrfs/ctree.h2129
-rw-r--r--fs/btrfs/dir-item.c386
-rw-r--r--fs/btrfs/disk-io.c2343
-rw-r--r--fs/btrfs/disk-io.h102
-rw-r--r--fs/btrfs/export.c203
-rw-r--r--fs/btrfs/export.h19
-rw-r--r--fs/btrfs/extent-tree.c5986
-rw-r--r--fs/btrfs/extent_io.c3717
-rw-r--r--fs/btrfs/extent_io.h269
-rw-r--r--fs/btrfs/extent_map.c351
-rw-r--r--fs/btrfs/extent_map.h62
-rw-r--r--fs/btrfs/file-item.c831
-rw-r--r--fs/btrfs/file.c1288
-rw-r--r--fs/btrfs/free-space-cache.c495
-rw-r--r--fs/btrfs/hash.h27
-rw-r--r--fs/btrfs/inode-item.c206
-rw-r--r--fs/btrfs/inode-map.c144
-rw-r--r--fs/btrfs/inode.c5035
-rw-r--r--fs/btrfs/ioctl.c1132
-rw-r--r--fs/btrfs/ioctl.h67
-rw-r--r--fs/btrfs/locking.c88
-rw-r--r--fs/btrfs/locking.h27
-rw-r--r--fs/btrfs/ordered-data.c730
-rw-r--r--fs/btrfs/ordered-data.h158
-rw-r--r--fs/btrfs/orphan.c67
-rw-r--r--fs/btrfs/print-tree.c216
-rw-r--r--fs/btrfs/print-tree.h23
-rw-r--r--fs/btrfs/ref-cache.c230
-rw-r--r--fs/btrfs/ref-cache.h77
-rw-r--r--fs/btrfs/root-tree.c366
-rw-r--r--fs/btrfs/struct-funcs.c139
-rw-r--r--fs/btrfs/super.c720
-rw-r--r--fs/btrfs/sysfs.c269
-rw-r--r--fs/btrfs/transaction.c1097
-rw-r--r--fs/btrfs/transaction.h106
-rw-r--r--fs/btrfs/tree-defrag.c147
-rw-r--r--fs/btrfs/tree-log.c2898
-rw-r--r--fs/btrfs/tree-log.h41
-rw-r--r--fs/btrfs/version.h4
-rw-r--r--fs/btrfs/version.sh43
-rw-r--r--fs/btrfs/volumes.c3218
-rw-r--r--fs/btrfs/volumes.h162
-rw-r--r--fs/btrfs/xattr.c322
-rw-r--r--fs/btrfs/xattr.h39
-rw-r--r--fs/btrfs/zlib.c632
55 files changed, 42383 insertions, 0 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
new file mode 100644
index 00000000000..d2cf5a54a4b
--- /dev/null
+++ b/fs/btrfs/Makefile
@@ -0,0 +1,25 @@
+ifneq ($(KERNELRELEASE),)
+# kbuild part of makefile
+
+obj-$(CONFIG_BTRFS_FS) := btrfs.o
+btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
+ file-item.o inode-item.o inode-map.o disk-io.o \
+ transaction.o inode.o file.o tree-defrag.o \
+ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
+ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
+ ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
+ compression.o
+else
+
+# Normal Makefile
+
+KERNELDIR := /lib/modules/`uname -r`/build
+all:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
+
+modules_install:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
+clean:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` clean
+
+endif
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
new file mode 100644
index 00000000000..1d53b62dbba
--- /dev/null
+++ b/fs/btrfs/acl.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2007 Red Hat. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/posix_acl.h>
+#include <linux/sched.h>
+
+#include "ctree.h"
+#include "btrfs_inode.h"
+#include "xattr.h"
+
+#ifdef CONFIG_FS_POSIX_ACL
+
+static void btrfs_update_cached_acl(struct inode *inode,
+ struct posix_acl **p_acl,
+ struct posix_acl *acl)
+{
+ spin_lock(&inode->i_lock);
+ if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED)
+ posix_acl_release(*p_acl);
+ *p_acl = posix_acl_dup(acl);
+ spin_unlock(&inode->i_lock);
+}
+
+static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
+{
+ int size;
+ const char *name;
+ char *value = NULL;
+ struct posix_acl *acl = NULL, **p_acl;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = POSIX_ACL_XATTR_ACCESS;
+ p_acl = &BTRFS_I(inode)->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ name = POSIX_ACL_XATTR_DEFAULT;
+ p_acl = &BTRFS_I(inode)->i_default_acl;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ spin_lock(&inode->i_lock);
+ if (*p_acl != BTRFS_ACL_NOT_CACHED)
+ acl = posix_acl_dup(*p_acl);
+ spin_unlock(&inode->i_lock);
+
+ if (acl)
+ return acl;
+
+
+ size = __btrfs_getxattr(inode, name, "", 0);
+ if (size > 0) {
+ value = kzalloc(size, GFP_NOFS);
+ if (!value)
+ return ERR_PTR(-ENOMEM);
+ size = __btrfs_getxattr(inode, name, value, size);
+ if (size > 0) {
+ acl = posix_acl_from_xattr(value, size);
+ btrfs_update_cached_acl(inode, p_acl, acl);
+ }
+ kfree(value);
+ } else if (size == -ENOENT) {
+ acl = NULL;
+ btrfs_update_cached_acl(inode, p_acl, acl);
+ }
+
+ return acl;
+}
+
+static int btrfs_xattr_get_acl(struct inode *inode, int type,
+ void *value, size_t size)
+{
+ struct posix_acl *acl;
+ int ret = 0;
+
+ acl = btrfs_get_acl(inode, type);
+
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+ ret = posix_acl_to_xattr(acl, value, size);
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+/*
+ * Needs to be called with fs_mutex held
+ */
+static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ int ret, size = 0;
+ const char *name;
+ struct posix_acl **p_acl;
+ char *value = NULL;
+ mode_t mode;
+
+ if (acl) {
+ ret = posix_acl_valid(acl);
+ if (ret < 0)
+ return ret;
+ ret = 0;
+ }
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ mode = inode->i_mode;
+ ret = posix_acl_equiv_mode(acl, &mode);
+ if (ret < 0)
+ return ret;
+ ret = 0;
+ inode->i_mode = mode;
+ name = POSIX_ACL_XATTR_ACCESS;
+ p_acl = &BTRFS_I(inode)->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EINVAL : 0;
+ name = POSIX_ACL_XATTR_DEFAULT;
+ p_acl = &BTRFS_I(inode)->i_default_acl;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (acl) {
+ size = posix_acl_xattr_size(acl->a_count);
+ value = kmalloc(size, GFP_NOFS);
+ if (!value) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = posix_acl_to_xattr(acl, value, size);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = __btrfs_setxattr(inode, name, value, size, 0);
+
+out:
+ kfree(value);
+
+ if (!ret)
+ btrfs_update_cached_acl(inode, p_acl, acl);
+
+ return ret;
+}
+
+static int btrfs_xattr_set_acl(struct inode *inode, int type,
+ const void *value, size_t size)
+{
+ int ret = 0;
+ struct posix_acl *acl = NULL;
+
+ if (value) {
+ acl = posix_acl_from_xattr(value, size);
+ if (acl == NULL) {
+ value = NULL;
+ size = 0;
+ } else if (IS_ERR(acl)) {
+ return PTR_ERR(acl);
+ }
+ }
+
+ ret = btrfs_set_acl(inode, acl, type);
+
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+
+static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
+}
+
+static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+ struct posix_acl *acl;
+ int error = -EAGAIN;
+
+ acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ }
+
+ return error;
+}
+
+/*
+ * btrfs_init_acl is already generally called under fs_mutex, so the locking
+ * stuff has been fixed to work with that. If the locking stuff changes, we
+ * need to re-evaluate the acl locking stuff.
+ */
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+ struct posix_acl *acl = NULL;
+ int ret = 0;
+
+ /* this happens with subvols */
+ if (!dir)
+ return 0;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (IS_POSIXACL(dir)) {
+ acl = btrfs_get_acl(dir, ACL_TYPE_DEFAULT);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+
+ if (!acl)
+ inode->i_mode &= ~current->fs->umask;
+ }
+
+ if (IS_POSIXACL(dir) && acl) {
+ struct posix_acl *clone;
+ mode_t mode;
+
+ if (S_ISDIR(inode->i_mode)) {
+ ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
+ if (ret)
+ goto failed;
+ }
+ clone = posix_acl_clone(acl, GFP_NOFS);
+ ret = -ENOMEM;
+ if (!clone)
+ goto failed;
+
+ mode = inode->i_mode;
+ ret = posix_acl_create_masq(clone, &mode);
+ if (ret >= 0) {
+ inode->i_mode = mode;
+ if (ret > 0) {
+ /* we need an acl */
+ ret = btrfs_set_acl(inode, clone,
+ ACL_TYPE_ACCESS);
+ }
+ }
+ }
+failed:
+ posix_acl_release(acl);
+
+ return ret;
+}
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+ struct posix_acl *acl, *clone;
+ int ret = 0;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if (!IS_POSIXACL(inode))
+ return 0;
+
+ acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+
+ ret = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!ret)
+ ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
+
+ posix_acl_release(clone);
+
+ return ret;
+}
+
+struct xattr_handler btrfs_xattr_acl_default_handler = {
+ .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .get = btrfs_xattr_acl_default_get,
+ .set = btrfs_xattr_acl_default_set,
+};
+
+struct xattr_handler btrfs_xattr_acl_access_handler = {
+ .prefix = POSIX_ACL_XATTR_ACCESS,
+ .get = btrfs_xattr_acl_access_get,
+ .set = btrfs_xattr_acl_access_set,
+};
+
+#else /* CONFIG_FS_POSIX_ACL */
+
+int btrfs_acl_chmod(struct inode *inode)
+{
+ return 0;
+}
+
+int btrfs_init_acl(struct inode *inode, struct inode *dir)
+{
+ return 0;
+}
+
+int btrfs_check_acl(struct inode *inode, int mask)
+{
+ return 0;
+}
+
+#endif /* CONFIG_FS_POSIX_ACL */
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
new file mode 100644
index 00000000000..8e2fec05dbe
--- /dev/null
+++ b/fs/btrfs/async-thread.c
@@ -0,0 +1,419 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/version.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+# include <linux/freezer.h>
+#include "async-thread.h"
+
+#define WORK_QUEUED_BIT 0
+#define WORK_DONE_BIT 1
+#define WORK_ORDER_DONE_BIT 2
+
+/*
+ * container for the kthread task pointer and the list of pending work
+ * One of these is allocated per thread.
+ */
+struct btrfs_worker_thread {
+ /* pool we belong to */
+ struct btrfs_workers *workers;
+
+ /* list of struct btrfs_work that are waiting for service */
+ struct list_head pending;
+
+ /* list of worker threads from struct btrfs_workers */
+ struct list_head worker_list;
+
+ /* kthread */
+ struct task_struct *task;
+
+ /* number of things on the pending list */
+ atomic_t num_pending;
+
+ unsigned long sequence;
+
+ /* protects the pending list. */
+ spinlock_t lock;
+
+ /* set to non-zero when this thread is already awake and kicking */
+ int working;
+
+ /* are we currently idle */
+ int idle;
+};
+
+/*
+ * helper function to move a thread onto the idle list after it
+ * has finished some requests.
+ */
+static void check_idle_worker(struct btrfs_worker_thread *worker)
+{
+ if (!worker->idle && atomic_read(&worker->num_pending) <
+ worker->workers->idle_thresh / 2) {
+ unsigned long flags;
+ spin_lock_irqsave(&worker->workers->lock, flags);
+ worker->idle = 1;
+ list_move(&worker->worker_list, &worker->workers->idle_list);
+ spin_unlock_irqrestore(&worker->workers->lock, flags);
+ }
+}
+
+/*
+ * helper function to move a thread off the idle list after new
+ * pending work is added.
+ */
+static void check_busy_worker(struct btrfs_worker_thread *worker)
+{
+ if (worker->idle && atomic_read(&worker->num_pending) >=
+ worker->workers->idle_thresh) {
+ unsigned long flags;
+ spin_lock_irqsave(&worker->workers->lock, flags);
+ worker->idle = 0;
+ list_move_tail(&worker->worker_list,
+ &worker->workers->worker_list);
+ spin_unlock_irqrestore(&worker->workers->lock, flags);
+ }
+}
+
+static noinline int run_ordered_completions(struct btrfs_workers *workers,
+ struct btrfs_work *work)
+{
+ unsigned long flags;
+
+ if (!workers->ordered)
+ return 0;
+
+ set_bit(WORK_DONE_BIT, &work->flags);
+
+ spin_lock_irqsave(&workers->lock, flags);
+
+ while (!list_empty(&workers->order_list)) {
+ work = list_entry(workers->order_list.next,
+ struct btrfs_work, order_list);
+
+ if (!test_bit(WORK_DONE_BIT, &work->flags))
+ break;
+
+ /* we are going to call the ordered done function, but
+ * we leave the work item on the list as a barrier so
+ * that later work items that are done don't have their
+ * functions called before this one returns
+ */
+ if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
+ break;
+
+ spin_unlock_irqrestore(&workers->lock, flags);
+
+ work->ordered_func(work);
+
+ /* now take the lock again and call the freeing code */
+ spin_lock_irqsave(&workers->lock, flags);
+ list_del(&work->order_list);
+ work->ordered_free(work);
+ }
+
+ spin_unlock_irqrestore(&workers->lock, flags);
+ return 0;
+}
+
+/*
+ * main loop for servicing work items
+ */
+static int worker_loop(void *arg)
+{
+ struct btrfs_worker_thread *worker = arg;
+ struct list_head *cur;
+ struct btrfs_work *work;
+ do {
+ spin_lock_irq(&worker->lock);
+ while (!list_empty(&worker->pending)) {
+ cur = worker->pending.next;
+ work = list_entry(cur, struct btrfs_work, list);
+ list_del(&work->list);
+ clear_bit(WORK_QUEUED_BIT, &work->flags);
+
+ work->worker = worker;
+ spin_unlock_irq(&worker->lock);
+
+ work->func(work);
+
+ atomic_dec(&worker->num_pending);
+ /*
+ * unless this is an ordered work queue,
+ * 'work' was probably freed by func above.
+ */
+ run_ordered_completions(worker->workers, work);
+
+ spin_lock_irq(&worker->lock);
+ check_idle_worker(worker);
+
+ }
+ worker->working = 0;
+ if (freezing(current)) {
+ refrigerator();
+ } else {
+ set_current_state(TASK_INTERRUPTIBLE);
+ spin_unlock_irq(&worker->lock);
+ if (!kthread_should_stop())
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ }
+ } while (!kthread_should_stop());
+ return 0;
+}
+
+/*
+ * this will wait for all the worker threads to shutdown
+ */
+int btrfs_stop_workers(struct btrfs_workers *workers)
+{
+ struct list_head *cur;
+ struct btrfs_worker_thread *worker;
+
+ list_splice_init(&workers->idle_list, &workers->worker_list);
+ while (!list_empty(&workers->worker_list)) {
+ cur = workers->worker_list.next;
+ worker = list_entry(cur, struct btrfs_worker_thread,
+ worker_list);
+ kthread_stop(worker->task);
+ list_del(&worker->worker_list);
+ kfree(worker);
+ }
+ return 0;
+}
+
+/*
+ * simple init on struct btrfs_workers
+ */
+void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
+{
+ workers->num_workers = 0;
+ INIT_LIST_HEAD(&workers->worker_list);
+ INIT_LIST_HEAD(&workers->idle_list);
+ INIT_LIST_HEAD(&workers->order_list);
+ spin_lock_init(&workers->lock);
+ workers->max_workers = max;
+ workers->idle_thresh = 32;
+ workers->name = name;
+ workers->ordered = 0;
+}
+
+/*
+ * starts new worker threads. This does not enforce the max worker
+ * count in case you need to temporarily go past it.
+ */
+int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
+{
+ struct btrfs_worker_thread *worker;
+ int ret = 0;
+ int i;
+
+ for (i = 0; i < num_workers; i++) {
+ worker = kzalloc(sizeof(*worker), GFP_NOFS);
+ if (!worker) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ INIT_LIST_HEAD(&worker->pending);
+ INIT_LIST_HEAD(&worker->worker_list);
+ spin_lock_init(&worker->lock);
+ atomic_set(&worker->num_pending, 0);
+ worker->task = kthread_run(worker_loop, worker,
+ "btrfs-%s-%d", workers->name,
+ workers->num_workers + i);
+ worker->workers = workers;
+ if (IS_ERR(worker->task)) {
+ kfree(worker);
+ ret = PTR_ERR(worker->task);
+ goto fail;
+ }
+
+ spin_lock_irq(&workers->lock);
+ list_add_tail(&worker->worker_list, &workers->idle_list);
+ worker->idle = 1;
+ workers->num_workers++;
+ spin_unlock_irq(&workers->lock);
+ }
+ return 0;
+fail:
+ btrfs_stop_workers(workers);
+ return ret;
+}
+
+/*
+ * run through the list and find a worker thread that doesn't have a lot
+ * to do right now. This can return null if we aren't yet at the thread
+ * count limit and all of the threads are busy.
+ */
+static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
+{
+ struct btrfs_worker_thread *worker;
+ struct list_head *next;
+ int enforce_min = workers->num_workers < workers->max_workers;
+
+ /*
+ * if we find an idle thread, don't move it to the end of the
+ * idle list. This improves the chance that the next submission
+ * will reuse the same thread, and maybe catch it while it is still
+ * working
+ */
+ if (!list_empty(&workers->idle_list)) {
+ next = workers->idle_list.next;
+ worker = list_entry(next, struct btrfs_worker_thread,
+ worker_list);
+ return worker;
+ }
+ if (enforce_min || list_empty(&workers->worker_list))
+ return NULL;
+
+ /*
+ * if we pick a busy task, move the task to the end of the list.
+ * hopefully this will keep things somewhat evenly balanced.
+ * Do the move in batches based on the sequence number. This groups
+ * requests submitted at roughly the same time onto the same worker.
+ */
+ next = workers->worker_list.next;
+ worker = list_entry(next, struct btrfs_worker_thread, worker_list);
+ atomic_inc(&worker->num_pending);
+ worker->sequence++;
+
+ if (worker->sequence % workers->idle_thresh == 0)
+ list_move_tail(next, &workers->worker_list);
+ return worker;
+}
+
+/*
+ * selects a worker thread to take the next job. This will either find
+ * an idle worker, start a new worker up to the max count, or just return
+ * one of the existing busy workers.
+ */
+static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
+{
+ struct btrfs_worker_thread *worker;
+ unsigned long flags;
+
+again:
+ spin_lock_irqsave(&workers->lock, flags);
+ worker = next_worker(workers);
+ spin_unlock_irqrestore(&workers->lock, flags);
+
+ if (!worker) {
+ spin_lock_irqsave(&workers->lock, flags);
+ if (workers->num_workers >= workers->max_workers) {
+ struct list_head *fallback = NULL;
+ /*
+ * we have failed to find any workers, just
+ * return the force one
+ */
+ if (!list_empty(&workers->worker_list))
+ fallback = workers->worker_list.next;
+ if (!list_empty(&workers->idle_list))
+ fallback = workers->idle_list.next;
+ BUG_ON(!fallback);
+ worker = list_entry(fallback,
+ struct btrfs_worker_thread, worker_list);