aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-17 14:43:43 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-17 14:43:43 -0800
commiteee2a817df7c5a6e569f353f8be78cc1b3604bb6 (patch)
treef721beb4712c732702d3383d3c6a52da8b5bbb20
parent83896fb5e51594281720d145164f866ba769abd5 (diff)
parentacce952b0263825da32cf10489413dec78053347 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (25 commits) Btrfs: forced readonly mounts on errors btrfs: Require CAP_SYS_ADMIN for filesystem rebalance Btrfs: don't warn if we get ENOSPC in btrfs_block_rsv_check btrfs: Fix memory leak in btrfs_read_fs_root_no_radix() btrfs: check NULL or not btrfs: Don't pass NULL ptr to func that may deref it. btrfs: mount failure return value fix btrfs: Mem leak in btrfs_get_acl() btrfs: fix wrong free space information of btrfs btrfs: make the chunk allocator utilize the devices better btrfs: restructure find_free_dev_extent() btrfs: fix wrong calculation of stripe size btrfs: try to reclaim some space when chunk allocation fails btrfs: fix wrong data space statistics fs/btrfs: Fix build of ctree Btrfs: fix off by one while setting block groups readonly Btrfs: Add BTRFS_IOC_SUBVOL_GETFLAGS/SETFLAGS ioctls Btrfs: Add readonly snapshots support Btrfs: Refactor btrfs_ioctl_snap_create() btrfs: Extract duplicate decompress code ...
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/btrfs/Makefile2
-rw-r--r--fs/btrfs/acl.c4
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/compression.c329
-rw-r--r--fs/btrfs/compression.h72
-rw-r--r--fs/btrfs/ctree.c8
-rw-r--r--fs/btrfs/ctree.h48
-rw-r--r--fs/btrfs/disk-io.c412
-rw-r--r--fs/btrfs/disk-io.h1
-rw-r--r--fs/btrfs/extent-tree.c90
-rw-r--r--fs/btrfs/extent_io.c7
-rw-r--r--fs/btrfs/extent_io.h17
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/extent_map.h3
-rw-r--r--fs/btrfs/file.c13
-rw-r--r--fs/btrfs/inode.c90
-rw-r--r--fs/btrfs/ioctl.c220
-rw-r--r--fs/btrfs/ioctl.h12
-rw-r--r--fs/btrfs/lzo.c420
-rw-r--r--fs/btrfs/ordered-data.c18
-rw-r--r--fs/btrfs/ordered-data.h8
-rw-r--r--fs/btrfs/super.c281
-rw-r--r--fs/btrfs/transaction.c11
-rw-r--r--fs/btrfs/transaction.h1
-rw-r--r--fs/btrfs/volumes.c626
-rw-r--r--fs/btrfs/volumes.h27
-rw-r--r--fs/btrfs/xattr.c18
-rw-r--r--fs/btrfs/zlib.c369
29 files changed, 2490 insertions, 623 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 7bb3c020e57..ecb9fd3be14 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,8 @@ config BTRFS_FS
select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
+ select LZO_COMPRESS
+ select LZO_DECOMPRESS
help
Btrfs is a new filesystem with extents, writable snapshotting,
support for multiple devices and many more features.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index a35eb36b32f..31610ea73ae 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
- export.o tree-log.o acl.o free-space-cache.o zlib.o \
+ export.o tree-log.o acl.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6ae2c8cac9d..15b5ca2a260 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,8 +60,10 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
size = __btrfs_getxattr(inode, name, value, size);
if (size > 0) {
acl = posix_acl_from_xattr(value, size);
- if (IS_ERR(acl))
+ if (IS_ERR(acl)) {
+ kfree(value);
return acl;
+ }
set_cached_acl(inode, type, acl);
}
kfree(value);
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 6ad63f17eca..ccc991c542d 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -157,7 +157,7 @@ struct btrfs_inode {
/*
* always compress this one file
*/
- unsigned force_compress:1;
+ unsigned force_compress:4;
struct inode vfs_inode;
};
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b50bc4bd5c5..f745287fbf2 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -62,6 +62,9 @@ struct compressed_bio {
/* number of bytes on disk */
unsigned long compressed_len;
+ /* the compression algorithm for this bio */
+ int compress_type;
+
/* number of compressed pages in the array */
unsigned long nr_pages;
@@ -173,11 +176,12 @@ static void end_compressed_bio_read(struct bio *bio, int err)
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
- ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
- cb->start,
- cb->orig_bio->bi_io_vec,
- cb->orig_bio->bi_vcnt,
- cb->compressed_len);
+ ret = btrfs_decompress_biovec(cb->compress_type,
+ cb->compressed_pages,
+ cb->start,
+ cb->orig_bio->bi_io_vec,
+ cb->orig_bio->bi_vcnt,
+ cb->compressed_len);
csum_failed:
if (ret)
cb->errors = 1;
@@ -588,6 +592,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->len = uncompressed_len;
cb->compressed_len = compressed_len;
+ cb->compress_type = extent_compress_type(bio_flags);
cb->orig_bio = bio;
nr_pages = (compressed_len + PAGE_CACHE_SIZE - 1) /
@@ -677,3 +682,317 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
bio_put(comp_bio);
return 0;
}
+
+static struct list_head comp_idle_workspace[BTRFS_COMPRESS_TYPES];
+static spinlock_t comp_workspace_lock[BTRFS_COMPRESS_TYPES];
+static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
+static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
+static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
+
+struct btrfs_compress_op *btrfs_compress_op[] = {
+ &btrfs_zlib_compress,
+ &btrfs_lzo_compress,
+};
+
+int __init btrfs_init_compress(void)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+ INIT_LIST_HEAD(&comp_idle_workspace[i]);
+ spin_lock_init(&comp_workspace_lock[i]);
+ atomic_set(&comp_alloc_workspace[i], 0);
+ init_waitqueue_head(&comp_workspace_wait[i]);
+ }
+ return 0;
+}
+
+/*
+ * this finds an available workspace or allocates a new one
+ * ERR_PTR is returned if things go bad.
+ */
+static struct list_head *find_workspace(int type)
+{
+ struct list_head *workspace;
+ int cpus = num_online_cpus();
+ int idx = type - 1;
+
+ struct list_head *idle_workspace = &comp_idle_workspace[idx];
+ spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+ atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+ wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+ int *num_workspace = &comp_num_workspace[idx];
+again:
+ spin_lock(workspace_lock);
+ if (!list_empty(idle_workspace)) {
+ workspace = idle_workspace->next;
+ list_del(workspace);
+ (*num_workspace)--;
+ spin_unlock(workspace_lock);
+ return workspace;
+
+ }
+ if (atomic_read(alloc_workspace) > cpus) {
+ DEFINE_WAIT(wait);
+
+ spin_unlock(workspace_lock);
+ prepare_to_wait(workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
+ if (atomic_read(alloc_workspace) > cpus && !*num_workspace)
+ schedule();
+ finish_wait(workspace_wait, &wait);
+ goto again;
+ }
+ atomic_inc(alloc_workspace);
+ spin_unlock(workspace_lock);
+
+ workspace = btrfs_compress_op[idx]->alloc_workspace();
+ if (IS_ERR(workspace)) {
+ atomic_dec(alloc_workspace);
+ wake_up(workspace_wait);
+ }
+ return workspace;
+}
+
+/*
+ * put a workspace struct back on the list or free it if we have enough
+ * idle ones sitting around
+ */
+static void free_workspace(int type, struct list_head *workspace)
+{
+ int idx = type - 1;
+ struct list_head *idle_workspace = &comp_idle_workspace[idx];
+ spinlock_t *workspace_lock = &comp_workspace_lock[idx];
+ atomic_t *alloc_workspace = &comp_alloc_workspace[idx];
+ wait_queue_head_t *workspace_wait = &comp_workspace_wait[idx];
+ int *num_workspace = &comp_num_workspace[idx];
+
+ spin_lock(workspace_lock);
+ if (*num_workspace < num_online_cpus()) {
+ list_add_tail(workspace, idle_workspace);
+ (*num_workspace)++;
+ spin_unlock(workspace_lock);
+ goto wake;
+ }
+ spin_unlock(workspace_lock);
+
+ btrfs_compress_op[idx]->free_workspace(workspace);
+ atomic_dec(alloc_workspace);
+wake:
+ if (waitqueue_active(workspace_wait))
+ wake_up(workspace_wait);
+}
+
+/*
+ * cleanup function for module exit
+ */
+static void free_workspaces(void)
+{
+ struct list_head *workspace;
+ int i;
+
+ for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) {
+ while (!list_empty(&comp_idle_workspace[i])) {
+ workspace = comp_idle_workspace[i].next;
+ list_del(workspace);
+ btrfs_compress_op[i]->free_workspace(workspace);
+ atomic_dec(&comp_alloc_workspace[i]);
+ }
+ }
+}
+
+/*
+ * given an address space and start/len, compress the bytes.
+ *
+ * pages are allocated to hold the compressed result and stored
+ * in 'pages'
+ *
+ * out_pages is used to return the number of pages allocated. There
+ * may be pages allocated even if we return an error
+ *
+ * total_in is used to return the number of bytes actually read. It
+ * may be smaller then len if we had to exit early because we
+ * ran out of room in the pages array or because we cross the
+ * max_out threshold.
+ *
+ * total_out is used to return the total number of compressed bytes
+ *
+ * max_out tells us the max number of bytes that we're allowed to
+ * stuff into pages
+ */
+int btrfs_compress_pages(int type, struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out)
+{
+ struct list_head *workspace;
+ int ret;
+
+ workspace = find_workspace(type);
+ if (IS_ERR(workspace))
+ return -1;
+
+ ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
+ start, len, pages,
+ nr_dest_pages, out_pages,
+ total_in, total_out,
+ max_out);
+ free_workspace(type, workspace);
+ return ret;
+}
+
+/*
+ * pages_in is an array of pages with compressed data.
+ *
+ * disk_start is the starting logical offset of this array in the file
+ *
+ * bvec is a bio_vec of pages from the file that we want to decompress into
+ *
+ * vcnt is the count of pages in the biovec
+ *
+ * srclen is the number of bytes in pages_in
+ *
+ * The basic idea is that we have a bio that was created by readpages.
+ * The pages in the bio are for the uncompressed data, and they may not
+ * be contiguous. They all correspond to the range of bytes covered by
+ * the compressed extent.
+ */
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+ struct bio_vec *bvec, int vcnt, size_t srclen)
+{
+ struct list_head *workspace;
+ int ret;
+
+ workspace = find_workspace(type);
+ if (IS_ERR(workspace))
+ return -ENOMEM;
+
+ ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
+ disk_start,
+ bvec, vcnt, srclen);
+ free_workspace(type, workspace);
+ return ret;
+}
+
+/*
+ * a less complex decompression routine. Our compressed data fits in a
+ * single page, and we want to read a single page out of it.
+ * start_byte tells us the offset into the compressed data we're interested in
+ */
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+ unsigned long start_byte, size_t srclen, size_t destlen)
+{
+ struct list_head *workspace;
+ int ret;
+
+ workspace = find_workspace(type);
+ if (IS_ERR(workspace))
+ return -ENOMEM;
+
+ ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
+ dest_page, start_byte,
+ srclen, destlen);
+
+ free_workspace(type, workspace);
+ return ret;
+}
+
+void __exit btrfs_exit_compress(void)
+{
+ free_workspaces();
+}
+
+/*
+ * Copy uncompressed data from working buffer to pages.
+ *
+ * buf_start is the byte offset we're of the start of our workspace buffer.
+ *
+ * total_out is the last byte of the buffer
+ */
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+ unsigned long total_out, u64 disk_start,
+ struct bio_vec *bvec, int vcnt,
+ unsigned long *page_index,
+ unsigned long *pg_offset)
+{
+ unsigned long buf_offset;
+ unsigned long current_buf_start;
+ unsigned long start_byte;
+ unsigned long working_bytes = total_out - buf_start;
+ unsigned long bytes;
+ char *kaddr;
+ struct page *page_out = bvec[*page_index].bv_page;
+
+ /*
+ * start byte is the first byte of the page we're currently
+ * copying into relative to the start of the compressed data.
+ */
+ start_byte = page_offset(page_out) - disk_start;
+
+ /* we haven't yet hit data corresponding to this page */
+ if (total_out <= start_byte)
+ return 1;
+
+ /*
+ * the start of the data we care about is offset into
+ * the middle of our working buffer
+ */
+ if (total_out > start_byte && buf_start < start_byte) {
+ buf_offset = start_byte - buf_start;
+ working_bytes -= buf_offset;
+ } else {
+ buf_offset = 0;
+ }
+ current_buf_start = buf_start;
+
+ /* copy bytes from the working buffer into the pages */
+ while (working_bytes > 0) {
+ bytes = min(PAGE_CACHE_SIZE - *pg_offset,
+ PAGE_CACHE_SIZE - buf_offset);
+ bytes = min(bytes, working_bytes);
+ kaddr = kmap_atomic(page_out, KM_USER0);
+ memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+ kunmap_atomic(kaddr, KM_USER0);
+ flush_dcache_page(page_out);
+
+ *pg_offset += bytes;
+ buf_offset += bytes;
+ working_bytes -= bytes;
+ current_buf_start += bytes;
+
+ /* check if we need to pick another page */
+ if (*pg_offset == PAGE_CACHE_SIZE) {
+ (*page_index)++;
+ if (*page_index >= vcnt)
+ return 0;
+
+ page_out = bvec[*page_index].bv_page;
+ *pg_offset = 0;
+ start_byte = page_offset(page_out) - disk_start;
+
+ /*
+ * make sure our new page is covered by this
+ * working buffer
+ */
+ if (total_out <= start_byte)
+ return 1;
+
+ /*
+ * the next page in the biovec might not be adjacent
+ * to the last page, but it might still be found
+ * inside this working buffer. bump our offset pointer
+ */
+ if (total_out > start_byte &&
+ current_buf_start < start_byte) {
+ buf_offset = start_byte - buf_start;
+ working_bytes = total_out - start_byte;
+ current_buf_start = buf_start + buf_offset;
+ }
+ }
+ }
+
+ return 1;
+}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 421f5b4aa71..51000174b9d 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -19,24 +19,27 @@
#ifndef __BTRFS_COMPRESSION_
#define __BTRFS_COMPRESSION_
-int btrfs_zlib_decompress(unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen);
-int btrfs_zlib_compress_pages(struct address_space *mapping,
- u64 start, unsigned long len,
- struct page **pages,
- unsigned long nr_dest_pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out,
- unsigned long max_out);
-int btrfs_zlib_decompress_biovec(struct page **pages_in,
- u64 disk_start,
- struct bio_vec *bvec,
- int vcnt,
- size_t srclen);
-void btrfs_zlib_exit(void);
+int btrfs_init_compress(void);
+void btrfs_exit_compress(void);
+
+int btrfs_compress_pages(int type, struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out);
+int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
+ struct bio_vec *bvec, int vcnt, size_t srclen);
+int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
+ unsigned long start_byte, size_t srclen, size_t destlen);
+int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
+ unsigned long total_out, u64 disk_start,
+ struct bio_vec *bvec, int vcnt,
+ unsigned long *page_index,
+ unsigned long *pg_offset);
+
int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long len, u64 disk_start,
unsigned long compressed_len,
@@ -44,4 +47,37 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long nr_pages);
int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
+
+struct btrfs_compress_op {
+ struct list_head *(*alloc_workspace)(void);
+
+ void (*free_workspace)(struct list_head *workspace);
+
+ int (*compress_pages)(struct list_head *workspace,
+ struct address_space *mapping,
+ u64 start, unsigned long len,
+ struct page **pages,
+ unsigned long nr_dest_pages,
+ unsigned long *out_pages,
+ unsigned long *total_in,
+ unsigned long *total_out,
+ unsigned long max_out);
+
+ int (*decompress_biovec)(struct list_head *workspace,
+ struct page **pages_in,
+ u64 disk_start,
+ struct bio_vec *bvec,
+ int vcnt,
+ size_t srclen);
+
+ int (*decompress)(struct list_head *workspace,
+ unsigned char *data_in,
+ struct page *dest_page,
+ unsigned long start_byte,
+ size_t srclen, size_t destlen);
+};
+
+extern struct btrfs_compress_op btrfs_zlib_compress;
+extern struct btrfs_compress_op btrfs_lzo_compress;
+
#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9ac17159925..b5baff0dccf 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -105,6 +105,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
+ if (!p)
+ return;
btrfs_release_path(NULL, p);
kmem_cache_free(btrfs_path_cachep, p);
}
@@ -2514,6 +2516,9 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
right = read_node_slot(root, upper, slot + 1);
+ if (right == NULL)
+ return 1;
+
btrfs_tree_lock(right);
btrfs_set_lock_blocking(right);
@@ -2764,6 +2769,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_assert_tree_locked(path->nodes[1]);
left = read_node_slot(root, path->nodes[1], slot - 1);
+ if (left == NULL)
+ return 1;
+
btrfs_tree_lock(left);
btrfs_set_lock_blocking(left);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b875d445ea8..2c98b3af605 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -295,6 +295,14 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
#define BTRFS_FSID_SIZE 16
#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
+
+/*
+ * File system states
+ */
+
+/* Errors detected */
+#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
+
#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
@@ -399,13 +407,15 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
- BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
+ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO)
/*
* A leaf is full of items. offset and size tell us where to find
@@ -552,9 +562,11 @@ struct btrfs_timespec {
} __attribute__ ((__packed__));
enum btrfs_compression_type {
- BTRFS_COMPRESS_NONE = 0,
- BTRFS_COMPRESS_ZLIB = 1,
- BTRFS_COMPRESS_LAST = 2,
+ BTRFS_COMPRESS_NONE = 0,
+ BTRFS_COMPRESS_ZLIB = 1,
+ BTRFS_COMPRESS_LZO = 2,
+ BTRFS_COMPRESS_TYPES = 2,
+ BTRFS_COMPRESS_LAST = 3,
};
struct btrfs_inode_item {
@@ -598,6 +610,8 @@ struct btrfs_dir_item {
u8 type;
} __attribute__ ((__packed__));
+#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+
struct btrfs_root_item {
struct btrfs_inode_item inode;
__le64 generation;
@@ -896,7 +910,8 @@ struct btrfs_fs_info {
*/
u64 last_trans_log_full_commit;
u64 open_ioctl_trans;
- unsigned long mount_opt;
+ unsigned long mount_opt:20;
+ unsigned long compress_type:4;
u64 max_inline;
u64 alloc_start;
struct btrfs_transaction *running_transaction;
@@ -1051,6 +1066,9 @@ struct btrfs_fs_info {
unsigned metadata_ratio;
void *bdev_holder;
+
+ /* filesystem state */
+ u64 fs_state;
};
/*
@@ -1894,6 +1912,11 @@ BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
last_snapshot, 64);
+static inline bool btrfs_root_readonly(struct btrfs_root *root)
+{
+ return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY;
+}
+
/* struct btrfs_super_block */
BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
@@ -2146,6 +2169,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
+u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
@@ -2189,6 +2213,12 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
int btrfs_set_block_group_rw(struct btrfs_root *root,
struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
+u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
+int btrfs_error_unpin_extent_range(struct btrfs_root *root,
+ u64 start, u64 end);
+int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes);
+
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
@@ -2542,6 +2572,14 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
int btrfs_parse_options(struct btrfs_root *root, char *options);
int btrfs_sync_fs(struct super_block *sb, int wait);
+void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+ unsigned int line, int errno);
+
+#define btrfs_std_error(fs_info, errno) \
+do { \
+ if ((errno)) \
+ __btrfs_std_error((fs_info), __func__, __LINE__, (errno));\
+} while (0)
/* acl.c */
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 51d2e4de34e..b531c36455d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -44,6 +44,20 @@
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
+static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+ int read_only);
+static int btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static int btrfs_destroy_ordered_extents(struct btrfs_root *root);
+static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_root *root);
+static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
+static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
+static int btrfs_destroy_marked_extents(struct btrfs_root *root,
+ struct extent_io_tree *dirty_pages,
+ int mark);
+static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
+ struct extent_io_tree *pinned_extents);
+static int btrfs_cleanup_transaction(struct btrfs_root *root);
/*
* end_io_wq structs are used to do processing in task context when an IO is
@@ -353,6 +367,10 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
WARN_ON(len == 0);
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+ if (eb == NULL) {
+ WARN_ON(1);
+ goto out;
+ }
ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
btrfs_header_generation(eb));
BUG_ON(ret);
@@ -427,6 +445,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
WARN_ON(len == 0);
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
+ if (eb == NULL) {
+ ret = -EIO;
+ goto out;
+ }
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
@@ -1145,6 +1167,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
}
btrfs_free_path(path);
if (ret) {
+ kfree(root);
if (ret > 0)
ret = -ENOENT;
return ERR_PTR(ret);
@@ -1713,8 +1736,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
- if (!bh)
+ if (!bh) {
+ err = -EINVAL;
goto fail_iput;
+ }
memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy));
memcpy(&fs_info->super_for_commit, &fs_info->super_copy,
@@ -1727,6 +1752,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!btrfs_super_root(disk_super))
goto fail_iput;
+ /* check FS state, whether FS is broken. */
+ fs_info->fs_state |= btrfs_super_flags(disk_super);
+
+ btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+
ret = btrfs_parse_options(tree_root, options);
if (ret) {
err = ret;
@@ -1744,10 +1774,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
}
features = btrfs_super_incompat_flags(disk_super);
- if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
- features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
- btrfs_set_super_incompat_flags(disk_super, features);
- }
+ features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+ if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO)
+ features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+ btrfs_set_super_incompat_flags(disk_super, features);
features = btrfs_super_compat_ro_flags(disk_super) &
~BTRFS_FEATURE_COMPAT_RO_SUPP;
@@ -1957,7 +1987,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_set_opt(fs_info->mount_opt, SSD);
}
- if (btrfs_super_log_root(disk_super) != 0) {
+ /* do not make disk changes in broken FS */
+ if (btrfs_super_log_root(disk_super) != 0 &&
+ !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
u64 bytenr = btrfs_super_log_root(disk_super);
if (fs_devices->rw_devices == 0) {
@@ -2442,8 +2474,28 @@ int close_ctree(struct btrfs_root *root)
smp_mb();
btrfs_put_block_group_cache(fs_info);
+
+ /*
+ * Here come 2 situations when btrfs is broken to flip readonly:
+ *
+ * 1. when btrfs flips readonly somewhere else before
+ * btrfs_commit_super, sb->s_flags has MS_RDONLY flag,
+ * and btrfs will skip to write sb directly to keep
+ * ERROR state on disk.
+ *
+ * 2. when btrfs flips readonly just in btrfs_commit_super,
+ * and in such case, btrfs cannnot write sb via btrfs_commit_super,
+ * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
+ * btrfs will cleanup all FS resources first and write sb then.
+ */
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
- ret = btrfs_commit_super(root);
+ ret = btrfs_commit_super(root);
+ if (ret)
+ printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
+ }
+
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ ret = btrfs_error_commit_super(root);
if (ret)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
@@ -2619,6 +2671,352 @@ out:
return 0;
}
+static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
+ int read_only)
+{
+ if (read_only)
+ return;
+
+ if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ printk(KERN_WARNING "warning: mount fs with errors, "
+ "running btrfsck is recommended\n");
+}
+
+int btrfs_error_commit_super(struct btrfs_root *root)
+{
+ int ret;
+
+ mutex_lock(&root->fs_info->cleaner_mutex);
+ btrfs_run_delayed_iputs(root);
+ mutex_unlock(&root->fs_info->cleaner_mutex);
+
+ down_write(&root->fs_info->cleanup_work_sem);
+ up_write(&root->fs_info->cleanup_work_sem);
+
+ /* cleanup FS via transaction */
+ btrfs_cleanup_transaction(root);
+
+ ret = write_ctree_super(NULL, root, 0);
+
+ return ret;
+}
+
+static int btrfs_destroy_ordered_operations(struct btrfs_root *root)
+{
+ struct btrfs_inode *btrfs_inode;
+ struct list_head splice;
+