aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c1064
1 files changed, 890 insertions, 174 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 55c07b65037..c226daefd65 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,12 +39,13 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/mount.h>
+#include <linux/btrfs.h>
+#include <linux/blkdev.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "ioctl.h"
#include "print-tree.h"
#include "ordered-data.h"
#include "xattr.h"
@@ -54,6 +55,7 @@
#include "locking.h"
#include "free-space-cache.h"
#include "inode-map.h"
+#include "backref.h"
struct btrfs_iget_args {
u64 ino;
@@ -231,8 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
u64 isize = i_size_read(inode);
u64 actual_end = min(end + 1, isize);
u64 inline_len = actual_end - start;
- u64 aligned_end = (end + root->sectorsize - 1) &
- ~((u64)root->sectorsize - 1);
+ u64 aligned_end = ALIGN(end, root->sectorsize);
u64 data_len = inline_len;
int ret;
@@ -265,6 +266,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
return 1;
}
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
@@ -389,7 +391,7 @@ again:
* a compressed extent to 128k.
*/
total_compressed = min(total_compressed, max_uncompressed);
- num_bytes = (end - start + blocksize) & ~(blocksize - 1);
+ num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
total_in = 0;
ret = 0;
@@ -488,15 +490,13 @@ cont:
* up to a block size boundary so the allocator does sane
* things
*/
- total_compressed = (total_compressed + blocksize - 1) &
- ~(blocksize - 1);
+ total_compressed = ALIGN(total_compressed, blocksize);
/*
* one last check to make sure the compression is really a
* win, compare the page count read with the blocks on disk
*/
- total_in = (total_in + PAGE_CACHE_SIZE - 1) &
- ~(PAGE_CACHE_SIZE - 1);
+ total_in = ALIGN(total_in, PAGE_CACHE_SIZE);
if (total_compressed >= total_in) {
will_compress = 0;
} else {
@@ -608,7 +608,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
if (list_empty(&async_cow->extents))
return 0;
-
+again:
while (!list_empty(&async_cow->extents)) {
async_extent = list_entry(async_cow->extents.next,
struct async_extent, list);
@@ -648,6 +648,8 @@ retry:
async_extent->ram_size - 1,
btrfs_get_extent,
WB_SYNC_ALL);
+ else if (ret)
+ unlock_page(async_cow->locked_page);
kfree(async_extent);
cond_resched();
continue;
@@ -672,6 +674,7 @@ retry:
if (ret) {
int i;
+
for (i = 0; i < async_extent->nr_pages; i++) {
WARN_ON(async_extent->pages[i]->mapping);
page_cache_release(async_extent->pages[i]);
@@ -679,12 +682,10 @@ retry:
kfree(async_extent->pages);
async_extent->nr_pages = 0;
async_extent->pages = NULL;
- unlock_extent(io_tree, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1);
+
if (ret == -ENOSPC)
goto retry;
- goto out_free; /* JDM: Requeue? */
+ goto out_free;
}
/*
@@ -696,10 +697,13 @@ retry:
async_extent->ram_size - 1, 0);
em = alloc_extent_map();
- BUG_ON(!em); /* -ENOMEM */
+ if (!em)
+ goto out_free_reserve;
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
+ em->mod_start = em->start;
+ em->mod_len = em->len;
em->block_start = ins.objectid;
em->block_len = ins.offset;
@@ -726,6 +730,9 @@ retry:
async_extent->ram_size - 1, 0);
}
+ if (ret)
+ goto out_free_reserve;
+
ret = btrfs_add_ordered_extent_compress(inode,
async_extent->start,
ins.objectid,
@@ -733,7 +740,8 @@ retry:
ins.offset,
BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ goto out_free_reserve;
/*
* clear dirty, set writeback and unlock the pages.
@@ -754,18 +762,30 @@ retry:
ins.objectid,
ins.offset, async_extent->pages,
async_extent->nr_pages);
-
- BUG_ON(ret); /* -ENOMEM */
alloc_hint = ins.objectid + ins.offset;
kfree(async_extent);
+ if (ret)
+ goto out;
cond_resched();
}
ret = 0;
out:
return ret;
+out_free_reserve:
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
out_free:
+ extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
+ async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1,
+ NULL, EXTENT_CLEAR_UNLOCK_PAGE |
+ EXTENT_CLEAR_UNLOCK |
+ EXTENT_CLEAR_DELALLOC |
+ EXTENT_CLEAR_DIRTY |
+ EXTENT_SET_WRITEBACK |
+ EXTENT_END_WRITEBACK);
kfree(async_extent);
- goto out;
+ goto again;
}
static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@@ -834,7 +854,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
BUG_ON(btrfs_is_free_space_inode(inode));
- num_bytes = (end - start + blocksize) & ~(blocksize - 1);
+ num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
@@ -892,6 +912,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
em->orig_start = em->start;
ram_size = ins.offset;
em->len = ins.offset;
+ em->mod_start = em->start;
+ em->mod_len = em->len;
em->block_start = ins.objectid;
em->block_len = ins.offset;
@@ -1338,6 +1360,8 @@ out_check:
em->block_start = disk_bytenr;
em->orig_block_len = disk_num_bytes;
em->bdev = root->fs_info->fs_devices->latest_bdev;
+ em->mod_start = em->start;
+ em->mod_len = em->len;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
set_bit(EXTENT_FLAG_FILLING, &em->flags);
em->generation = -1;
@@ -1508,14 +1532,22 @@ static void btrfs_set_bit_hook(struct inode *inode,
spin_unlock(&BTRFS_I(inode)->lock);
}
- spin_lock(&root->fs_info->delalloc_lock);
+ __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
+ root->fs_info->delalloc_batch);
+ spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->delalloc_bytes += len;
- root->fs_info->delalloc_bytes += len;
- if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
- list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
- &root->fs_info->delalloc_inodes);
+ if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &BTRFS_I(inode)->runtime_flags)) {
+ spin_lock(&root->fs_info->delalloc_lock);
+ if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+ list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
+ &root->fs_info->delalloc_inodes);
+ set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &BTRFS_I(inode)->runtime_flags);
+ }
+ spin_unlock(&root->fs_info->delalloc_lock);
}
- spin_unlock(&root->fs_info->delalloc_lock);
+ spin_unlock(&BTRFS_I(inode)->lock);
}
}
@@ -1550,15 +1582,22 @@ static void btrfs_clear_bit_hook(struct inode *inode,
&& do_list)
btrfs_free_reserved_data_space(inode, len);
- spin_lock(&root->fs_info->delalloc_lock);
- root->fs_info->delalloc_bytes -= len;
+ __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
+ root->fs_info->delalloc_batch);
+ spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->delalloc_bytes -= len;
-
if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
- !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
- list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+ test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &BTRFS_I(inode)->runtime_flags)) {
+ spin_lock(&root->fs_info->delalloc_lock);
+ if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+ list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+ clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &BTRFS_I(inode)->runtime_flags);
+ }
+ spin_unlock(&root->fs_info->delalloc_lock);
}
- spin_unlock(&root->fs_info->delalloc_lock);
+ spin_unlock(&BTRFS_I(inode)->lock);
}
}
@@ -1566,7 +1605,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
* extent_io.c merge_bio_hook, this must check the chunk tree to make sure
* we don't create bios that span stripes or chunks
*/
-int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
+int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags)
{
@@ -1581,7 +1620,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
length = bio->bi_size;
map_length = length;
- ret = btrfs_map_block(root->fs_info, READ, logical,
+ ret = btrfs_map_block(root->fs_info, rw, logical,
&map_length, NULL, 0);
/* Will always return 0 with map_multi == NULL */
BUG_ON(ret < 0);
@@ -1892,6 +1931,640 @@ out:
return ret;
}
+/* snapshot-aware defrag */
+struct sa_defrag_extent_backref {
+ struct rb_node node;
+ struct old_sa_defrag_extent *old;
+ u64 root_id;
+ u64 inum;
+ u64 file_pos;
+ u64 extent_offset;
+ u64 num_bytes;
+ u64 generation;
+};
+
+struct old_sa_defrag_extent {
+ struct list_head list;
+ struct new_sa_defrag_extent *new;
+
+ u64 extent_offset;
+ u64 bytenr;
+ u64 offset;
+ u64 len;
+ int count;
+};
+
+struct new_sa_defrag_extent {
+ struct rb_root root;
+ struct list_head head;
+ struct btrfs_path *path;
+ struct inode *inode;
+ u64 file_pos;
+ u64 len;
+ u64 bytenr;
+ u64 disk_len;
+ u8 compress_type;
+};
+
+static int backref_comp(struct sa_defrag_extent_backref *b1,
+ struct sa_defrag_extent_backref *b2)
+{
+ if (b1->root_id < b2->root_id)
+ return -1;
+ else if (b1->root_id > b2->root_id)
+ return 1;
+
+ if (b1->inum < b2->inum)
+ return -1;
+ else if (b1->inum > b2->inum)
+ return 1;
+
+ if (b1->file_pos < b2->file_pos)
+ return -1;
+ else if (b1->file_pos > b2->file_pos)
+ return 1;
+
+ /*
+ * [------------------------------] ===> (a range of space)
+ * |<--->| |<---->| =============> (fs/file tree A)
+ * |<---------------------------->| ===> (fs/file tree B)
+ *
+ * A range of space can refer to two file extents in one tree while
+ * refer to only one file extent in another tree.
+ *
+ * So we may process a disk offset more than one time(two extents in A)
+ * and locate at the same extent(one extent in B), then insert two same
+ * backrefs(both refer to the extent in B).
+ */
+ return 0;
+}
+
+static void backref_insert(struct rb_root *root,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct sa_defrag_extent_backref *entry;
+ int ret;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
+
+ ret = backref_comp(backref, entry);
+ if (ret < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&backref->node, parent, p);
+ rb_insert_color(&backref->node, root);
+}
+
+/*
+ * Note the backref might has changed, and in this case we just return 0.
+ */
+static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
+ void *ctx)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_fs_info *fs_info;
+ struct old_sa_defrag_extent *old = ctx;
+ struct new_sa_defrag_extent *new = old->new;
+ struct btrfs_path *path = new->path;
+ struct btrfs_key key;
+ struct btrfs_root *root;
+ struct sa_defrag_extent_backref *backref;
+ struct extent_buffer *leaf;
+ struct inode *inode = new->inode;
+ int slot;
+ int ret;
+ u64 extent_offset;
+ u64 num_bytes;
+
+ if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
+ inum == btrfs_ino(inode))
+ return 0;
+
+ key.objectid = root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ WARN_ON(1);
+ pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
+ inum, offset, root_id);
+ return PTR_ERR(root);
+ }
+
+ key.objectid = inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ if (offset > (u64)-1 << 32)
+ key.offset = 0;
+ else
+ key.offset = offset;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ WARN_ON(1);
+ return ret;
+ }
+
+ while (1) {
+ cond_resched();
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ continue;
+ }
+
+ path->slots[0]++;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ if (key.objectid > inum)
+ goto out;
+
+ if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+
+ extent = btrfs_item_ptr(leaf, slot,
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
+ continue;
+
+ extent_offset = btrfs_file_extent_offset(leaf, extent);
+ if (key.offset - extent_offset != offset)
+ continue;
+
+ num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+ if (extent_offset >= old->extent_offset + old->offset +
+ old->len || extent_offset + num_bytes <=
+ old->extent_offset + old->offset)
+ continue;
+
+ break;
+ }
+
+ backref = kmalloc(sizeof(*backref), GFP_NOFS);
+ if (!backref) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ backref->root_id = root_id;
+ backref->inum = inum;
+ backref->file_pos = offset + extent_offset;
+ backref->num_bytes = num_bytes;
+ backref->extent_offset = extent_offset;
+ backref->generation = btrfs_file_extent_generation(leaf, extent);
+ backref->old = old;
+ backref_insert(&new->root, backref);
+ old->count++;
+out:
+ btrfs_release_path(path);
+ WARN_ON(ret);
+ return ret;
+}
+
+static noinline bool record_extent_backrefs(struct btrfs_path *path,
+ struct new_sa_defrag_extent *new)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
+ struct old_sa_defrag_extent *old, *tmp;
+ int ret;
+
+ new->path = path;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+ path, record_one_backref,
+ old);
+ BUG_ON(ret < 0 && ret != -ENOENT);
+
+ /* no backref to be processed for this extent */
+ if (!old->count) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ }
+
+ if (list_empty(&new->head))
+ return false;
+
+ return true;
+}
+
+static int relink_is_mergable(struct extent_buffer *leaf,
+ struct btrfs_file_extent_item *fi,
+ u64 disk_bytenr)
+{
+ if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr)
+ return 0;
+
+ if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
+ return 0;
+
+ if (btrfs_file_extent_compression(leaf, fi) ||
+ btrfs_file_extent_encryption(leaf, fi) ||
+ btrfs_file_extent_other_encoding(leaf, fi))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Note the backref might has changed, and in this case we just return 0.
+ */
+static noinline int relink_extent_backref(struct btrfs_path *path,
+ struct sa_defrag_extent_backref *prev,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_file_extent_item *item;
+ struct btrfs_ordered_extent *ordered;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct old_sa_defrag_extent *old = backref->old;
+ struct new_sa_defrag_extent *new = old->new;
+ struct inode *src_inode = new->inode;
+ struct inode *inode;
+ struct extent_state *cached = NULL;
+ int ret = 0;
+ u64 start;
+ u64 len;
+ u64 lock_start;
+ u64 lock_end;
+ bool merge = false;
+ int index;
+
+ if (prev && prev->root_id == backref->root_id &&
+ prev->inum == backref->inum &&
+ prev->file_pos + prev->num_bytes == backref->file_pos)
+ merge = true;
+
+ /* step 1: get root */
+ key.objectid = backref->root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(src_inode)->root->fs_info;
+ index = srcu_read_lock(&fs_info->subvol_srcu);
+
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ return PTR_ERR(root);
+ }
+ if (btrfs_root_refs(&root->root_item) == 0) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ /* parse ENOENT to 0 */
+ return 0;
+ }
+
+ /* step 2: get inode */
+ key.objectid = backref->inum;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (IS_ERR(inode)) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ return 0;
+ }
+
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+
+ /* step 3: relink backref */
+ lock_start = backref->file_pos;
+ lock_end = backref->file_pos + backref->num_bytes - 1;
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
+ 0, &cached);
+
+ ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ goto out_unlock;
+ }
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_unlock;
+ }
+
+ key.objectid = backref->inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = backref->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out_free_path;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out_free_path;
+ }
+
+ extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_generation(path->nodes[0], extent) !=
+ backref->generation)
+ goto out_free_path;
+
+ btrfs_release_path(path);
+
+ start = backref->file_pos;
+ if (backref->extent_offset < old->extent_offset + old->offset)
+ start += old->extent_offset + old->offset -
+ backref->extent_offset;
+
+ len = min(backref->extent_offset + backref->num_bytes,
+ old->extent_offset + old->offset + old->len);
+ len -= max(backref->extent_offset, old->extent_offset + old->offset);
+
+ ret = btrfs_drop_extents(trans, root, inode, start,
+ start + len, 1);
+ if (ret)
+ goto out_free_path;
+again:
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = start;
+
+ if (merge) {
+ struct btrfs_file_extent_item *fi;
+ u64 extent_len;
+ struct btrfs_key found_key;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ if (ret < 0)
+ goto out_free_path;
+
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_len = btrfs_file_extent_num_bytes(leaf, fi);
+
+ if (relink_is_mergable(leaf, fi, new->bytenr) &&
+ extent_len + found_key.offset == start) {
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_len + len);
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+
+ ret = 1;
+ goto out_free_path;
+ } else {
+ merge = false;
+ btrfs_release_path(path);
+ goto again;
+ }
+ }
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ sizeof(*extent));
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
+ btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
+ btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
+ btrfs_set_file_extent_num_bytes(leaf, item, len);
+ btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
+ btrfs_set_file_extent_generation(leaf, item, trans->transid);
+ btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
+ btrfs_set_file_extent_compression(leaf, item, new->compress_type);
+ btrfs_set_file_extent_encryption(leaf, item, 0);
+ btrfs_set_file_extent_other_encoding(leaf, item, 0);
+
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+
+ ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
+ new->disk_len, 0,
+ backref->root_id, backref->inum,
+ new->file_pos, 0); /* start - extent_offset */
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ ret = 1;
+out_free_path:
+ btrfs_release_path(path);
+ btrfs_end_transaction(trans, root);
+out_unlock:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
+ &cached, GFP_NOFS);
+ iput(inode);
+ return ret;
+}
+
+static void relink_file_extents(struct new_sa_defrag_extent *new)
+{
+ struct btrfs_path *path;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct sa_defrag_extent_backref *backref;
+ struct sa_defrag_extent_backref *prev = NULL;
+ struct inode *inode;
+ struct btrfs_root *root;
+ struct rb_node *node;
+ int ret;
+
+ inode = new->inode;
+ root = BTRFS_I(inode)->root;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return;
+
+ if (!record_extent_backrefs(path, new)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+ btrfs_release_path(path);
+
+ while (1) {
+ node = rb_first(&new->root);
+ if (!node)
+ break;
+ rb_erase(node, &new->root);
+
+ backref = rb_entry(node, struct sa_defrag_extent_backref, node);
+
+ ret = relink_extent_backref(path, prev, backref);
+ WARN_ON(ret < 0);
+
+ kfree(prev);
+
+ if (ret == 1)
+ prev = backref;
+ else
+ prev = NULL;
+ cond_resched();
+ }
+ kfree(prev);
+
+ btrfs_free_path(path);
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out:
+ atomic_dec(&root->fs_info->defrag_running);
+ wake_up(&root->fs_info->transaction_wait);
+
+ kfree(new);
+}
+
+static struct new_sa_defrag_extent *
+record_old_file_extents(struct inode *inode,
+ struct btrfs_ordered_extent *ordered)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct new_sa_defrag_extent *new;
+ int ret;
+
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (!new)
+ return NULL;
+
+ new->inode = inode;
+ new->file_pos = ordered->file_offset;
+ new->len = ordered->len;
+ new->bytenr = ordered->start;
+ new->disk_len = ordered->disk_len;
+ new->compress_type = ordered->compress_type;
+ new->root = RB_ROOT;
+ INIT_LIST_HEAD(&new->head);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out_kfree;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = new->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out_free_path;
+ if (ret > 0 && path->slots[0] > 0)
+ path->slots[0]--;
+
+ /* find out all the old extents for the file range */
+ while (1) {
+ struct btrfs_file_extent_item *extent;
+ struct extent_buffer *l;
+ int slot;
+ u64 num_bytes;
+ u64 offset;
+ u64 end;
+ u64 disk_bytenr;
+ u64 extent_offset;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(l)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out_free_list;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.objectid != btrfs_ino(inode))
+ break;
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+ if (key.offset >= new->file_pos + new->len)
+ break;
+
+ extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
+
+ num_bytes = btrfs_file_extent_num_bytes(l, extent);
+ if (key.offset + num_bytes < new->file_pos)
+ goto next;
+
+ disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
+ if (!disk_bytenr)
+ goto next;
+
+ extent_offset = btrfs_file_extent_offset(l, extent);
+
+ old = kmalloc(sizeof(*old), GFP_NOFS);
+ if (!old)
+ goto out_free_list;
+
+ offset = max(new->file_pos, key.offset);
+ end = min(new->file_pos + new->len, key.offset + num_bytes);
+
+ old->bytenr = disk_bytenr;
+ old->extent_offset = extent_offset;
+ old->offset = offset - key.offset;
+ old->len = end - offset;
+ old->new = new;
+ old->count = 0;
+ list_add_tail(&old->list, &new->head);
+next:
+ path->slots[0]++;
+ cond_resched();
+ }
+
+ btrfs_free_path(path);
+ atomic_inc(&root->fs_info->defrag_running);
+
+ return new;
+
+out_free_list:
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out_free_path:
+ btrfs_free_path(path);
+out_kfree:
+ kfree(new);
+ return NULL;
+}
+
/*
* helper function for btrfs_finish_ordered_io, this
* just reads in some of the csum leaves to prime them into ram
@@ -1909,6 +2582,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
struct btrfs_trans_handle *trans = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
+ struct new_sa_defrag_extent *new = NULL;
int compress_type = 0;
int ret;
bool nolock;
@@ -1943,6 +2617,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state);
+ ret = test_range_bit(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ EXTENT_DEFRAG, 1, cached_state);
+ if (ret) {
+ u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
+ if (last_snapshot >= BTRFS_I(inode)->generation)
+ /* the inode is shared */
+ new = record_old_file_extents(inode, ordered_extent);
+
+ clear_extent_bit(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
+ }
+
if (nolock)
trans = btrfs_join_transaction_nolock(root);
else
@@ -2001,17 +2689,33 @@ out:
if (trans)
btrfs_end_transaction(trans, root);
- if (ret)
+ if (ret) {
clear_extent_uptodate(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, NULL, GFP_NOFS);
+ /*
+ * If the ordered extent had an IOERR or something else went
+ * wrong we need to return the space for this ordered extent
+ * back to the allocator.
+ */
+ if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+ !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
+ btrfs_free_reserved_extent(root, ordered_extent->start,
+ ordered_extent->disk_len);
+ }
+
+
/*
* This needs to be done to make sure anybody waiting knows we are done
* updating everything for this ordered extent.
*/
btrfs_remove_ordered_extent(inode, ordered_extent);
+ /* for snapshot-aware defrag */
+ if (new)
+ relink_file_extents(new);
+
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
@@ -2062,7 +2766,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int mirror)
{
- size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
+ size_t offset = start - page_offset(page);
struct inode *inode = page->mapping->host;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
char *kaddr;
@@ -2167,11 +2871,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
}
}
-enum btrfs_orphan_cleanup_state {
- ORPHAN_CLEANUP_STARTED = 1,
- ORPHAN_CLEANUP_DONE = 2,
-};
-
/*
* This is called in transaction commit time. If there are no orphan
* files in the subvolume, it removes orphan item and frees block_rsv
@@ -2469,6 +3168,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
*/
set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags);
+ atomic_inc(&root->orphan_inodes);
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
@@ -2491,6 +3191,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
goto out;
ret = btrfs_truncate(inode);
+ if (ret)
+ btrfs_orphan_del(NULL, inode);
} else {
nr_unlink++;
}
@@ -2709,34 +3411,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *item,
struct inode *inode)
{
- btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
- btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
- btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
- btrfs_set_inode_mode(leaf, item, inode->i_mode);
- btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
+
+ btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
+ btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
+ btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
+ &token);
+ btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
+ btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
- btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
- inode->i_atime.tv_sec);
- btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
- inode->i_atime.tv_nsec);
+ btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+ inode->i_atime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+ inode->i_atime.tv_nsec, &token);
- btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
- inode->i_mtime.tv_sec);
- btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
- inode->i_mtime.tv_nsec);
+ btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+ inode->i_mtime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+ inode->i_mtime.tv_nsec, &token);
- btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
- inode->i_ctime.tv_sec);
- btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
- inode->i_ctime.tv_nsec);
+ btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+ inode->i_ctime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+ inode->i_ctime.tv_nsec, &token);
- btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
- btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
- btrfs_set_inode_sequence(leaf, item, inode->i_version);
- btrfs_set_inode_transid(leaf, item, trans->transid);
- btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
- btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
- btrfs_set_inode_block_group(leaf, item, 0);
+ btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
+ &token);
+ btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
+ &token);
+ btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
+ btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
+ btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
+ btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
+ btrfs_set_token_inode_block_group(leaf, item, 0, &token);
}
/*
@@ -3304,7 +4013,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u64 extent_num_bytes = 0;
u64 extent_offset = 0;
u64 item_end = 0;
- u64 mask = root->sectorsize - 1;
u32 found_type = (u8)-1;
int found_extent;
int del_item;
@@ -3328,7 +4036,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* extent just the way it is.
*/
if (root->ref_cows || root == root->fs_info->tree_root)
- btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0);
+ btrfs_drop_extent_cache(inode, ALIGN(new_size,
+ root->sectorsize), (u64)-1, 0);
/*
* This function is also used to drop the items in the log tree before
@@ -3407,10 +4116,9 @@ search_again:
if (!del_item) {
u64 orig_num_bytes =
btrfs_file_extent_num_bytes(leaf, fi);
- extent_num_bytes = new_size -
- found_key.offset + root->sectorsize - 1;
- extent_num_bytes = extent_num_bytes &
- ~((u64)root->sectorsize - 1);
+ extent_num_bytes = ALIGN(new_size -
+ found_key.offset,
+ root->sectorsize);
btrfs_set_file_extent_num_bytes(leaf, fi,
extent_num_bytes);
num_dec = (orig_num_bytes -
@@ -3646,9 +4354,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- u64 mask = root->sectorsize - 1;
- u64 hole_start = (oldsize + mask) & ~mask;
- u64 block_end = (size + mask) & ~mask;
+ u64 hole_start = ALIGN(oldsize, root->sectorsize);
+ u64 block_end = ALIGN(size, root->sectorsize);
u64 last_byte;
u64 cur_offset;
u64 hole_size;
@@ -3681,7 +4388,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
break;
}
last_byte = min(extent_map_end(em), block_end);
- last_byte = (last_byte + mask) & ~mask;
+ last_byte = ALIGN(last_byte , root->sectorsize);
if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
struct extent_map *h