aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r--fs/btrfs/tree-log.c2233
1 files changed, 1748 insertions, 485 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a29f19384a2..9e1f2cd5e67 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -18,13 +18,14 @@
#include <linux/sched.h>
#include <linux/slab.h>
-#include "ctree.h"
-#include "transaction.h"
+#include <linux/blkdev.h>
+#include <linux/list_sort.h>
+#include "tree-log.h"
#include "disk-io.h"
#include "locking.h"
#include "print-tree.h"
-#include "compat.h"
-#include "tree-log.h"
+#include "backref.h"
+#include "hash.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
@@ -89,7 +90,8 @@
*/
#define LOG_WALK_PIN_ONLY 0
#define LOG_WALK_REPLAY_INODES 1
-#define LOG_WALK_REPLAY_ALL 2
+#define LOG_WALK_REPLAY_DIR_INDEX 2
+#define LOG_WALK_REPLAY_ALL 3
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
@@ -132,43 +134,61 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
* syncing the tree wait for us to finish
*/
static int start_log_trans(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+ struct btrfs_root *root,
+ struct btrfs_log_ctx *ctx)
{
+ int index;
int ret;
- int err = 0;
mutex_lock(&root->log_mutex);
if (root->log_root) {
+ if (btrfs_need_log_full_commit(root->fs_info, trans)) {
+ ret = -EAGAIN;
+ goto out;
+ }
if (!root->log_start_pid) {
root->log_start_pid = current->pid;
- root->log_multiple_pids = false;
+ clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
} else if (root->log_start_pid != current->pid) {
- root->log_multiple_pids = true;
+ set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
}
- root->log_batch++;
+ atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
+ if (ctx) {
+ index = root->log_transid % 2;
+ list_add_tail(&ctx->list, &root->log_ctxs[index]);
+ ctx->log_transid = root->log_transid;
+ }
mutex_unlock(&root->log_mutex);
return 0;
}
- root->log_multiple_pids = false;
- root->log_start_pid = current->pid;
+
+ ret = 0;
mutex_lock(&root->fs_info->tree_log_mutex);
- if (!root->fs_info->log_root_tree) {
+ if (!root->fs_info->log_root_tree)
ret = btrfs_init_log_root_tree(trans, root->fs_info);
- if (ret)
- err = ret;
- }
- if (err == 0 && !root->log_root) {
+ mutex_unlock(&root->fs_info->tree_log_mutex);
+ if (ret)
+ goto out;
+
+ if (!root->log_root) {
ret = btrfs_add_log_tree(trans, root);
if (ret)
- err = ret;
+ goto out;
}
- mutex_unlock(&root->fs_info->tree_log_mutex);
- root->log_batch++;
+ clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
+ root->log_start_pid = current->pid;
+ atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
+ if (ctx) {
+ index = root->log_transid % 2;
+ list_add_tail(&ctx->list, &root->log_ctxs[index]);
+ ctx->log_transid = root->log_transid;
+ }
+out:
mutex_unlock(&root->log_mutex);
- return err;
+ return ret;
}
/*
@@ -212,14 +232,13 @@ int btrfs_pin_log_trans(struct btrfs_root *root)
* indicate we're done making changes to the log tree
* and wake up anyone waiting to do a sync
*/
-int btrfs_end_log_trans(struct btrfs_root *root)
+void btrfs_end_log_trans(struct btrfs_root *root)
{
if (atomic_dec_and_test(&root->log_writers)) {
smp_mb();
if (waitqueue_active(&root->log_writer_wait))
wake_up(&root->log_writer_wait);
}
- return 0;
}
@@ -275,17 +294,31 @@ static int process_one_buffer(struct btrfs_root *log,
struct extent_buffer *eb,
struct walk_control *wc, u64 gen)
{
+ int ret = 0;
+
+ /*
+ * If this fs is mixed then we need to be able to process the leaves to
+ * pin down any logged extents, so we have to read the block.
+ */
+ if (btrfs_fs_incompat(log->fs_info, MIXED_GROUPS)) {
+ ret = btrfs_read_buffer(eb, gen);
+ if (ret)
+ return ret;
+ }
+
if (wc->pin)
- btrfs_pin_extent(log->fs_info->extent_root,
- eb->start, eb->len, 0);
+ ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
+ eb->start, eb->len);
- if (btrfs_buffer_uptodate(eb, gen)) {
+ if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
+ if (wc->pin && btrfs_header_level(eb) == 0)
+ ret = btrfs_exclude_logged_extents(log, eb);
if (wc->write)
btrfs_write_tree_block(eb);
if (wc->wait)
btrfs_wait_tree_block_writeback(eb);
}
- return 0;
+ return ret;
}
/*
@@ -315,6 +348,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
unsigned long src_ptr;
unsigned long dst_ptr;
int overwrite_root = 0;
+ bool inode_item = key->type == BTRFS_INODE_ITEM_KEY;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
overwrite_root = 1;
@@ -324,6 +358,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
/* look for the key in the destination tree */
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
if (ret == 0) {
char *src_copy;
char *dst_copy;
@@ -333,11 +370,17 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
goto insert;
if (item_size == 0) {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
return 0;
}
dst_copy = kmalloc(item_size, GFP_NOFS);
src_copy = kmalloc(item_size, GFP_NOFS);
+ if (!dst_copy || !src_copy) {
+ btrfs_release_path(path);
+ kfree(dst_copy);
+ kfree(src_copy);
+ return -ENOMEM;
+ }
read_extent_buffer(eb, src_copy, src_ptr, item_size);
@@ -355,13 +398,57 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
* sync
*/
if (ret == 0) {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
return 0;
}
+ /*
+ * We need to load the old nbytes into the inode so when we
+ * replay the extents we've logged we get the right nbytes.
+ */
+ if (inode_item) {
+ struct btrfs_inode_item *item;
+ u64 nbytes;
+ u32 mode;
+
+ item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_item);
+ nbytes = btrfs_inode_nbytes(path->nodes[0], item);
+ item = btrfs_item_ptr(eb, slot,
+ struct btrfs_inode_item);
+ btrfs_set_inode_nbytes(eb, item, nbytes);
+
+ /*
+ * If this is a directory we need to reset the i_size to
+ * 0 so that we can set it up properly when replaying
+ * the rest of the items in this log.
+ */
+ mode = btrfs_inode_mode(eb, item);
+ if (S_ISDIR(mode))
+ btrfs_set_inode_size(eb, item, 0);
+ }
+ } else if (inode_item) {
+ struct btrfs_inode_item *item;
+ u32 mode;
+
+ /*
+ * New inode, set nbytes to 0 so that the nbytes comes out
+ * properly when we replay the extents.
+ */
+ item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
+ btrfs_set_inode_nbytes(eb, item, 0);
+
+ /*
+ * If this is a directory we need to reset the i_size to 0 so
+ * that we can set it up properly when replaying the rest of
+ * the items in this log.
+ */
+ mode = btrfs_inode_mode(eb, item);
+ if (S_ISDIR(mode))
+ btrfs_set_inode_size(eb, item, 0);
}
insert:
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
/* try to insert the key into the destination tree */
ret = btrfs_insert_empty_item(trans, root, path,
key, item_size);
@@ -371,13 +458,11 @@ insert:
u32 found_size;
found_size = btrfs_item_size_nr(path->nodes[0],
path->slots[0]);
- if (found_size > item_size) {
- btrfs_truncate_item(trans, root, path, item_size, 1);
- } else if (found_size < item_size) {
- ret = btrfs_extend_item(trans, root, path,
- item_size - found_size);
- BUG_ON(ret);
- }
+ if (found_size > item_size)
+ btrfs_truncate_item(root, path, item_size, 1);
+ else if (found_size < item_size)
+ btrfs_extend_item(root, path,
+ item_size - found_size);
} else if (ret) {
return ret;
}
@@ -432,7 +517,7 @@ insert:
}
no_copy:
btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
return 0;
}
@@ -478,11 +563,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_key *key)
{
int found_type;
- u64 mask = root->sectorsize - 1;
u64 extent_end;
- u64 alloc_hint;
u64 start = key->offset;
- u64 saved_nbytes;
+ u64 nbytes = 0;
struct btrfs_file_extent_item *item;
struct inode *inode = NULL;
unsigned long size;
@@ -492,11 +575,20 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
found_type = btrfs_file_extent_type(eb, item);
if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC)
- extent_end = start + btrfs_file_extent_num_bytes(eb, item);
- else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- size = btrfs_file_extent_inline_len(eb, item);
- extent_end = (start + size + mask) & ~mask;
+ found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ nbytes = btrfs_file_extent_num_bytes(eb, item);
+ extent_end = start + nbytes;
+
+ /*
+ * We don't add to the inodes nbytes if we are prealloc or a
+ * hole.
+ */
+ if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
+ nbytes = 0;
+ } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+ size = btrfs_file_extent_inline_len(eb, slot, item);
+ nbytes = btrfs_file_extent_ram_bytes(eb, item);
+ extent_end = ALIGN(start + size, root->sectorsize);
} else {
ret = 0;
goto out;
@@ -513,7 +605,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
* file. This must be done before the btrfs_drop_extents run
* so we don't try to drop this extent.
*/
- ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+ ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
start, 0);
if (ret == 0 &&
@@ -538,17 +630,16 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
* we don't have to do anything
*/
if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
goto out;
}
}
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
- saved_nbytes = inode_get_bytes(inode);
/* drop any overlapping extents */
- ret = btrfs_drop_extents(trans, inode, start, extent_end,
- &alloc_hint, 1);
- BUG_ON(ret);
+ ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
+ if (ret)
+ goto out;
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -558,7 +649,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, key,
sizeof(*item));
- BUG_ON(ret);
+ if (ret)
+ goto out;
dest_offset = btrfs_item_ptr_offset(path->nodes[0],
path->slots[0]);
copy_extent_buffer(path->nodes[0], eb, dest_offset,
@@ -583,7 +675,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_inc_extent_ref(trans, root,
ins.objectid, ins.offset,
0, root->root_key.objectid,
- key->objectid, offset);
+ key->objectid, offset, 0);
+ if (ret)
+ goto out;
} else {
/*
* insert the extent pointer in the extent
@@ -592,9 +686,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_alloc_logged_file_extent(trans,
root, root->root_key.objectid,
key->objectid, offset, &ins);
- BUG_ON(ret);
+ if (ret)
+ goto out;
}
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
if (btrfs_file_extent_compression(eb, item)) {
csum_start = ins.objectid;
@@ -608,31 +703,35 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_csums_range(root->log_root,
csum_start, csum_end - 1,
- &ordered_sums);
- BUG_ON(ret);
+ &ordered_sums, 0);
+ if (ret)
+ goto out;
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums;
sums = list_entry(ordered_sums.next,
struct btrfs_ordered_sum,
list);
- ret = btrfs_csum_file_blocks(trans,
+ if (!ret)
+ ret = btrfs_csum_file_blocks(trans,
root->fs_info->csum_root,
sums);
- BUG_ON(ret);
list_del(&sums->list);
kfree(sums);
}
+ if (ret)
+ goto out;
} else {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
}
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
/* inline extents are easy, we just overwrite them */
ret = overwrite_item(trans, root, path, eb, slot, key);
- BUG_ON(ret);
+ if (ret)
+ goto out;
}
- inode_set_bytes(inode, saved_nbytes);
- btrfs_update_inode(trans, root, inode);
+ inode_add_bytes(inode, nbytes);
+ ret = btrfs_update_inode(trans, root, inode);
out:
if (inode)
iput(inode);
@@ -665,19 +764,29 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
btrfs_dir_item_key_to_cpu(leaf, di, &location);
name_len = btrfs_dir_name_len(leaf, di);
name = kmalloc(name_len, GFP_NOFS);
+ if (!name)
+ return -ENOMEM;
+
read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
inode = read_one_inode(root, location.objectid);
- BUG_ON(!inode);
+ if (!inode) {
+ ret = -EIO;
+ goto out;
+ }
ret = link_to_fixup_dir(trans, root, path, location.objectid);
- BUG_ON(ret);
+ if (ret)
+ goto out;
ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
- BUG_ON(ret);
+ if (ret)
+ goto out;
+ else
+ ret = btrfs_run_delayed_items(trans, root);
+out:
kfree(name);
-
iput(inode);
return ret;
}
@@ -704,7 +813,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
goto out;
} else
goto out;
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
if (di && !IS_ERR(di)) {
@@ -715,7 +824,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
goto out;
match = 1;
out:
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
return match;
}
@@ -731,6 +840,7 @@ out:
*/
static noinline int backref_in_log(struct btrfs_root *log,
struct btrfs_key *key,
+ u64 ref_objectid,
char *name, int namelen)
{
struct btrfs_path *path;
@@ -744,12 +854,24 @@ static noinline int backref_in_log(struct btrfs_root *log,
int match = 0;
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
if (ret != 0)
goto out;
- item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
+
+ if (key->type == BTRFS_INODE_EXTREF_KEY) {
+ if (btrfs_find_name_in_ext_backref(path, ref_objectid,
+ name, namelen, NULL))
+ match = 1;
+
+ goto out;
+ }
+
+ item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
ref = (struct btrfs_inode_ref *)ptr;
@@ -770,84 +892,42 @@ out:
return match;
}
-
-/*
- * replay one inode back reference item found in the log tree.
- * eb, slot and key refer to the buffer and key found in the log tree.
- * root is the destination we are replaying into, and path is for temp
- * use by this function. (it should be released on return).
- */
-static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct btrfs_root *log,
struct btrfs_path *path,
- struct extent_buffer *eb, int slot,
- struct btrfs_key *key)
+ struct btrfs_root *log_root,
+ struct inode *dir, struct inode *inode,
+ struct extent_buffer *eb,
+ u64 inode_objectid, u64 parent_objectid,
+ u64 ref_index, char *name, int namelen,
+ int *search_done)
{
- struct inode *dir;
int ret;
- struct btrfs_inode_ref *ref;
+ char *victim_name;
+ int victim_name_len;
+ struct extent_buffer *leaf;
struct btrfs_dir_item *di;
- struct inode *inode;
- char *name;
- int namelen;
- unsigned long ref_ptr;
- unsigned long ref_end;
-
- /*
- * it is possible that we didn't log all the parent directories
- * for a given inode. If we don't find the dir, just don't
- * copy the back ref in. The link count fixup code will take
- * care of the rest
- */
- dir = read_one_inode(root, key->offset);
- if (!dir)
- return -ENOENT;
-
- inode = read_one_inode(root, key->objectid);
- BUG_ON(!inode);
-
- ref_ptr = btrfs_item_ptr_offset(eb, slot);
- ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
+ struct btrfs_key search_key;
+ struct btrfs_inode_extref *extref;
again:
- ref = (struct btrfs_inode_ref *)ref_ptr;
-
- namelen = btrfs_inode_ref_name_len(eb, ref);
- name = kmalloc(namelen, GFP_NOFS);
- BUG_ON(!name);
-
- read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen);
-
- /* if we already have a perfect match, we're done */
- if (inode_in_dir(root, path, dir->i_ino, inode->i_ino,
- btrfs_inode_ref_index(eb, ref),
- name, namelen)) {
- goto out;
- }
-
- /*
- * look for a conflicting back reference in the metadata.
- * if we find one we have to unlink that name of the file
- * before we add our new link. Later on, we overwrite any
- * existing back reference, and we don't want to create
- * dangling pointers in the directory.
- */
-conflict_again:
- ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ /* Search old style refs */
+ search_key.objectid = inode_objectid;
+ search_key.type = BTRFS_INODE_REF_KEY;
+ search_key.offset = parent_objectid;
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret == 0) {
- char *victim_name;
- int victim_name_len;
struct btrfs_inode_ref *victim_ref;
unsigned long ptr;
unsigned long ptr_end;
- struct extent_buffer *leaf = path->nodes[0];
+
+ leaf = path->nodes[0];
/* are we trying to overwrite a back ref for the root directory
* if so, just jump out, we're done
*/
- if (key->objectid == key->offset)
- goto out_nowrite;
+ if (search_key.objectid == search_key.offset)
+ return 1;
/* check all the names in this back reference to see
* if they are in the log. if so, we allow them to stay
@@ -860,114 +940,385 @@ conflict_again:
victim_name_len = btrfs_inode_ref_name_len(leaf,
victim_ref);
victim_name = kmalloc(victim_name_len, GFP_NOFS);
- BUG_ON(!victim_name);
+ if (!victim_name)
+ return -ENOMEM;
read_extent_buffer(leaf, victim_name,
(unsigned long)(victim_ref + 1),
victim_name_len);
- if (!backref_in_log(log, key, victim_name,
+ if (!backref_in_log(log_root, &search_key,
+ parent_objectid,
+ victim_name,
victim_name_len)) {
- btrfs_inc_nlink(inode);
- btrfs_release_path(root, path);
+ inc_nlink(inode);
+ btrfs_release_path(path);
ret = btrfs_unlink_inode(trans, root, dir,
inode, victim_name,
victim_name_len);
kfree(victim_name);
- btrfs_release_path(root, path);
- goto conflict_again;
+ if (ret)
+ return ret;
+ ret = btrfs_run_delayed_items(trans, root);
+ if (ret)
+ return ret;
+ *search_done = 1;
+ goto again;
}
kfree(victim_name);
+
ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
}
- BUG_ON(ret);
+
+ /*
+ * NOTE: we have searched root tree and checked the
+ * coresponding ref, it does not need to check again.
+ */
+ *search_done = 1;
+ }
+ btrfs_release_path(path);
+
+ /* Same search but for extended refs */
+ extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
+ inode_objectid, parent_objectid, 0,
+ 0);
+ if (!IS_ERR_OR_NULL(extref)) {
+ u32 item_size;
+ u32 cur_offset = 0;
+ unsigned long base;
+ struct inode *victim_parent;
+
+ leaf = path->nodes[0];
+
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ base = btrfs_item_ptr_offset(leaf, path->slots[0]);
+
+ while (cur_offset < item_size) {
+ extref = (struct btrfs_inode_extref *)base + cur_offset;
+
+ victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
+
+ if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
+ goto next;
+
+ victim_name = kmalloc(victim_name_len, GFP_NOFS);
+ if (!victim_name)
+ return -ENOMEM;
+ read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
+ victim_name_len);
+
+ search_key.objectid = inode_objectid;
+ search_key.type = BTRFS_INODE_EXTREF_KEY;
+ search_key.offset = btrfs_extref_hash(parent_objectid,
+ victim_name,
+ victim_name_len);
+ ret = 0;
+ if (!backref_in_log(log_root, &search_key,
+ parent_objectid, victim_name,
+ victim_name_len)) {
+ ret = -ENOENT;
+ victim_parent = read_one_inode(root,
+ parent_objectid);
+ if (victim_parent) {
+ inc_nlink(inode);
+ btrfs_release_path(path);
+
+ ret = btrfs_unlink_inode(trans, root,
+ victim_parent,
+ inode,
+ victim_name,
+ victim_name_len);
+ if (!ret)
+ ret = btrfs_run_delayed_items(
+ trans, root);
+ }
+ iput(victim_parent);
+ kfree(victim_name);
+ if (ret)
+ return ret;
+ *search_done = 1;
+ goto again;
+ }
+ kfree(victim_name);
+ if (ret)
+ return ret;
+next:
+ cur_offset += victim_name_len + sizeof(*extref);
+ }
+ *search_done = 1;
}
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
/* look for a conflicting sequence number */
- di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
- btrfs_inode_ref_index(eb, ref),
- name, namelen, 0);
+ di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
+ ref_index, name, namelen, 0);
if (di && !IS_ERR(di)) {
ret = drop_one_dir_item(trans, root, path, dir, di);
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
-
- /* look for a conflicting name */
- di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+ /* look for a conflicing name */
+ di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
name, namelen, 0);
if (di && !IS_ERR(di)) {
ret = drop_one_dir_item(trans, root, path, dir, di);
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
- /* insert our name */
- ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
- btrfs_inode_ref_index(eb, ref));
- BUG_ON(ret);
+ return 0;
+}
- btrfs_update_inode(trans, root, inode);
+static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+ u32 *namelen, char **name, u64 *index,
+ u64 *parent_objectid)
+{
+ struct btrfs_inode_extref *extref;
-out:
- ref_ptr = (unsigned long)(ref + 1) + namelen;
- kfree(name);
- if (ref_ptr < ref_end)
- goto again;
+ extref = (struct btrfs_inode_extref *)ref_ptr;
+
+ *namelen = btrfs_inode_extref_name_len(eb, extref);
+ *name = kmalloc(*namelen, GFP_NOFS);
+ if (*name == NULL)
+ return -ENOMEM;
+
+ read_extent_buffer(eb, *name, (unsigned long)&extref->name,
+ *namelen);
+
+ *index = btrfs_inode_extref_index(eb, extref);
+ if (parent_objectid)
+ *parent_objectid = btrfs_inode_extref_parent(eb, extref);
+
+ return 0;
+}
+
+static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+ u32 *namelen, char **name, u64 *index)
+{
+ struct btrfs_inode_ref *ref;
+
+ ref = (struct btrfs_inode_ref *)ref_ptr;
+
+ *namelen = btrfs_inode_ref_name_len(eb, ref);
+ *name = kmalloc(*namelen, GFP_NOFS);
+ if (*name == NULL)
+ return -ENOMEM;
+
+ read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
+
+ *index = btrfs_inode_ref_index(eb, ref);
+
+ return 0;
+}
+
+/*
+ * replay one inode back reference item found in the log tree.
+ * eb, slot and key refer to the buffer and key found in the log tree.
+ * root is the destination we are replaying into, and path is for temp
+ * use by this function. (it should be released on return).
+ */
+static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_root *log,
+ struct btrfs_path *path,
+ struct extent_buffer *eb, int slot,
+ struct btrfs_key *key)
+{
+ struct inode *dir = NULL;
+ struct inode *inode = NULL;
+ unsigned long ref_ptr;
+ unsigned long ref_end;
+ char *name = NULL;
+ int namelen;
+ int ret;
+ int search_done = 0;
+ int log_ref_ver = 0;
+ u64 parent_objectid;
+ u64 inode_objectid;
+ u64 ref_index = 0;
+ int ref_struct_size;
+
+ ref_ptr = btrfs_item_ptr_offset(eb, slot);
+ ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
+
+ if (key->type == BTRFS_INODE_EXTREF_KEY) {
+ struct btrfs_inode_extref *r;
+
+ ref_struct_size = sizeof(struct btrfs_inode_extref);
+ log_ref_ver = 1;
+ r = (struct btrfs_inode_extref *)ref_ptr;
+ parent_objectid = btrfs_inode_extref_parent(eb, r);
+ } else {
+ ref_struct_size = sizeof(struct btrfs_inode_ref);
+ parent_objectid = key->offset;
+ }
+ inode_objectid = key->objectid;
+
+ /*
+ * it is possible that we didn't log all the parent directories
+ * for a given inode. If we don't find the dir, just don't
+ * copy the back ref in. The link count fixup code will take
+ * care of the rest
+ */
+ dir = read_one_inode(root, parent_objectid);
+ if (!dir) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ inode = read_one_inode(root, inode_objectid);
+ if (!inode) {
+ ret = -EIO;
+ goto out;
+ }
+
+ while (ref_ptr < ref_end) {
+ if (log_ref_ver) {
+ ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
+ &ref_index, &parent_objectid);
+ /*
+ * parent object can change from one array
+ * item to another.
+ */
+ if (!dir)
+ dir = read_one_inode(root, parent_objectid);
+ if (!dir) {
+ ret = -ENOENT;
+ goto out;
+ }
+ } else {
+ ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
+ &ref_index);
+ }
+ if (ret)
+ goto out;
+
+ /* if we already have a perfect match, we're done */
+ if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
+ ref_index, name, namelen)) {
+ /*
+ * look for a conflicting back reference in the
+ * metadata. if we find one we have to unlink that name
+ * of the file before we add our new link. Later on, we
+ * overwrite any existing back reference, and we don't
+ * want to create dangling pointers in the directory.
+ */
+
+ if (!search_done) {
+ ret = __add_inode_ref(trans, root, path, log,
+ dir, inode, eb,
+ inode_objectid,
+ parent_objectid,
+ ref_index, name, namelen,
+ &search_done);
+ if (ret) {
+ if (ret == 1)
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /* insert our name */
+ ret = btrfs_add_link(trans, dir, inode, name, namelen,
+ 0, ref_index);
+ if (ret)
+ goto out;
+
+ btrfs_update_inode(trans, root, inode);
+ }
+
+ ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
+ kfree(name);
+ name = NULL;
+ if (log_ref_ver) {
+ iput(dir);
+ dir = NULL;
+ }
+ }
/* finally write the back reference in the inode */
ret = overwrite_item(trans, root, path, eb, slot, key);
- BUG_ON(ret);
-
-out_nowrite:
- btrfs_release_path(root, path);
+out:
+ btrfs_release_path(path);
+ kfree(name);
iput(dir);
iput(inode);
- return 0;
+ return ret;
}
static int insert_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 offset)
{
int ret;
- ret = btrfs_find_orphan_item(root, offset);
+ ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID,
+ offset, BTRFS_ORPHAN_ITEM_KEY, NULL);
if (ret > 0)
ret = btrfs_insert_orphan_item(trans, root, offset);
return ret;
}
+static int count_inode_extrefs(struct btrfs_root *root,
+ struct inode *inode, struct btrfs_path *path)
+{
+ int ret = 0;
+ int name_len;
+ unsigned int nlink = 0;
+ u32 item_size;
+ u32 cur_offset = 0;
+ u64 inode_objectid = btrfs_ino(inode);
+ u64 offset = 0;
+ unsigned long ptr;
+ struct btrfs_inode_extref *extref;
+ struct extent_buffer *leaf;
-/*
- * There are a few corners where the link count of the file can't
- * be properly maintained during replay. So, instead of adding
- * lots of complexity to the log code, we just scan the backrefs
- * for any file that has been through replay.
- *
- * The scan will update the link count on the inode to reflect the
- * number of back refs found. If it goes down to zero, the iput
- * will free the inode.
- */
-static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode)
+ while (1) {
+ ret = btrfs_find_one_extref(root, inode_objectid, offset, path,
+ &extref, &offset);
+ if (ret)
+ break;
+
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+
+ while (cur_offset < item_size) {
+ extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
+ name_len = btrfs_inode_extref_name_len(leaf, extref);
+
+ nlink++;
+
+ cur_offset += name_len + sizeof(*extref);
+ }
+
+ offset++;
+ btrfs_release_path(path);
+ }
+ btrfs_release_path(path);
+
+ if (ret < 0)
+ return ret;
+ return nlink;
+}
+
+static int count_inode_refs(struct btrfs_root *root,
+ struct inode *inode, struct btrfs_path *path)
{
- struct btrfs_path *path;
int ret;
struct btrfs_key key;
- u64 nlink = 0;
+ unsigned int nlink = 0;
unsigned long ptr;
unsigned long ptr_end;
int name_len;
+ u64 ino = btrfs_ino(inode);
- key.objectid = inode->i_ino;
+ key.objectid = ino;
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
- path = btrfs_alloc_path();
-
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
@@ -977,9 +1328,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
break;
path->slots[0]--;
}
+process_slot:
btrfs_item_key_to_cpu(path->nodes[0], &key,
path->slots[0]);
- if (key.objectid != inode->i_ino ||
+ if (key.objectid != ino ||
key.type != BTRFS_INODE_REF_KEY)
break;
ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
@@ -997,12 +1349,60 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (key.offset == 0)
break;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ goto process_slot;
+ }
key.offset--;
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
}
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
+
+ return nlink;
+}
+
+/*
+ * There are a few corners where the link count of the file can't
+ * be properly maintained during replay. So, instead of adding
+ * lots of complexity to the log code, we just scan the backrefs
+ * for any file that has been through replay.
+ *
+ * The scan will update the link count on the inode to reflect the
+ * number of back refs found. If it goes down to zero, the iput
+ * will free the inode.
+ */
+static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode)
+{
+ struct btrfs_path *path;
+ int ret;
+ u64 nlink = 0;
+ u64 ino = btrfs_ino(inode);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = count_inode_refs(root, inode, path);
+ if (ret < 0)
+ goto out;
+
+ nlink = ret;
+
+ ret = count_inode_extrefs(root, inode, path);
+ if (ret == -ENOENT)
+ ret = 0;
+
+ if (ret < 0)
+ goto out;
+
+ nlink += ret;
+
+ ret = 0;
+
if (nlink != inode->i_nlink) {
- inode->i_nlink = nlink;
+ set_nlink(inode, nlink);
btrfs_update_inode(trans, root, inode);
}
BTRFS_I(inode)->index_cnt = (u64)-1;
@@ -1010,15 +1410,16 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (inode->i_nlink == 0) {
if (S_ISDIR(inode->i_mode)) {
ret = replay_dir_deletes(trans, root, NULL, path,
- inode->i_ino, 1);
- BUG_ON(ret);
+ ino, 1);
+ if (ret)
+ goto out;
}
- ret = insert_orphan_item(trans, root, inode->i_ino);
- BUG_ON(ret);
+ ret = insert_orphan_item(trans, root, ino);
}
- btrfs_free_path(path);
- return 0;
+out:
+ btrfs_free_path(path);
+ return ret;
}
static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
@@ -1049,16 +1450,18 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
break;
ret = btrfs_del_item(trans, root, path);
- BUG_ON(ret);
+ if (ret)
+ goto out;
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
inode = read_one_inode(root, key.offset);
- BUG_ON(!inode);
+ if (!inode)
+ return -EIO;
ret = fixup_inode_link_count(trans, root, inode);
- BUG_ON(ret);
-
iput(inode);
+ if (ret)
+ goto out;
/*
* fixup on a directory may create new entries,
@@ -1067,8 +1470,10 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
*/
key.offset = (u64)-1;
}
- btrfs_release_path(root, path);
- return 0;
+ ret = 0;
+out:
+ btrfs_release_path(path);
+ return ret;
}
@@ -1087,7 +1492,8 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
struct inode *inode;
inode = read_one_inode(root, objectid);
- BUG_ON(!inode);
+ if (!inode)
+ return -EIO;
key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID;
btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
@@ -1095,14 +1501,17 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
if (ret == 0) {
- btrfs_inc_nlink(inode);
- btrfs_update_inode(trans, root, inode);
+ if (!inode->i_nlink)
+ set_nlink(inode, 1);
+ else
+ inc_nlink(inode);
+ ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
} else {
- BUG();
+ BUG(); /* Logic Error */
}
iput(inode);
@@ -1134,6 +1543,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
iput(inode);
return -EIO;
}
+
ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
/* FIXME, put inode into FIXUP list */
@@ -1171,13 +1581,20 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
struct inode *dir;
u8 log_type;
int exists;
- int ret;
+ int ret = 0;
+ bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
dir = read_one_inode(root, key->objectid);
- BUG_ON(!dir);
+ if (!dir)
+ return -EIO;
name_len = btrfs_dir_name_len(eb, di);
name = kmalloc(name_len, GFP_NOFS);
+ if (!name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
log_type = btrfs_dir_type(eb, di);
read_extent_buffer(eb, name, (unsigned long)(di + 1),
name_len);
@@ -1188,7 +1605,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
exists = 1;
else
exists = 0;
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
if (key->type == BTRFS_DIR_ITEM_KEY) {
dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
@@ -1199,9 +1616,11 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
key->offset, name,
name_len, 1);
} else {
- BUG();
+ /* Corruption */
+ ret = -EINVAL;
+ goto out;
}
- if (!dst_di || IS_ERR(dst_di)) {
+ if (IS_ERR_OR_NULL(dst_di)) {
/* we need a sequence number to insert, so we only
* do inserts for the BTRFS_DIR_INDEX_KEY types
*/
@@ -1227,22 +1646,29 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
goto out;
ret = drop_one_dir_item(trans, root, path, dir, dst_di);
- BUG_ON(ret);
+ if (ret)
+ goto out;
if (key->type == BTRFS_DIR_INDEX_KEY)
goto insert;
out:
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
+ if (!ret && update_size) {
+ btrfs_i_size_write(dir, dir->i_size + name_len * 2);
+ ret = btrfs_update_inode(trans, root, dir);
+ }
kfree(name);
iput(dir);
- return 0;
+ return ret;
insert:
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
ret = insert_one_name(trans, root, path, key->objectid, key->offset,
name, name_len, log_type, &log_key);
-
- BUG_ON(ret && ret != -ENOENT);
+ if (ret && ret != -ENOENT)
+ goto out;
+ update_size = false;
+ ret = 0;
goto out;
}
@@ -1269,9 +1695,12 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
+ if (verify_dir_item(root, eb, di))
+ return -EIO;
name_len = btrfs_dir_name_len(eb, di);
ret = replay_one_name(trans, root, path, eb, di, key);
- BUG_ON(ret);
+ if (ret)
+ return ret;
ptr = (unsigned long)(di + 1);
ptr += name_len;
}
@@ -1357,7 +1786,7 @@ next:
*end_ret = found_end;
ret = 0;
out:
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
return ret;
}
@@ -1395,6 +1824,11 @@ again:
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
+ if (verify_dir_item(root, eb, di)) {
+ ret = -EIO;
+ goto out;
+ }
+
name_len = btrfs_dir_name_len(eb, di);
name = kmalloc(name_len, GFP_NOFS);
if (!name) {
@@ -1415,22 +1849,33 @@ again:
dir_key->offset,
name, name_len, 0);
}
- if (!log_di || IS_ERR(log_di)) {
+ if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
btrfs_dir_item_key_to_cpu(eb, di, &location);
- btrfs_release_path(root, path);
- btrfs_release_path(log, log_path);
+ btrfs_release_path(path);
+ btrfs_release_path(log_path);
inode = read_one_inode(root, location.objectid);
- BUG_ON(!inode);
+ if (!inode) {
+ kfree(name);
+ return -EIO;
+ }
ret = link_to_fixup_dir(trans, root,
path, location.objectid);
- BUG_ON(ret);
- btrfs_inc_nlink(inode);
+ if (ret) {
+ kfree(name);
+ iput(inode);
+ goto out;
+ }
+
+ inc_nlink(inode);
ret = btrfs_unlink_inode(trans, root, dir, inode,
name, name_len);
- BUG_ON(ret);
+ if (!ret)
+ ret = btrfs_run_delayed_items(trans, root);
kfree(name);
iput(inode);
+ if (ret)
+ goto out;
/* there might still be more names under this key
* check and repeat if required
@@ -1441,8 +1886,11 @@ again:
goto again;
ret = 0;
goto out;
+ } else if (IS_ERR(log_di)) {
+ kfree(name);
+ return PTR_ERR(log_di);
}
- btrfs_release_path(log, log_path);
+ btrfs_release_path(log_path);
kfree(name);
ptr = (unsigned long)(di + 1);
@@ -1450,8 +1898,8 @@ again:
}
ret = 0;
out:
- btrfs_release_path(root, path);
- btrfs_release_path(log, log_path);
+ btrfs_release_path(path);
+ btrfs_release_path(log_path);
return ret;
}
@@ -1534,12 +1982,13 @@ again:
ret = check_item_in_log(trans, root, log, path,
log_path, dir,
&found_key);
- BUG_ON(ret);
+ if (ret)
+ goto out;
if (found_key.offset == (u64)-1)
break;
dir_key.offset = found_key.offset + 1;
}
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
if (range_end == (u64)-1)
break;
range_start = range_end + 1;
@@ -1550,11 +1999,11 @@ next_type:
if (key_type == BTRFS_DIR_LOG_ITEM_KEY) {
key_type = BTRFS_DIR_LOG_INDEX_KEY;
dir_key.type = BTRFS_DIR_INDEX_KEY;
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
goto again;
}
out:
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
btrfs_free_path(log_path);
iput(dir);
return ret;
@@ -1582,7 +2031,9 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
int i;
int ret;
- btrfs_read_buffer(eb, gen);
+ ret = btrfs_read_buffer(eb, gen);
+ if (ret)
+ return ret;
level = btrfs_header_level(eb);
@@ -1590,7 +2041,8 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
return 0;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
@@ -1608,11 +2060,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (S_ISDIR(mode)) {
ret = replay_dir_deletes(wc->trans,
root, log, path, key.objectid, 0);
- BUG_ON(ret);
+ if (ret)
+ break;
}
ret = overwrite_item(wc->trans, root, path,
eb, i, &key);
- BUG_ON(ret);
+ if (ret)
+ break;
/* for regular files, make sure corresponding
* orhpan item exist. extents past the new EOF
@@ -1621,13 +2075,24 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (S_ISREG(mode)) {
ret = insert_orphan_item(wc->trans, root,
key.objectid);
- BUG_ON(ret);
+ if (ret)
+ break;
}
ret = link_to_fixup_dir(wc->trans, root,
path, key.objectid);
- BUG_ON(ret);
+ if (ret)
+ break;
}
+
+ if (key.type == BTRFS_DIR_INDEX_KEY &&
+ wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
+ ret = replay_one_dir_item(wc->trans, root, path,
+ eb, i, &key);
+ if (ret)
+ break;
+ }
+
if (wc->stage < LOG_WALK_REPLAY_ALL)
continue;
@@ -1635,24 +2100,29 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (key.type == BTRFS_XATTR_ITEM_KEY) {
ret = overwrite_item(wc->trans, root, path,
eb, i, &key);
- BUG_ON(ret);
- } else if (key.type == BTRFS_INODE_REF_KEY) {
+ if (ret)
+ break;
+ } else if (key.type == BTRFS_INODE_REF_KEY ||
+ key.type == BTRFS_INODE_EXTREF_KEY) {
ret = add_inode_ref(wc->trans, root, log, path,
eb, i, &key);
- BUG_ON(ret && ret != -ENOENT);
+ if (ret && ret != -ENOENT)
+ break;
+ ret = 0;
} else if (key.type == BTRFS_EXTENT_DATA_KEY) {
ret = replay_one_extent(wc->trans, root, path,
eb, i, &key);
- BUG_ON(ret);
- } else if (key.type == BTRFS_DIR_ITEM_KEY ||
- key.type == BTRFS_DIR_INDEX_KEY) {
+ if (ret)
+ break;
+ } else if (key.type == BTRFS_DIR_ITEM_KEY) {
ret = replay_one_dir_item(wc->trans, root, path,
eb, i, &key);
- BUG_ON(ret);
+ if (ret)
+ break;
}
}
btrfs_free_path(path);
- return 0;
+ return ret;
}
static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
@@ -1677,8 +2147,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
- if (btrfs_header_level(cur) != *level)
- WARN_ON(1);
+ WARN_ON(btrfs_header_level(cur) != *level);
if (path->slots[*level] >=
btrfs_header_nritems(cur))
@@ -1692,30 +2161,49 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
root_owner = btrfs_header_owner(parent);
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+ if (!next)
+ return -ENOMEM;
if (*level == 1) {
- wc->process_func(root, next, wc, ptr_gen);
+ ret = wc->process_func(root, next, wc, ptr_gen);
+ if (ret) {
+ free_extent_buffer(next);
+ return ret;
+ }
path->slots[*level]++;
if (wc->free) {
- btrfs_read_buffer(next, ptr_gen);
+ ret = btrfs_read_buffer(next, ptr_gen);
+ if (ret) {
+ free_extent_buffer(next);
+ return ret;
+ }
- btrfs_tree_lock(next);
- clean_tree_block(trans, root, next);
- btrfs_set_lock_blocking(next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
+ if (trans) {
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
+ }
WARN_ON(root_owner !=
BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_free_reserved_extent(root,
+ ret = btrfs_free_and_pin_reserved_extent(root,
bytenr, blocksize);
- BUG_ON(ret);
+ if (ret) {
+ free_extent_buffer(next);
+ return ret;
+ }
}
free_extent_buffer(next);
continue;
}
- btrfs_read_buffer(next, ptr_gen);
+ ret = btrfs_read_buffer(next, ptr_gen);
+ if (ret) {
+ free_extent_buffer(next);
+ return ret;
+ }
WARN_ON(*level <= 0);
if (path->nodes[*level-1])
@@ -1759,24 +2247,30 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
parent = path->nodes[*level + 1];
root_owner = btrfs_header_owner(parent);
- wc->process_func(root, path->nodes[*level], wc,
+ ret = wc->process_func(root, path->nodes[*level], wc,
btrfs_header_generation(path->nodes[*level]));
+ if (ret)
+ return ret;
+
if (wc->free) {
struct extent_buffer *next;
next = path->nodes[*level];
- btrfs_tree_lock(next);
- clean_tree_block(trans, root, next);
- btrfs_set_lock_blocking(next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
+ if (trans) {
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
+ }
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_free_reserved_extent(root,
+ ret = btrfs_free_and_pin_reserved_extent(root,
path->nodes[*level]->start,
path->nodes[*level]->len);
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
@@ -1798,11 +2292,11 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
int wret;
int level;
struct btrfs_path *path;
- int i;
int orig_level;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
level = btrfs_header_level(log->node);
orig_level = level;
@@ -1814,45 +2308,49 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
wret = walk_down_log_tree(trans, log, path, &level, wc);
if (wret > 0)
break;
- if (wret < 0)
+ if (wret < 0) {
ret = wret;
+ goto out;
+ }
wret = walk_up_log_tree(trans, log, path, &level, wc);
if (wret > 0)
break;
- if (wret < 0)
+ if (wret < 0) {
ret = wret;
+ goto out;
+ }
}
/* was the root node processed? if not, catch it here */
if (path->nodes[orig_level]) {
- wc->process_func(log, path->nodes[orig_level], wc,
+ ret = wc->process_func(log, path->nodes[orig_level], wc,
btrfs_header_generation(path->nodes[orig_level]));
+ if (ret)
+ goto out;
if (wc->free) {
struct extent_buffer *next;
next = path->nodes[orig_level];
- btrfs_tree_lock(next);
- clean_tree_block(trans, log, next);
- btrfs_set_lock_blocking(next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
+ if (trans) {
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, log, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
+ }
WARN_ON(log->root_key.objectid !=
BTRFS_TREE_LOG_OBJECTID);
- ret = btrfs_free_reserved_extent(log, next->start,
+ ret = btrfs_free_and_pin_reserved_extent(log, next->start,
next->len);
- BUG_ON(ret);
+ if (ret)
+ goto out;
}
}
- for (i = 0; i <= orig_level; i++) {
- if (path->nodes[i]) {
- free_extent_buffer(path->nodes[i]);
- path->nodes[i] = NULL;
- }
- }
+out:
btrfs_free_path(path);
return ret;
}
@@ -1877,8 +2375,8 @@ static int update_log_root(struct btrfs_trans_handle *trans,
return ret;
}
-static int wait_log_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, unsigned long transid)
+static void wait_log_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int transid)
{
DEFINE_WAIT(wait);
int index = transid % 2;
@@ -1893,33 +2391,61 @@ static int wait_log_commit(struct btrfs_trans_handle *trans,
&wait, TASK_UNINTERRUPTIBLE);
mutex_unlock(&root->log_mutex);
- if (root->fs_info->last_trans_log_full_commit !=
- trans->transid && root->log_transid < transid + 2 &&
+ if (root->log_transid_committed < transid &&
atomic_read(&root->log_commit[index]))
schedule();
finish_wait(&root->log_commit_wait[index], &wait);
mutex_lock(&root->log_mutex);
- } while (root->log_transid < transid + 2 &&
+ } while (root->log_transid_committed < transid &&
atomic_read(&root->log_commit[index]));
- return 0;
}
-static int wait_for_writer(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+static void wait_for_writer(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
{
DEFINE_WAIT(wait);
+
while (atomic_read(&root->log_writers)) {
prepare_to_wait(&root->log_writer_wait,
&wait, TASK_UNINTERRUPTIBLE);
mutex_unlock(&root->log_mutex);
- if (root->fs_info->last_trans_log_full_commit !=
- trans->transid && atomic_read(&root->log_writers))
+ if (atomic_read(&root->log_writers))
schedule();
mutex_lock(&root->log_mutex);
finish_wait(&root->log_writer_wait, &wait);
}
- return 0;
+}
+
+static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
+ struct btrfs_log_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ mutex_lock(&root->log_mutex);
+ list_del_init(&ctx->list);
+ mutex_unlock(&root->log_mutex);
+}
+
+/*
+ * Invoked in log mutex context, or be sure there is no other task which
+ * can access the list.
+ */
+static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
+ int index, int error)
+{
+ struct btrfs_log_ctx *ctx;
+
+ if (!error) {
+ INIT_LIST_HEAD(&root->log_ctxs[index]);
+ return;
+ }
+
+ list_for_each_entry(ctx, &root->log_ctxs[index], list)
+ ctx->log_ret = error;
+
+ INIT_LIST_HEAD(&root->log_ctxs[index]);
}
/*
@@ -1935,7 +2461,7 @@ static int wait_for_writer(struct btrfs_trans_handle *trans,
* that has happened.
*/
int btrfs_sync_log(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+ struct btrfs_root *root, struct btrfs_log_ctx *ctx)
{
int index1;
int index2;
@@ -1943,41 +2469,52 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
int ret;
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
- unsigned long log_transid = 0;
+ int log_transid = 0;
+ struct btrfs_log_ctx root_log_ctx;
+ struct blk_plug plug;
mutex_lock(&root->log_mutex);
- index1 = root->log_transid % 2;
+ log_transid = ctx->log_transid;
+ if (root->log_transid_committed >= log_transid) {
+ mutex_unlock(&root->log_mutex);
+ return ctx->log_ret;
+ }
+
+ index1 = log_transid % 2;
if (atomic_read(&root->log_commit[index1])) {
- wait_log_commit(trans, root, root->log_transid);
+ wait_log_commit(trans, root, log_transid);
mutex_unlock(&root->log_mutex);
- return 0;
+ return ctx->log_ret;
}
+ ASSERT(log_transid == root->log_transid);
atomic_set(&root->log_commit[index1], 1);
/* wait for previous tree log sync to complete */
if (atomic_read(&root->log_commit[(index1 + 1) % 2]))
- wait_log_commit(trans, root, root->log_transid - 1);
+ wait_log_commit(trans, root, log_transid - 1);
while (1) {
- unsigned long batch = root->log_batch;
- if (root->log_multiple_pids) {
+ int batch = atomic_read(&root->log_batch);
+ /* when we're on an ssd, just kick the log commit out */
+ if (!btrfs_test_opt(root, SSD) &&
+ test_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state)) {
mutex_unlock(&root->log_mutex);
schedule_timeout_uninterruptible(1);
mutex_lock(&root->log_mutex);
}
wait_for_writer(trans, root);
- if (batch == root->log_batch)
+ if (batch == atomic_read(&root->log_batch))
break;
}
/* bail out if we need to do a full commit */
- if (root->fs_info->last_trans_log_full_commit == trans->transid) {
+ if (btrfs_need_log_full_commit(root->fs_info, trans)) {
ret = -EAGAIN;
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&root->log_mutex);
goto out;
}
- log_transid = root->log_transid;
if (log_transid % 2 == 0)
mark = EXTENT_DIRTY;
else
@@ -1986,16 +2523,22 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
/* we start IO on all the marked extents here, but we don't actually
* wait for them until later.
*/
+ blk_start_plug(&plug);
ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
- BUG_ON(ret);
+ if (ret) {
+ blk_finish_plug(&plug);
+ btrfs_abort_transaction(trans, root, ret);
+ btrfs_free_logged_extents(log, log_transid);
+ btrfs_set_log_full_commit(root->fs_info, trans);
+ mutex_unlock(&root->log_mutex);
+ goto out;
+ }
btrfs_set_root_node(&log->root_item, log->node);
- root->log_batch = 0;
root->log_transid++;
log->log_transid = root->log_transid;
root->log_start_pid = 0;
- smp_mb();
/*
* IO has been started, blocks of the log tree have WRITTEN flag set
* in their headers. new modifications of the log will be written to
@@ -2003,9 +2546,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
*/
mutex_unlock(&root->log_mutex);
+ btrfs_init_log_ctx(&root_log_ctx);
+
mutex_lock(&log_root_tree->log_mutex);
- log_root_tree->log_batch++;
+ atomic_inc(&log_root_tree->log_batch);
atomic_inc(&log_root_tree->log_writers);
+
+ index2 = log_root_tree->log_transid % 2;
+ list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
+ root_log_ctx.log_transid = log_root_tree->log_transid;
+
mutex_unlock(&log_root_tree->log_mutex);
ret = update_log_root(trans, log);
@@ -2018,27 +2568,47 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
if (ret) {
- BUG_ON(ret != -ENOSPC);
- root->fs_info->last_trans_log_full_commit = trans->transid;
+ if (!list_empty(&root_log_ctx.list))
+ list_del_init(&root_log_ctx.list);
+
+ blk_finish_plug(&plug);
+ btrfs_set_log_full_commit(root->fs_info, trans);
+
+ if (ret != -ENOSPC) {
+ btrfs_abort_transaction(trans, root, ret);
+ mutex_unlock(&log_root_tree->log_mutex);
+ goto out;
+ }
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out;
}
- index2 = log_root_tree->log_transid % 2;
+ if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
+ mutex_unlock(&log_root_tree->log_mutex);
+ ret = root_log_ctx.log_ret;
+ goto out;
+ }
+
+ index2 = root_log_ctx.log_transid % 2;
if (atomic_read(&log_root_tree->log_commit[index2])) {
+ blk_finish_plug(&plug);
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
wait_log_commit(trans, log_root_tree,
- log_root_tree->log_transid);
+ root_log_ctx.log_transid);
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
+ ret = root_log_ctx.log_ret;
goto out;
}
+ ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid);
atomic_set(&log_root_tree->log_commit[index2], 1);
if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) {
wait_log_commit(trans, log_root_tree,
- log_root_tree->log_transid - 1);
+ root_log_ctx.log_transid - 1);
}
wait_for_writer(trans, log_root_tree);
@@ -2047,28 +2617,38 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* now that we've moved on to the tree of log tree roots,
* check the full commit flag again
*/
- if (root->fs_info->last_trans_log_full_commit == trans->transid) {
+ if (btrfs_need_log_full_commit(root->fs_info, trans)) {
+ blk_finish_plug(&plug);
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+ btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out_wake_log_root;
}
- ret = btrfs_write_and_wait_marked_extents(log_root_tree,
- &log_root_tree->dirty_log_pages,
- EXTENT_DIRTY | EXTENT_NEW);
- BUG_ON(ret);
+ ret = btrfs_write_marked_extents(log_root_tree,
+ &log_root_tree->dirty_log_pages,
+ EXTENT_DIRTY | EXTENT_NEW);
+ blk_finish_plug(&plug);
+ if (ret) {
+ btrfs_set_log_full_commit(root->fs_info, trans);
+ btrfs_abort_transaction(trans, root, ret);
+ btrfs_free_logged_extents(log, log_transid);
+ mutex_unlock(&log_root_tree->log_mutex);
+ goto out_wake_log_root;
+ }
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+ btrfs_wait_marked_extents(log_root_tree,
+ &log_root_tree->dirty_log_pages,
+ EXTENT_NEW | EXTENT_DIRTY);
+ btrfs_wait_logged_extents(log, log_transid);
- btrfs_set_super_log_root(&root->fs_info->super_for_commit,
+ btrfs_set_super_log_root(root->fs_info->super_for_commit,
log_root_tree->node->start);
- btrfs_set_super_log_root_level(&root->fs_info->super_for_commit,
+ btrfs_set_super_log_root_level(root->fs_info->super_for_commit,
btrfs_header_level(log_root_tree->node));
- log_root_tree->log_batch = 0;
log_root_tree->log_transid++;
- smp_mb();
-
mutex_unlock(&log_root_tree->log_mutex);
/*
@@ -2078,8 +2658,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* the running transaction open, so a full commit can't hop
* in and cause problems either.
*/
- write_ctree_super(trans, root->fs_info->tree_root, 1);
- ret = 0;
+ ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
+ if (ret) {
+ btrfs_set_log_full_commit(root->fs_info, trans);
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_wake_log_root;
+ }
mutex_lock(&root->log_mutex);
if (root->last_log_commit < log_transid)
@@ -2087,16 +2671,31 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&root->log_mutex);
out_wake_log_root:
+ /*
+ * We needn't get log_mutex here because we are sure all
+ * the other tasks are blocked.
+ */
+ btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
+
+ mutex_lock(&log_root_tree->log_mutex);
+ log_root_tree->log_transid_committed++;
atomic_set(&log_root_tree->log_commit[index2], 0);
- smp_mb();
+ mutex_unlock(&log_root_tree->log_mutex);
+
if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
wake_up(&log_root_tree->log_commit_wait[index2]);
out:
+ /* See above. */
+ btrfs_remove_all_log_ctxs(root, index1, ret);
+
+ mutex_lock(&root->log_mutex);
+ root->log_transid_committed++;
atomic_set(&root->log_commit[index1], 0);
- smp_mb();
+ mutex_unlock(&root->log_mutex);
+
if (waitqueue_active(&root->log_commit_wait[index1]))
wake_up(&root->log_commit_wait[index1]);
- return 0;
+ return ret;
}
static void free_log_tree(struct btrfs_trans_handle *trans,
@@ -2111,11 +2710,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
};
ret = walk_log_tree(trans, log, &wc);
- BUG_ON(ret);
+ /* I don't think this can happen but just in case */
+ if (ret)
+ btrfs_abort_transaction(trans, log, ret);
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
- 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
+ 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW,
+ NULL);
if (ret)
break;
@@ -2123,6 +2725,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
+ /*
+ * We may have short-circuited the log tree with the full commit logic
+ * and left ordered extents on our list, so clear these out to keep us
+ * from leaking inodes and memory.
+ */
+ btrfs_free_logged_extents(log, 0);
+ btrfs_free_logged_extents(log, 1);
+
free_extent_buffer(log->node);
kfree(log);
}
@@ -2182,6 +2792,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
int ret;
int err = 0;
int bytes_del = 0;
+ u64 dir_ino = btrfs_ino(dir);
if (BTRFS_I(dir)->logged_trans < trans->transid)
return 0;
@@ -2194,7 +2805,12 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
log = root->log_root;
path = btrfs_alloc_path();
- di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino,
+ if (!path) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ di = btrfs_lookup_dir_item(trans, log, path, dir_ino,
name, name_len, -1);
if (IS_ERR(di)) {
err = PTR_ERR(di);
@@ -2203,10 +2819,13 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
if (di) {
ret = btrfs_delete_one_dir_name(trans, log, path, di);
bytes_del += name_len;
- BUG_ON(ret);
+ if (ret) {
+ err = ret;
+ goto fail;
+ }
}
- btrfs_release_path(log, path);
- di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino,
+ btrfs_release_path(path);
+ di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
index, name, name_len, -1);
if (IS_ERR(di)) {
err = PTR_ERR(di);
@@ -2215,7 +2834,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
if (di) {
ret = btrfs_delete_one_dir_name(trans, log, path, di);
bytes_del += name_len;
- BUG_ON(ret);
+ if (ret) {
+ err = ret;
+ goto fail;
+ }
}
/* update the directory size in the log to reflect the names
@@ -2224,10 +2846,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
if (bytes_del) {
struct btrfs_key key;
- key.objectid = dir->i_ino;
+ key.objectid = dir_ino;
key.offset = 0;
key.type = BTRFS_INODE_ITEM_KEY;
- btrfs_release_path(log, path);
+ btrfs_release_path(path);
ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
if (ret < 0) {
@@ -2249,15 +2871,18 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(path->nodes[0]);
} else
ret = 0;
- btrfs_release_path(log, path);
+ btrfs_release_path(path);
}
fail:
btrfs_free_path(path);
+out_unlock:
mutex_unlock(&BTRFS_I(dir)->log_mutex);
if (ret == -ENOSPC) {
- root->fs_info->last_trans_log_full_commit = trans->transid;
+ btrfs_set_log_full_commit(root->fs_info, trans);
ret = 0;
- }
+ } else if (ret < 0)
+ btrfs_abort_transaction(trans, root, ret);
+
btrfs_end_log_trans(root);
return err;
@@ -2282,13 +2907,14 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
log = root->log_root;
mutex_lock(&BTRFS_I(inode)->log_mutex);
- ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino,
+ ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
dirid, &index);
mutex_unlock(&BTRFS_I(inode)->log_mutex);
if (ret == -ENOSPC) {
- root->fs_info->last_trans_log_full_commit = trans->transid;
+ btrfs_set_log_full_commit(root->fs_info, trans);
ret = 0;
- }
+ } else if (ret < 0 && ret != -ENOENT)
+ btrfs_abort_transaction(trans, root, ret);
btrfs_end_log_trans(root);
return ret;
@@ -2323,7 +2949,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans,
struct btrfs_dir_log_item);
btrfs_set_dir_log_end(path->nodes[0], item, last_offset);
btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_release_path(log, path);
+ btrfs_release_path(path);
return 0;
}
@@ -2339,7 +2965,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
u64 min_offset, u64 *last_offset_ret)
{
struct btrfs_key min_key;
- struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src;
int err = 0;
@@ -2348,37 +2973,33 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
int nritems;
u64 first_offset = min_offset;
u64 last_offset = (u64)-1;
+ u64 ino = btrfs_ino(inode);
log = root->log_root;
- max_key.objectid = inode->i_ino;
- max_key.offset = (u64)-1;
- max_key.type = key_type;
- min_key.objectid = inode->i_ino;
+ min_key.objectid = ino;
min_key.type = key_type;
min_key.offset = min_offset;
path->keep_locks = 1;
- ret = btrfs_search_forward(root, &min_key, &max_key,
- path, 0, trans->transid);
+ ret = btrfs_search_forward(root, &min_key, path, trans->transid);
/*
* we didn't find anything from this transaction, see if there
* is anything at all
*/
- if (ret != 0 || min_key.objectid != inode->i_ino ||
- min_key.type != key_type) {
- min_key.objectid = inode->i_ino;
+ if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) {
+ min_key.objectid = ino;
min_key.type = key_type;
min_key.offset = (u64)-1;
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
if (ret < 0) {
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
return ret;
}
- ret = btrfs_previous_item(root, path, inode->i_ino, key_type);
+ ret = btrfs_previous_item(root, path, ino, key_type);
/* if ret == 0 there are items for this type,
* create a range to tell us the last key of this type.
@@ -2396,7 +3017,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
}
/* go backward to find any previous key */
- ret = btrfs_previous_item(root, path, inode->i_ino, key_type);
+ ret = btrfs_previous_item(root, path, ino, key_type);
if (ret == 0) {
struct btrfs_key tmp;
btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
@@ -2411,14 +3032,12 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
}
}
}
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
/* find the first key from this transaction again */
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (ret != 0) {
- WARN_ON(1);
+ if (WARN_ON(ret != 0))
goto done;
- }
/*
* we have a block from this transaction, log every item in it
@@ -2431,8 +3050,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
for (i = path->slots[0]; i < nritems; i++) {
btrfs_item_key_to_cpu(src, &min_key, i);
- if (min_key.objectid != inode->i_ino ||
- min_key.type != key_type)
+ if (min_key.objectid != ino || min_key.type != key_type)
goto done;
ret = overwrite_item(trans, log, dst_path, src, i,
&min_key);
@@ -2453,7 +3071,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
goto done;
}
btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]);
- if (tmp.objectid != inode->i_ino || tmp.type != key_type) {
+ if (tmp.objectid != ino || tmp.type != key_type) {
last_offset = (u64)-1;
goto done;
}
@@ -2469,8 +3087,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
}
}
done:
- btrfs_release_path(root, path);
- btrfs_release_path(log, dst_path);
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
if (err == 0) {
*last_offset_ret = last_offset;
@@ -2479,8 +3097,7 @@ done:
* is valid
*/
ret = insert_dir_log_key(trans, log, path, key_type,
- inode->i_ino, first_offset,
- last_offset);
+ ino, first_offset, last_offset);
if (ret)
err = ret;
}
@@ -2544,6 +3161,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
int ret;
struct btrfs_key key;
struct btrfs_key found_key;
+ int start_slot;
key.objectid = objectid;
key.type = max_key_type;
@@ -2551,7 +3169,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
while (1) {
ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
- BUG_ON(ret == 0);
+ BUG_ON(ret == 0); /* Logic error */
if (ret < 0)
break;
@@ -2565,35 +3183,133 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
if (found_key.objectid != objectid)
break;
- ret = btrfs_del_item(trans, log, path);
- BUG_ON(ret);
- btrfs_release_path(log, path);
+ found_key.offset = 0;
+ found_key.type = 0;
+ ret = btrfs_bin_search(path->nodes[0], &found_key, 0,
+ &start_slot);
+
+ ret = btrfs_del_items(trans, log, path, start_slot,
+ path->slots[0] - start_slot + 1);
+ /*
+ * If start slot isn't 0 then we don't need to re-search, we've
+ * found the last guy with the objectid in this tree.
+ */
+ if (ret || start_slot != 0)
+ break;
+ btrfs_release_path(path);
}
- btrfs_release_path(log, path);
+ btrfs_release_path(path);
+ if (ret > 0)
+ ret = 0;
return ret;
}
+static void fill_inode_item(struct btrfs_trans_handle *trans,
+ struct extent_buffer *leaf,
+ struct btrfs_inode_item *item,
+ struct inode *inode, int log_inode_only)
+{
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
+
+ if (log_inode_only) {
+ /* set the generation to zero so the recover code
+ * can tell the difference between an logging
+ * just to say 'this inode exists' and a logging
+ * to say 'update this inode with these values'
+ */
+ btrfs_set_token_inode_generation(leaf, item, 0, &token);
+ btrfs_set_token_inode_size(leaf, item, 0, &token);
+ } else {
+ btrfs_set_token_inode_generation(leaf, item,
+ BTRFS_I(inode)->generation,
+ &token);
+ btrfs_set_token_inode_size(leaf, item, inode->i_size, &token);
+ }
+
+ btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
+ btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
+ btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
+ btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
+
+ btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+ inode->i_atime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+ inode->i_atime.tv_nsec, &token);
+
+ btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+ inode->i_mtime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+ inode->i_mtime.tv_nsec, &token);
+
+ btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+ inode->i_ctime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+ inode->i_ctime.tv_nsec, &token);
+
+ btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
+ &token);
+
+ btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
+ btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
+ btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
+ btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
+ btrfs_set_token_inode_block_group(leaf, item, 0, &token);
+}
+
+static int log_inode_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *log, struct btrfs_path *path,
+ struct inode *inode)
+{
+ struct btrfs_inode_item *inode_item;
+ int ret;
+
+ ret = btrfs_insert_empty_item(trans, log, path,
+ &BTRFS_I(inode)->location,
+ sizeof(*inode_item));
+ if (ret && ret != -EEXIST)
+ return ret;
+ inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_item);
+ fill_inode_item(trans, path->nodes[0], inode_item, inode, 0);
+ btrfs_release_path(path);
+ return 0;
+}
+
static noinline int copy_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *log,
+ struct inode *inode,
struct btrfs_path *dst_path,
- struct extent_buffer *src,
+ struct btrfs_path *src_path, u64 *last_extent,
int start_slot, int nr, int inode_only)
{
unsigned long src_offset;
unsigned long dst_offset;
+ struct btrfs_root *log = BTRFS_I(inode)->root->log_root;
struct btrfs_file_extent_item *extent;
struct btrfs_inode_item *inode_item;
+ struct extent_buffer *src = src_path->nodes[0];
+ struct btrfs_key first_key, last_key, key;
int ret;
struct btrfs_key *ins_keys;
u32 *ins_sizes;
char *ins_data;
int i;
struct list_head ordered_sums;
+ int skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+ bool has_extents = false;
+ bool need_find_last_extent = (*last_extent == 0);
+ bool done = false;
INIT_LIST_HEAD(&ordered_sums);
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
+ if (!ins_data)
+ return -ENOMEM;
+
+ first_key.objectid = (u64)-1;
+
ins_sizes = (u32 *)ins_data;
ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
@@ -2614,36 +3330,50 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
src_offset = btrfs_item_ptr_offset(src, start_slot + i);
- copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
- src_offset, ins_sizes[i]);
+ if ((i == (nr - 1)))
+ last_key = ins_keys[i];
- if (inode_only == LOG_INODE_EXISTS &&
- ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
+ if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
inode_item = btrfs_item_ptr(dst_path->nodes[0],
dst_path->slots[0],
struct btrfs_inode_item);
- btrfs_set_inode_size(dst_path->nodes[0], inode_item, 0);
+ fill_inode_item(trans, dst_path->nodes[0], inode_item,
+ inode, inode_only == LOG_INODE_EXISTS);
+ } else {
+ copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
+ src_offset, ins_sizes[i]);
+ }
- /* set the generation to zero so the recover code
- * can tell the difference between an logging
- * just to say 'this inode exists' and a logging
- * to say 'update this inode with these values'
- */
- btrfs_set_inode_generation(dst_path->nodes[0],
- inode_item, 0);
+ /*
+ * We set need_find_last_extent here in case we know we were
+ * processing other items and then walk into the first extent in
+ * the inode. If we don't hit an extent then nothing changes,
+ * we'll do the last search the next time around.
+ */
+ if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
+ has_extents = true;
+ if (need_find_last_extent &&
+ first_key.objectid == (u64)-1)
+ first_key = ins_keys[i];
+ } else {
+ need_find_last_extent = false;
}
+
/* take a reference on file data extents so that truncates
* or deletes of this inode don't have to relog the inode
* again
*/
- if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY) {
+ if (btrfs_key_type(ins_keys + i) == BTRFS_EXTENT_DATA_KEY &&
+ !skip_csum) {
int found_type;
extent = btrfs_item_ptr(src, start_slot + i,
struct btrfs_file_extent_item);
+ if (btrfs_file_extent_generation(src, extent) < trans->transid)
+ continue;
+
found_type = btrfs_file_extent_type(src, extent);
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ if (found_type == BTRFS_FILE_EXTENT_REG) {
u64 ds, dl, cs, cl;
ds = btrfs_file_extent_disk_bytenr(src,
extent);
@@ -2665,14 +3395,18 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_csums_range(
log->fs_info->csum_root,
ds + cs, ds + cs + cl - 1,
- &ordered_sums);
- BUG_ON(ret);
+ &ordered_sums, 0);
+ if (ret) {
+ btrfs_release_path(dst_path);
+ kfree(ins_data);
+ return ret;
+ }
}
}
}
btrfs_mark_buffer_dirty(dst_path->nodes[0]);
- btrfs_release_path(log, dst_path);
+ btrfs_release_path(dst_path);
kfree(ins_data);
/*
@@ -2689,6 +3423,414 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
list_del(&sums->list);
kfree(sums);
}
+
+ if (!has_extents)
+ return ret;
+
+ /*
+ * Because we use btrfs_search_forward we could skip leaves that were
+ * not modified and then assume *last_extent is valid when it really
+ * isn't. So back up to the previous leaf and read the end of the last
+ * extent before we go and fill in holes.
+ */
+ if (need_find_last_extent) {
+ u64 len;
+
+ ret = btrfs_prev_leaf(BTRFS_I(inode)->root, src_path);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ goto fill_holes;
+ if (src_path->slots[0])
+ src_path->slots[0]--;
+ src = src_path->nodes[0];
+ btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
+ if (key.objectid != btrfs_ino(inode) ||
+ key.type != BTRFS_EXTENT_DATA_KEY)
+ goto fill_holes;
+ extent = btrfs_item_ptr(src, src_path->slots[0],
+ struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(src, extent) ==
+ BTRFS_FILE_EXTENT_INLINE) {
+ len = btrfs_file_extent_inline_len(src,
+ src_path->slots[0],
+ extent);
+ *last_extent = ALIGN(key.offset + len,
+ log->sectorsize);
+ } else {
+ len = btrfs_file_extent_num_bytes(src, extent);
+ *last_extent = key.offset + len;
+ }
+ }
+fill_holes:
+ /* So we did prev_leaf, now we need to move to the next leaf, but a few
+ * things could have happened
+ *
+ * 1) A merge could have happened, so we could currently be on a leaf
+ * that holds what we were copying in the first place.
+ * 2) A split could have happened, and now not all of the items we want
+ * are on the same leaf.
+ *
+ * So we need to adjust how we search for holes, we need to drop the
+ * path and re-search for the first extent key we found, and then walk
+ * forward until we hit the last one we copied.
+ */
+ if (need_find_last_extent) {
+ /* btrfs_prev_leaf could return 1 without releasing the path */
+ btrfs_release_path(src_path);
+ ret = btrfs_search_slot(NULL, BTRFS_I(inode)->root, &first_key,
+ src_path, 0, 0);
+ if (ret < 0)
+ return ret;
+ ASSERT(ret == 0);
+ src = src_path->nodes[0];
+ i = src_path->slots[0];
+ } else {
+ i = start_slot;
+ }
+
+ /*
+ * Ok so here we need to go through and fill in any holes we may have
+ * to make sure that holes are punched for those areas in case they had
+ * extents previously.
+ */
+ while (!done) {
+ u64 offset, len;
+ u64 extent_end;
+
+ if (i >= btrfs_header_nritems(src_path->nodes[0])) {
+ ret = btrfs_next_leaf(BTRFS_I(inode)->root, src_path);
+ if (ret < 0)
+ return ret;
+ ASSERT(ret == 0);
+ src = src_path->nodes[0];
+ i = 0;
+ }
+
+ btrfs_item_key_to_cpu(src, &key, i);
+ if (!btrfs_comp_cpu_keys(&key, &last_key))
+ done = true;
+ if (key.objectid != btrfs_ino(inode) ||
+ key.type != BTRFS_EXTENT_DATA_KEY) {
+ i++;
+ continue;
+ }
+ extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(src, extent) ==
+ BTRFS_FILE_EXTENT_INLINE) {
+ len = btrfs_file_extent_inline_len(src, i, extent);
+ extent_end = ALIGN(key.offset + len, log->sectorsize);
+ } else {
+ len = btrfs_file_extent_num_bytes(src, extent);
+ extent_end = key.offset + len;
+ }
+ i++;
+
+ if (*last_extent == key.offset) {
+ *last_extent = extent_end;
+ continue;
+ }
+ offset = *last_extent;
+ len = key.offset - *last_extent;
+ ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
+ offset, 0, 0, len, 0, len, 0,
+ 0, 0);
+ if (ret)
+ break;
+ *last_extent = offset + len;
+ }
+ /*
+ * Need to let the callers know we dropped the path so they should
+ * re-search.
+ */
+ if (!ret && need_find_last_extent)
+ ret = 1;
+ return ret;
+}
+
+static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct extent_map *em1, *em2;
+
+ em1 = list_entry(a, struct extent_map, list);
+ em2 = list_entry(b, struct extent_map, list);
+
+ if (em1->start < em2->start)
+ return -1;
+ else if (em1->start > em2->start)
+ return 1;
+ return 0;
+}
+
+static int log_one_extent(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct btrfs_root *root,
+ struct extent_map *em, struct btrfs_path *path,
+ struct list_head *logged_list)
+{
+ struct btrfs_root *log = root->log_root;
+ struct btrfs_file_extent_item *fi;
+ struct extent_buffer *leaf;
+ struct btrfs_ordered_extent *ordered;
+ struct list_head ordered_sums;
+ struct btrfs_map_token token;
+ struct btrfs_key key;
+ u64 mod_start = em->mod_start;
+ u64 mod_len = em->mod_len;
+ u64 csum_offset;
+ u64 csum_len;
+ u64 extent_offset = em->start - em->orig_start;
+ u64 block_len;
+ int ret;
+ bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+ int extent_inserted = 0;
+
+ INIT_LIST_HEAD(&ordered_sums);
+ btrfs_init_map_token(&token);
+
+ ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
+ em->start + em->len, NULL, 0, 1,
+ sizeof(*fi), &extent_inserted);
+ if (ret)
+ return ret;
+
+ if (!extent_inserted) {
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = em->start;
+
+ ret = btrfs_insert_empty_item(trans, log, path, &key,
+ sizeof(*fi));
+ if (ret)
+ return ret;
+ }
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+
+ btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
+ &token);
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+ skip_csum = true;
+ btrfs_set_token_file_extent_type(leaf, fi,
+ BTRFS_FILE_EXTENT_PREALLOC,
+ &token);
+ } else {
+ btrfs_set_token_file_extent_type(leaf, fi,
+ BTRFS_FILE_EXTENT_REG,
+ &token);
+ if (em->block_start == EXTENT_MAP_HOLE)
+ skip_csum = true;
+ }
+
+ block_len = max(em->block_len, em->orig_block_len);
+ if (em->compress_type != BTRFS_COMPRESS_NONE) {
+ btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
+ em->block_start,
+ &token);
+ btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
+ &token);
+ } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+ btrfs_set_token_file_extent_disk_bytenr(leaf, fi,
+ em->block_start -
+ extent_offset, &token);
+ btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len,
+ &token);
+ } else {
+ btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token);
+ btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0,
+ &token);
+ }
+
+ btrfs_set_token_file_extent_offset(leaf, fi,
+ em->start - em->orig_start,
+ &token);
+ btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
+ btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
+ btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
+ &token);
+ btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
+ btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
+ btrfs_mark_buffer_dirty(leaf);
+
+ btrfs_release_path(path);
+ if (ret) {
+ return ret;
+ }
+
+ if (skip_csum)
+ return 0;
+
+ /*
+ * First check and see if our csums are on our outstanding ordered
+ * extents.
+ */
+ list_for_each_entry(ordered, logged_list, log_list) {
+ struct btrfs_ordered_sum *sum;
+
+ if (!mod_len)
+ break;
+
+ if (ordered->file_offset + ordered->len <= mod_start ||
+ mod_start + mod_len <= ordered->file_offset)
+ continue;
+
+ /*
+ * We are going to copy all the csums on this ordered extent, so
+ * go ahead and adjust mod_start and mod_len in case this
+ * ordered extent has already been logged.
+ */
+ if (ordered->file_offset > mod_start) {
+ if (ordered->file_offset + ordered->len >=
+ mod_start + mod_len)
+ mod_len = ordered->file_offset - mod_start;
+ /*
+ * If we have this case
+ *
+ * |--------- logged extent ---------|
+ * |----- ordered extent ----|
+ *
+ * Just don't mess with mod_start and mod_len, we'll
+ * just end up logging more csums than we need and it
+ * will be ok.
+ */
+ } else {
+ if (ordered->file_offset + ordered->len <
+ mod_start + mod_len) {
+ mod_len = (mod_start + mod_len) -
+ (ordered->file_offset + ordered->len);
+ mod_start = ordered->file_offset +
+ ordered->len;
+ } else {
+ mod_len = 0;
+ }
+ }
+
+ /*
+ * To keep us from looping for the above case of an ordered
+ * extent that falls inside of the logged extent.
+ */
+ if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
+ &ordered->flags))
+ continue;
+
+ if (ordered->csum_bytes_left) {
+ btrfs_start_ordered_extent(inode, ordered, 0);
+ wait_event(ordered->wait,
+ ordered->csum_bytes_left == 0);
+ }
+
+ list_for_each_entry(sum, &ordered->list, list) {
+ ret = btrfs_csum_file_blocks(trans, log, sum);
+ if (ret)
+ goto unlocked;
+ }
+
+ }
+unlocked:
+
+ if (!mod_len || ret)
+ return ret;
+
+ if (em->compress_type) {
+ csum_offset = 0;
+ csum_len = block_len;
+ } else {
+ csum_offset = mod_start - em->start;
+ csum_len = mod_len;
+ }
+
+ /* block start is already adjusted for the file extent offset. */
+ ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
+ em->block_start + csum_offset,
+ em->block_start + csum_offset +
+ csum_len - 1, &ordered_sums, 0);
+ if (ret)
+ return ret;
+
+ while (!list_empty(&ordered_sums)) {
+ struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
+ struct btrfs_ordered_sum,
+ list);
+ if (!ret)
+ ret = btrfs_csum_file_blocks(trans, log, sums);
+ list_del(&sums->list);
+ kfree(sums);
+ }
+
+ return ret;
+}
+
+static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode,
+ struct btrfs_path *path,
+ struct list_head *logged_list)
+{
+ struct extent_map *em, *n;
+ struct list_head extents;
+ struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
+ u64 test_gen;
+ int ret = 0;
+ int num = 0;
+
+ INIT_LIST_HEAD(&extents);
+
+ write_lock(&tree->lock);
+ test_gen = root->fs_info->last_trans_committed;
+
+ list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
+ list_del_init(&em->list);
+
+ /*
+ * Just an arbitrary number, this can be really CPU intensive
+ * once we start getting a lot of extents, and really once we
+ * have a bunch of extents we just want to commit since it will
+ * be faster.
+ */
+ if (++num > 32768) {
+ list_del_init(&tree->modified_extents);
+ ret = -EFBIG;
+ goto process;
+ }
+
+ if (em->generation <= test_gen)
+ continue;
+ /* Need a ref to keep it from getting evicted from cache */
+ atomic_inc(&em->refs);
+ set_bit(EXTENT_FLAG_LOGGING, &em->flags);
+ list_add_tail(&em->list, &extents);
+ num++;
+ }
+
+ list_sort(NULL, &extents, extent_cmp);
+
+process:
+ while (!list_empty(&extents)) {
+ em = list_entry(extents.next, struct extent_map, list);
+
+ list_del_init(&em->list);
+
+ /*
+ * If we had an error we just need to delete everybody from our
+ * private list.
+ */
+ if (ret) {
+ clear_em_logging(tree, em);
+ free_extent_map(em);
+ continue;
+ }
+
+ write_unlock(&tree->lock);
+
+ ret = log_one_extent(trans, inode, root, em, path, logged_list);
+ write_lock(&tree->lock);
+ clear_em_logging(tree, em);
+ free_extent_map(em);
+ }
+ WARN_ON(!list_empty(&extents));
+ write_unlock(&tree->lock);
+
+ btrfs_release_path(path);
return ret;
}
@@ -2716,35 +3858,57 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src = NULL;
+ LIST_HEAD(logged_list);
+ u64 last_extent = 0;
int err = 0;
int ret;
int nritems;
int ins_start_slot = 0;
int ins_nr;
-
- log = root->log_root;
+ bool fast_search = false;
+ u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
dst_path = btrfs_alloc_path();
+ if (!dst_path) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
- min_key.objectid = inode->i_ino;
+ min_key.objectid = ino;
min_key.type = BTRFS_INODE_ITEM_KEY;
min_key.offset = 0;
- max_key.objectid = inode->i_ino;
+ max_key.objectid = ino;
- /* today the code can only do partial logging of directories */
- if (!S_ISDIR(inode->i_mode))
- inode_only = LOG_INODE_ALL;
- if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode))
+ /* today the code can only do partial logging of directories */
+ if (S_ISDIR(inode->i_mode) ||
+ (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags) &&
+ inode_only == LOG_INODE_EXISTS))
max_key.type = BTRFS_XATTR_ITEM_KEY;
else
max_key.type = (u8)-1;
max_key.offset = (u64)-1;
+ /* Only run delayed items if we are a dir or a new file */
+ if (S_ISDIR(inode->i_mode) ||
+ BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) {
+ ret = btrfs_commit_inode_delayed_items(trans, inode);
+ if (ret) {
+ btrfs_free_path(path);
+ btrfs_free_path(dst_path);
+ return ret;
+ }
+ }
+
mutex_lock(&BTRFS_I(inode)->log_mutex);
+ btrfs_get_logged_extents(inode, &logged_list);
+
/*
* a brute force approach to making sure we get the most uptodate
* copies of everything.
@@ -2754,10 +3918,33 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
if (inode_only == LOG_INODE_EXISTS)
max_key_type = BTRFS_XATTR_ITEM_KEY;
- ret = drop_objectid_items(trans, log, path,
- inode->i_ino, max_key_type);
+ ret = drop_objectid_items(trans, log, path, ino, max_key_type);
} else {
- ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0);
+ if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags)) {
+ clear_bit(BTRFS_INODE_COPY_EVERYTHING,
+ &BTRFS_I(inode)->runtime_flags);
+ ret = btrfs_truncate_inode_items(trans, log,
+ inode, 0, 0);
+ } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
+ &BTRFS_I(inode)->runtime_flags) ||
+ inode_only == LOG_INODE_EXISTS) {
+ if (inode_only == LOG_INODE_ALL)
+ fast_search = true;
+ max_key.type = BTRFS_XATTR_ITEM_KEY;
+ ret = drop_objectid_items(trans, log, path, ino,
+ max_key.type);
+ } else {
+ if (inode_only == LOG_INODE_ALL)
+ fast_search = true;
+ ret = log_inode_item(trans, log, dst_path, inode);
+ if (ret) {
+ err = ret;
+ goto out_unlock;
+ }
+ goto log_extents;
+ }
+
}
if (ret) {
err = ret;
@@ -2767,13 +3954,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
while (1) {
ins_nr = 0;
- ret = btrfs_search_forward(root, &min_key, &max_key,
- path, 0, trans->transid);
+ ret = btrfs_search_forward(root, &min_key,
+ path, trans->transid);
if (ret != 0)
break;
again:
/* note, ins_nr might be > 0 here, cleanup outside the loop */
- if (min_key.objectid != inode->i_ino)
+ if (min_key.objectid != ino)
break;
if (min_key.type > max_key.type)
break;
@@ -2788,11 +3975,15 @@ again:
goto next_slot;
}
- ret = copy_items(trans, log, dst_path, src, ins_start_slot,
- ins_nr, inode_only);
- if (ret) {
+ ret = copy_items(trans, inode, dst_path, path, &last_extent,
+ ins_start_slot, ins_nr, inode_only);
+ if (ret < 0) {
err = ret;
goto out_unlock;
+ } if (ret) {
+ ins_nr = 0;
+ btrfs_release_path(path);
+ continue;
}
ins_nr = 1;
ins_start_slot = path->slots[0];
@@ -2806,40 +3997,59 @@ next_slot:
goto again;
}
if (ins_nr) {
- ret = copy_items(trans, log, dst_path, src,
- ins_start_slot,
+ ret = copy_items(trans, inode, dst_path, path,
+ &last_extent, ins_start_slot,
ins_nr, inode_only);
- if (ret) {
+ if (ret < 0) {
err = ret;
goto out_unlock;
}
+ ret = 0;
ins_nr = 0;
}
- btrfs_release_path(root, path);
+ btrfs_release_path(path);
- if (min_key.offset < (u64)-1)
+ if (min_key.offset < (u64)-1) {
min_key.offset++;
- else if (min_key.type < (u8)-1)
+ } else if (min_key.type < max_key.type) {
min_key.type++;
- else if (min_key.objectid < (u64)-1)
- min_key.objectid++;
- else
+ min_key.offset = 0;
+ } else {
break;
+ }
}
if (ins_nr) {
- ret = copy_items(trans, log, dst_path, src,
- ins_start_slot,
- ins_nr, inode_only);
- if (ret) {
+ ret = copy_items(trans, inode, dst_path, path, &last_extent,
+ ins_start_slot, ins_nr, inode_only);
+ if (ret < 0) {
err = ret;
goto out_unlock;
}
+ ret = 0;
ins_nr = 0;
}
- WARN_ON(ins_nr);
+
+log_extents:
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
+ if (fast_search) {
+ ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
+ &logged_list);
+ if (ret) {
+ err = ret;
+ goto out_unlock;
+ }
+ } else if (inode_only == LOG_INODE_ALL) {
+ struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em, *n;
+
+ write_lock(&tree->lock);
+ list_for_each_entry_safe(em, n, &tree->modified_extents, list)
+ list_del_init(&em->list);
+ write_unlock(&tree->lock);
+ }
+
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
- btrfs_release_path(root, path);
- btrfs_release_path(log, dst_path);
ret = log_directory_changes(trans, root, inode, path, dst_path);
if (ret) {
err = ret;
@@ -2847,7 +4057,12 @@ next_slot:
}
}
BTRFS_I(inode)->logged_trans = trans->transid;
+ BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
out_unlock:
+ if (unlikely(err))
+ btrfs_put_logged_extents(&logged_list);
+ else
+ btrfs_submit_logged_extents(&logged_list, log);
mutex_unlock(&BTRFS_I(inode)->log_mutex);
btrfs_free_path(path);
@@ -2869,6 +4084,8 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
{
int ret = 0;
struct btrfs_root *root;
+ struct dentry *old_parent = NULL;
+ struct inode *orig_inode = inode;
/*
* for regular files, if its inode is already on disk, we don't
@@ -2888,7 +4105,14 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
}
while (1) {
- BTRFS_I(inode)->logged_trans = trans->transid;
+ /*
+ * If we are logging a directory then we start with our inode,
+ * not our parents inode, so we need to skipp setting the
+ * logged_trans so that further down in the log code we don't
+ * think this inode has already been logged.
+ */
+ if (inode != orig_inode)
+ BTRFS_I(inode)->logged_trans = trans->transid;
smp_mb();
if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
@@ -2898,8 +4122,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
* make sure any commits to the log are forced
* to be full commits
*/
- root->fs_info->last_trans_log_full_commit =
- trans->transid;
+ btrfs_set_log_full_commit(root->fs_info, trans);
ret = 1;
break;
}
@@ -2910,41 +4133,31 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
if (IS_ROOT(parent))
break;
- parent = parent->d_parent;
+ parent = dget_parent(parent);
+ dput(old_parent);
+ old_parent = parent;
inode = parent->d_inode;
}
+ dput(old_parent);
out:
return ret;
}
-static int inode_in_log(struct btrfs_trans_handle *trans,
- struct inode *inode)
-{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret = 0;
-
- mutex_lock(&root->log_mutex);
- if (BTRFS_I(inode)->logged_trans == trans->transid &&
- BTRFS_I(inode)->last_sub_trans <= root->last_log_commit)
- ret = 1;
- mutex_unlock(&root->log_mutex);
- return ret;
-}
-
-
/*
* helper function around btrfs_log_inode to make sure newly created
* parent directories also end up in the log. A minimal inode and backref
* only logging is done of any parent directories that are older than
* the last committed transaction
*/
-int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- struct dentry *parent, int exists_only)
+static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode,
+ struct dentry *parent, int exists_only,
+ struct btrfs_log_ctx *ctx)
{
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb;
+ struct dentry *old_parent = NULL;
int ret = 0;
u64 last_committed = root->fs_info->last_trans_committed;
@@ -2955,6 +4168,10 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_no_trans;
}
+ /*
+ * The prev transaction commit doesn't complete, we need do
+ * full commit by ourselves.
+ */
if (root->fs_info->last_trans_log_full_commit >
root->fs_info->last_trans_committed) {
ret = 1;
@@ -2972,14 +4189,14 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
- if (inode_in_log(trans, inode)) {
+ if (btrfs_inode_in_log(inode, trans->transid)) {
ret = BTRFS_NO_LOG_SYNC;
goto end_no_trans;
}
- ret = start_log_trans(trans, root);
+ ret = start_log_trans(trans, root, ctx);
if (ret)
- goto end_trans;
+ goto end_no_trans;
ret = btrfs_log_inode(trans, root, inode, inode_only);
if (ret)
@@ -3016,15 +4233,20 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (IS_ROOT(parent))
break;
- parent = parent->d_parent;
+ parent = dget_parent(parent);
+ dput(old_parent);
+ old_parent = parent;
}
ret = 0;
end_trans:
+ dput(old_parent);
if (ret < 0) {
- BUG_ON(ret != -ENOSPC);
- root->fs_info->last_trans_log_full_commit = trans->transid;
+ btrfs_set_log_full_commit(root->fs_info, trans);
ret = 1;
}
+
+ if (ret)
+ btrfs_remove_log_ctx(root, ctx);
btrfs_end_log_trans(root);
end_no_trans:
return ret;
@@ -3037,10 +4259,17 @@ end_no_trans:
* data on disk.
*/
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct dentry *dentry)
+ struct btrfs_root *root, struct dentry *dentry,
+ struct btrfs_log_ctx *ctx)
{
- return btrfs_log_inode_parent(trans, root, dentry->d_inode,
- dentry->d_parent, 0);
+ struct dentry *parent = dget_parent(dentry);
+ int ret;
+
+ ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
+ 0, ctx);
+ dput(parent);
+
+ return ret;
}
/*
@@ -3062,16 +4291,27 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
.stage = 0,
};
- fs_info->log_root_recovering = 1;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
+
+ fs_info->log_root_recovering = 1;
trans = btrfs_start_transaction(fs_info->tree_root, 0);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto error;
+ }
wc.trans = trans;
wc.pin = 1;
- walk_log_tree(trans, log_root_tree, &wc);
+ ret = walk_log_tree(trans, log_root_tree, &wc);
+ if (ret) {
+ btrfs_error(fs_info, ret, "Failed to pin buffers while "
+ "recovering log root tree.");
+ goto error;
+ }
again:
key.objectid = BTRFS_TREE_LOG_OBJECTID;
@@ -3080,8 +4320,12 @@ again:
while (1) {
ret = btrfs_search_slot(NULL, log_root_tree, &key, path, 0, 0);
- if (ret < 0)
- break;
+
+ if (ret < 0) {
+ btrfs_error(fs_info, ret,
+ "Couldn't find tree log root.");
+ goto error;
+ }
if (ret > 0) {
if (path->slots[0] == 0)
break;
@@ -3089,31 +4333,40 @@ again:
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
- btrfs_release_path(log_root_tree, path);
+ btrfs_release_path(path);
if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
break;
- log = btrfs_read_fs_root_no_radix(log_root_tree,
- &found_key);
- BUG_ON(!log);
-
+ log = btrfs_read_fs_root(log_root_tree, &found_key);
+ if (IS_ERR(log)) {
+ ret = PTR_ERR(log);
+ btrfs_error(fs_info, ret,
+ "Couldn't read tree log root.");
+ goto error;
+ }
tmp_key.objectid = found_key.offset;
tmp_key.type = BTRFS_ROOT_ITEM_KEY;
tmp_key.offset = (u64)-1;
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
- BUG_ON(!wc.replay_dest);
+ if (IS_ERR(wc.replay_dest)) {
+ ret = PTR_ERR(wc.replay_dest);
+ free_extent_buffer(log->node);
+ free_extent_buffer(log->commit_root);
+ kfree(log);
+ btrfs_error(fs_info, ret, "Couldn't read target root "
+ "for tree log recovery.");
+ goto error;
+ }
wc.replay_dest->log_root = log;
btrfs_record_root_in_trans(trans, wc.replay_dest);
ret = walk_log_tree(trans, log, &wc);
- BUG_ON(ret);
- if (wc.stage == LOG_WALK_REPLAY_ALL) {
+ if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
ret = fixup_inode_link_counts(trans, wc.replay_dest,
path);
- BUG_ON(ret);
}
key.offset = found_key.offset - 1;
@@ -3122,10 +4375,13 @@ again:
free_extent_buffer(log->commit_root);
kfree(log);
+ if (ret)
+ goto error;
+
if (found_key.offset == 0)
break;
}
- btrfs_release_path(log_root_tree, path);
+ btrfs_release_path(path);
/* step one is to pin it all, step two is to replay just inodes */
if (wc.pin) {
@@ -3142,15 +4398,22 @@ again:
btrfs_free_path(path);
+ /* step 4: commit the transaction, which also unpins the blocks */
+ ret = btrfs_commit_transaction(trans, fs_info->tree_root);
+ if (ret)
+ return ret;
+
free_extent_buffer(log_root_tree->node);
log_root_tree->log_root = NULL;
fs_info->log_root_recovering = 0;
-
- /* step 4: commit the transaction, which also unpins the blocks */
- btrfs_commit_transaction(trans, fs_info->tree_root);
-
kfree(log_root_tree);
+
return 0;
+error:
+ if (wc.trans)
+ btrfs_end_transaction(wc.trans, fs_info->tree_root);
+ btrfs_free_path(path);
+ return ret;
}
/*
@@ -3240,6 +4503,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
root->fs_info->last_trans_committed))
return 0;
- return btrfs_log_inode_parent(trans, root, inode, parent, 1);
+ return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL);
}