aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2010-06-16 18:08:13 +0200
committerJiri Kosina <jkosina@suse.cz>2010-06-16 18:08:13 +0200
commitf1bbbb6912662b9f6070c5bfc4ca9eb1f06a9d5b (patch)
treec2c130a74be25b0b2dff992e1a195e2728bdaadd /fs/btrfs/inode.c
parentfd0961ff67727482bb20ca7e8ea97b83e9de2ddb (diff)
parent7e27d6e778cd87b6f2415515d7127eba53fe5d02 (diff)
Merge branch 'master' into for-next
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c1724
1 files changed, 1363 insertions, 361 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2bfdc641d4e..1bff92ad474 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
inline_len, compressed_size,
compressed_pages);
BUG_ON(ret);
+ btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
}
@@ -414,6 +415,7 @@ again:
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
/* lets try to make an inline extent */
if (ret || total_in < (actual_end - start)) {
@@ -439,7 +441,6 @@ again:
start, end, NULL,
EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_ACCOUNTING |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
btrfs_end_transaction(trans, root);
@@ -697,6 +698,38 @@ retry:
return 0;
}
+static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
+ u64 num_bytes)
+{
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em;
+ u64 alloc_hint = 0;
+
+ read_lock(&em_tree->lock);
+ em = search_extent_mapping(em_tree, start, num_bytes);
+ if (em) {
+ /*
+ * if block start isn't an actual block number then find the
+ * first block in this inode and use that as a hint. If that
+ * block is also bogus then just don't worry about it.
+ */
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ free_extent_map(em);
+ em = search_extent_mapping(em_tree, 0, 0);
+ if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
+ alloc_hint = em->block_start;
+ if (em)
+ free_extent_map(em);
+ } else {
+ alloc_hint = em->block_start;
+ free_extent_map(em);
+ }
+ }
+ read_unlock(&em_tree->lock);
+
+ return alloc_hint;
+}
+
/*
* when extent_io.c finds a delayed allocation range in the file,
* the call backs end up in this code. The basic idea is to
@@ -734,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode,
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
actual_end = min_t(u64, isize, end + 1);
@@ -753,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode,
EXTENT_CLEAR_UNLOCK_PAGE |
EXTENT_CLEAR_UNLOCK |
EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_ACCOUNTING |
EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK |
EXTENT_END_WRITEBACK);
@@ -769,29 +802,7 @@ static noinline int cow_file_range(struct inode *inode,
BUG_ON(disk_num_bytes >
btrfs_super_total_bytes(&root->fs_info->super_copy));
-
- read_lock(&BTRFS_I(inode)->extent_tree.lock);
- em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
- start, num_bytes);
- if (em) {
- /*
- * if block start isn't an actual block number then find the
- * first block in this inode and use that as a hint. If that
- * block is also bogus then just don't worry about it.
- */
- if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
- free_extent_map(em);
- em = search_extent_mapping(em_tree, 0, 0);
- if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
- alloc_hint = em->block_start;
- if (em)
- free_extent_map(em);
- } else {
- alloc_hint = em->block_start;
- free_extent_map(em);
- }
- }
- read_unlock(&BTRFS_I(inode)->extent_tree.lock);
+ alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
while (disk_num_bytes > 0) {
@@ -1174,6 +1185,13 @@ out_check:
num_bytes, num_bytes, type);
BUG_ON(ret);
+ if (root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ ret = btrfs_reloc_clone_csums(inode, cur_offset,
+ num_bytes);
+ BUG_ON(ret);
+ }
+
extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
cur_offset, cur_offset + num_bytes - 1,
locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
@@ -1226,15 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
}
static int btrfs_split_extent_hook(struct inode *inode,
- struct extent_state *orig, u64 split)
+ struct extent_state *orig, u64 split)
{
+ /* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
return 0;
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+ atomic_inc(&BTRFS_I(inode)->outstanding_extents);
return 0;
}
@@ -1252,10 +1268,7 @@ static int btrfs_merge_extent_hook(struct inode *inode,
if (!(other->state & EXTENT_DELALLOC))
return 0;
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- BTRFS_I(inode)->outstanding_extents--;
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+ atomic_dec(&BTRFS_I(inode)->outstanding_extents);
return 0;
}
@@ -1264,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
* bytes in this file, and to maintain the list of inodes that
* have pending delalloc work to be done.
*/
-static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
- unsigned long old, unsigned long bits)
+static int btrfs_set_bit_hook(struct inode *inode,
+ struct extent_state *state, int *bits)
{
/*
@@ -1273,17 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
* but in this case, we are only testeing for the DELALLOC
* bit, which is only set or cleared with irqs on
*/
- if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+ if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 len = state->end + 1 - state->start;
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- BTRFS_I(inode)->outstanding_extents++;
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- btrfs_delalloc_reserve_space(root, inode, end - start + 1);
+ if (*bits & EXTENT_FIRST_DELALLOC)
+ *bits &= ~EXTENT_FIRST_DELALLOC;
+ else
+ atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_lock(&root->fs_info->delalloc_lock);
- BTRFS_I(inode)->delalloc_bytes += end - start + 1;
- root->fs_info->delalloc_bytes += end - start + 1;
+ BTRFS_I(inode)->delalloc_bytes += len;
+ root->fs_info->delalloc_bytes += len;
if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
@@ -1297,45 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
* extent_io.c clear_bit_hook, see set_bit_hook for why
*/
static int btrfs_clear_bit_hook(struct inode *inode,
- struct extent_state *state, unsigned long bits)
+ struct extent_state *state, int *bits)
{
/*
* set_bit and clear bit hooks normally require _irqsave/restore
* but in this case, we are only testeing for the DELALLOC
* bit, which is only set or cleared with irqs on
*/
- if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+ if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
+ u64 len = state->end + 1 - state->start;
- if (bits & EXTENT_DO_ACCOUNTING) {
- spin_lock(&BTRFS_I(inode)->accounting_lock);
- WARN_ON(!BTRFS_I(inode)->outstanding_extents);
- BTRFS_I(inode)->outstanding_extents--;
- spin_unlock(&BTRFS_I(inode)->accounting_lock);
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
- }
+ if (*bits & EXTENT_FIRST_DELALLOC)
+ *bits &= ~EXTENT_FIRST_DELALLOC;
+ else if (!(*bits & EXTENT_DO_ACCOUNTING))
+ atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+
+ if (*bits & EXTENT_DO_ACCOUNTING)
+ btrfs_delalloc_release_metadata(inode, len);
+
+ if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ btrfs_free_reserved_data_space(inode, len);
spin_lock(&root->fs_info->delalloc_lock);
- if (state->end - state->start + 1 >
- root->fs_info->delalloc_bytes) {
- printk(KERN_INFO "btrfs warning: delalloc account "
- "%llu %llu\n",
- (unsigned long long)
- state->end - state->start + 1,
- (unsigned long long)
- root->fs_info->delalloc_bytes);
- btrfs_delalloc_free_space(root, inode, (u64)-1);
- root->fs_info->delalloc_bytes = 0;
- BTRFS_I(inode)->delalloc_bytes = 0;
- } else {
- btrfs_delalloc_free_space(root, inode,
- state->end -
- state->start + 1);
- root->fs_info->delalloc_bytes -= state->end -
- state->start + 1;
- BTRFS_I(inode)->delalloc_bytes -= state->end -
- state->start + 1;
- }
+ root->fs_info->delalloc_bytes -= len;
+ BTRFS_I(inode)->delalloc_bytes -= len;
+
if (BTRFS_I(inode)->delalloc_bytes == 0 &&
!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
@@ -1384,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
*/
static int __btrfs_submit_bio_start(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
- unsigned long bio_flags)
+ unsigned long bio_flags,
+ u64 bio_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
@@ -1403,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
* are inserted into the btree
*/
static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
- int mirror_num, unsigned long bio_flags)
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
return btrfs_map_bio(root, rw, bio, mirror_num, 1);
@@ -1414,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
* on write, or reading the csums from the tree before a read
*/
static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
- int mirror_num, unsigned long bio_flags)
+ int mirror_num, unsigned long bio_flags,
+ u64 bio_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
@@ -1439,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
/* we're doing a write, do the async checksumming */
return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
inode, rw, bio, mirror_num,
- bio_flags, __btrfs_submit_bio_start,
+ bio_flags, bio_offset,
+ __btrfs_submit_bio_start,
__btrfs_submit_bio_done);
}
@@ -1520,6 +1525,7 @@ again:
goto again;
}
+ BUG();
btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
ClearPageChecked(page);
out:
@@ -1650,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
+ struct btrfs_trans_handle *trans = NULL;
struct btrfs_ordered_extent *ordered_extent = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
@@ -1668,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
trans = btrfs_join_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
- btrfs_end_transaction(trans, root);
}
goto out;
}
@@ -1680,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
0, &cached_state, GFP_NOFS);
trans = btrfs_join_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compressed = 1;
@@ -1711,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
- /* this also removes the ordered extent from the tree */
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
- btrfs_end_transaction(trans, root);
out:
+ btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+ if (trans)
+ btrfs_end_transaction(trans, root);
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
@@ -1838,7 +1848,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
failrec->last_mirror,
- failrec->bio_flags);
+ failrec->bio_flags, 0);
return 0;
}
@@ -1993,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
}
/*
+ * calculate extra metadata reservation when snapshotting a subvolume
+ * contains orphan files.
+ */
+void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
+ struct btrfs_pending_snapshot *pending,
+ u64 *bytes_to_reserve)
+{
+ struct btrfs_root *root;
+ struct btrfs_block_rsv *block_rsv;
+ u64 num_bytes;
+ int index;
+
+ root = pending->root;
+ if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+ return;
+
+ block_rsv = root->orphan_block_rsv;
+
+ /* orphan block reservation for the snapshot */
+ num_bytes = block_rsv->size;
+
+ /*
+ * after the snapshot is created, COWing tree blocks may use more
+ * space than it frees. So we should make sure there is enough
+ * reserved space.
+ */
+ index = trans->transid & 0x1;
+ if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+ num_bytes += block_rsv->size -
+ (block_rsv->reserved + block_rsv->freed[index]);
+ }
+
+ *bytes_to_reserve += num_bytes;
+}
+
+void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
+ struct btrfs_pending_snapshot *pending)
+{
+ struct btrfs_root *root = pending->root;
+ struct btrfs_root *snap = pending->snap;
+ struct btrfs_block_rsv *block_rsv;
+ u64 num_bytes;
+ int index;
+ int ret;
+
+ if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+ return;
+
+ /* refill source subvolume's orphan block reservation */
+ block_rsv = root->orphan_block_rsv;
+ index = trans->transid & 0x1;
+ if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+ num_bytes = block_rsv->size -
+ (block_rsv->reserved + block_rsv->freed[index]);
+ ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+ root->orphan_block_rsv,
+ num_bytes);
+ BUG_ON(ret);
+ }
+
+ /* setup orphan block reservation for the snapshot */
+ block_rsv = btrfs_alloc_block_rsv(snap);
+ BUG_ON(!block_rsv);
+
+ btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
+ snap->orphan_block_rsv = block_rsv;
+
+ num_bytes = root->orphan_block_rsv->size;
+ ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+ block_rsv, num_bytes);
+ BUG_ON(ret);
+
+#if 0
+ /* insert orphan item for the snapshot */
+ WARN_ON(!root->orphan_item_inserted);
+ ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+ snap->root_key.objectid);
+ BUG_ON(ret);
+ snap->orphan_item_inserted = 1;
+#endif
+}
+
+enum btrfs_orphan_cleanup_state {
+ ORPHAN_CLEANUP_STARTED = 1,
+ ORPHAN_CLEANUP_DONE = 2,
+};
+
+/*
+ * This is called in transaction commmit time. If there are no orphan
+ * files in the subvolume, it removes orphan item and frees block_rsv
+ * structure.
+ */
+void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ int ret;
+
+ if (!list_empty(&root->orphan_list) ||
+ root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
+ return;
+
+ if (root->orphan_item_inserted &&
+ btrfs_root_refs(&root->root_item) > 0) {
+ ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
+ root->root_key.objectid);
+ BUG_ON(ret);
+ root->orphan_item_inserted = 0;
+ }
+
+ if (root->orphan_block_rsv) {
+ WARN_ON(root->orphan_block_rsv->size > 0);
+ btrfs_free_block_rsv(root, root->orphan_block_rsv);
+ root->orphan_block_rsv = NULL;
+ }
+}
+
+/*
* This creates an orphan entry for the given inode in case something goes
* wrong in the middle of an unlink/truncate.
+ *
+ * NOTE: caller of this function should reserve 5 units of metadata for
+ * this function.
*/
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret = 0;
+ struct btrfs_block_rsv *block_rsv = NULL;
+ int reserve = 0;
+ int insert = 0;
+ int ret;
- spin_lock(&root->list_lock);
+ if (!root->orphan_block_rsv) {
+ block_rsv = btrfs_alloc_block_rsv(root);
+ BUG_ON(!block_rsv);
+ }
- /* already on the orphan list, we're good */
- if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
- spin_unlock(&root->list_lock);
- return 0;
+ spin_lock(&root->orphan_lock);
+ if (!root->orphan_block_rsv) {
+ root->orphan_block_rsv = block_rsv;
+ } else if (block_rsv) {
+ btrfs_free_block_rsv(root, block_rsv);
+ block_rsv = NULL;
+ }
+
+ if (list_empty(&BTRFS_I(inode)->i_orphan)) {
+ list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+#if 0
+ /*
+ * For proper ENOSPC handling, we should do orphan
+ * cleanup when mounting. But this introduces backward
+ * compatibility issue.
+ */
+ if (!xchg(&root->orphan_item_inserted, 1))
+ insert = 2;
+ else
+ insert = 1;
+#endif
+ insert = 1;
+ } else {
+ WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
}
- list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+ if (!BTRFS_I(inode)->orphan_meta_reserved) {
+ BTRFS_I(inode)->orphan_meta_reserved = 1;
+ reserve = 1;
+ }
+ spin_unlock(&root->orphan_lock);
- spin_unlock(&root->list_lock);
+ if (block_rsv)
+ btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
- /*
- * insert an orphan item to track this unlinked/truncated file
- */
- ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+ /* grab metadata reservation from transaction handle */
+ if (reserve) {
+ ret = btrfs_orphan_reserve_metadata(trans, inode);
+ BUG_ON(ret);
+ }
- return ret;
+ /* insert an orphan item to track this unlinked/truncated file */
+ if (insert >= 1) {
+ ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+ BUG_ON(ret);
+ }
+
+ /* insert an orphan item to track subvolume contains orphan files */
+ if (insert >= 2) {
+ ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+ root->root_key.objectid);
+ BUG_ON(ret);
+ }
+ return 0;
}
/*
@@ -2028,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ int delete_item = 0;
+ int release_rsv = 0;
int ret = 0;
- spin_lock(&root->list_lock);
-
- if (list_empty(&BTRFS_I(inode)->i_orphan)) {
- spin_unlock(&root->list_lock);
- return 0;
+ spin_lock(&root->orphan_lock);
+ if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
+ list_del_init(&BTRFS_I(inode)->i_orphan);
+ delete_item = 1;
}
- list_del_init(&BTRFS_I(inode)->i_orphan);
- if (!trans) {
- spin_unlock(&root->list_lock);
- return 0;
+ if (BTRFS_I(inode)->orphan_meta_reserved) {
+ BTRFS_I(inode)->orphan_meta_reserved = 0;
+ release_rsv = 1;
}
+ spin_unlock(&root->orphan_lock);
- spin_unlock(&root->list_lock);
+ if (trans && delete_item) {
+ ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+ BUG_ON(ret);
+ }
- ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+ if (release_rsv)
+ btrfs_orphan_release_metadata(inode);
- return ret;
+ return 0;
}
/*
@@ -2064,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
struct inode *inode;
int ret = 0, nr_unlink = 0, nr_truncate = 0;
- if (!xchg(&root->clean_orphans, 0))
+ if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
return;
path = btrfs_alloc_path();
@@ -2117,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
- if (IS_ERR(inode))
- break;
+ BUG_ON(IS_ERR(inode));
/*
* add this inode to the orphan list so btrfs_orphan_del does
* the proper thing when we hit it
*/
- spin_lock(&root->list_lock);
+ spin_lock(&root->orphan_lock);
list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
- spin_unlock(&root->list_lock);
+ spin_unlock(&root->orphan_lock);
/*
* if this is a bad inode, means we actually succeeded in
@@ -2135,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
* do a destroy_inode
*/
if (is_bad_inode(inode)) {
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_start_transaction(root, 0);
btrfs_orphan_del(trans, inode);
btrfs_end_transaction(trans, root);
iput(inode);
@@ -2153,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
/* this will do delete_inode and everything for us */
iput(inode);
}
+ btrfs_free_path(path);
+
+ root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
+
+ if (root->orphan_block_rsv)
+ btrfs_block_rsv_release(root, root->orphan_block_rsv,
+ (u64)-1);
+
+ if (root->orphan_block_rsv || root->orphan_item_inserted) {
+ trans = btrfs_join_transaction(root, 1);
+ btrfs_end_transaction(trans, root);
+ }
if (nr_unlink)
printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
if (nr_truncate)
printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
-
- btrfs_free_path(path);
}
/*
@@ -2478,29 +2666,201 @@ out:
return ret;
}
-static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+/* helper to check if there is any shared block in the path */
+static int check_path_shared(struct btrfs_root *root,
+ struct btrfs_path *path)
+{
+ struct extent_buffer *eb;
+ int level;
+ int ret;
+ u64 refs = 1;
+
+ for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+ if (!path->nodes[level])
+ break;
+ eb = path->nodes[level];
+ if (!btrfs_block_can_be_shared(root, eb))
+ continue;
+ ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
+ &refs, NULL);
+ if (refs > 1)
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * helper to start transaction for unlink and rmdir.
+ *
+ * unlink and rmdir are special in btrfs, they do not always free space.
+ * so in enospc case, we should make sure they will free space before
+ * allowing them to use the global metadata reservation.
+ */
+static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
+ struct dentry *dentry)
{
- struct btrfs_root *root;
struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_path *path;
+ struct btrfs_inode_ref *ref;
+ struct btrfs_dir_item *di;
struct inode *inode = dentry->d_inode;
+ u64 index;
+ int check_link = 1;
+ int err = -ENOSPC;
int ret;
- unsigned long nr = 0;
- root = BTRFS_I(dir)->root;
+ trans = btrfs_start_transaction(root, 10);
+ if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
+ return trans;
- /*
- * 5 items for unlink inode
- * 1 for orphan
- */
- ret = btrfs_reserve_metadata_space(root, 6);
- if (ret)
- return ret;
+ if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+ return ERR_PTR(-ENOSPC);
+
+ /* check if there is someone else holds reference */
+ if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
+ return ERR_PTR(-ENOSPC);
- trans = btrfs_start_transaction(root, 1);
+ if (atomic_read(&inode->i_count) > 2)
+ return ERR_PTR(-ENOSPC);
+
+ if (xchg(&root->fs_info->enospc_unlink, 1))
+ return ERR_PTR(-ENOSPC);
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ root->fs_info->enospc_unlink = 0;
+ return ERR_PTR(-ENOMEM);
+ }
+
+ trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
- btrfs_unreserve_metadata_space(root, 6);
- return PTR_ERR(trans);
+ btrfs_free_path(path);
+ root->fs_info->enospc_unlink = 0;
+ return trans;
+ }
+
+ path->skip_locking = 1;
+ path->search_commit_root = 1;
+
+ ret = btrfs_lookup_inode(trans, root, path,
+ &BTRFS_I(dir)->location, 0);
+ if (ret < 0) {
+ err = ret;
+ goto out;
+ }
+ if (ret == 0) {
+ if (check_path_shared(root, path))
+ goto out;
+ } else {
+ check_link = 0;
+ }
+ btrfs_release_path(root, path);
+
+ ret = btrfs_lookup_inode(trans, root, path,
+ &BTRFS_I(inode)->location, 0);
+ if (ret < 0) {
+ err = ret;
+ goto out;
+ }
+ if (ret == 0) {
+ if (check_path_shared(root, path))
+ goto out;
+ } else {
+ check_link = 0;
+ }
+ btrfs_release_path(root, path);
+
+ if (ret == 0 && S_ISREG(inode->i_mode)) {
+ ret = btrfs_lookup_file_extent(trans, root, path,
+ inode->i_ino, (u64)-1, 0);
+ if (ret < 0) {
+ err = ret;
+ goto out;
+ }
+ BUG_ON(ret == 0);
+ if (check_path_shared(root, path))
+ goto out;
+ btrfs_release_path(root, path);
+ }
+
+ if (!check_link) {
+ err = 0;
+ goto out;
+ }
+
+ di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+ dentry->d_name.name, dentry->d_name.len, 0);
+ if (IS_ERR(di)) {
+ err = PTR_ERR(di);
+ goto out;
+ }
+ if (di) {
+ if (check_path_shared(root, path))
+ goto out;
+ } else {
+ err = 0;
+ goto out;
}
+ btrfs_release_path(root, path);
+
+ ref = btrfs_lookup_inode_ref(trans, root, path,
+ dentry->d_name.name, dentry->d_name.len,
+ inode->i_ino, dir->i_ino, 0);
+ if (IS_ERR(ref)) {
+ err = PTR_ERR(ref);
+ goto out;
+ }
+ BUG_ON(!ref);
+ if (check_path_shared(root, path))
+ goto out;
+ index = btrfs_inode_ref_index(path->nodes[0], ref);
+ btrfs_release_path(root, path);
+
+ di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index,
+ dentry->d_name.name, dentry->d_name.len, 0);
+ if (IS_ERR(di)) {
+ err = PTR_ERR(di);
+ goto out;
+ }
+ BUG_ON(ret == -ENOENT);
+ if (check_path_shared(root, path))
+ goto out;
+
+ err = 0;
+out:
+ btrfs_free_path(path);
+ if (err) {
+ btrfs_end_transaction(trans, root);
+ root->fs_info->enospc_unlink = 0;
+ return ERR_PTR(err);
+ }
+
+ trans->block_rsv = &root->fs_info->global_block_rsv;
+ return trans;
+}
+
+static void __unlink_end_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ if (trans->block_rsv == &root->fs_info->global_block_rsv) {
+ BUG_ON(!root->fs_info->enospc_unlink);
+ root->fs_info->enospc_unlink = 0;
+ }
+ btrfs_end_transaction_throttle(trans, root);
+}
+
+static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct btrfs_trans_handle *trans;
+ struct inode *inode = dentry->d_inode;
+ int ret;
+ unsigned long nr = 0;
+
+ trans = __unlink_start_trans(dir, dentry);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
btrfs_set_trans_block_group(trans, dir);
@@ -2508,14 +2868,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
dentry->d_name.name, dentry->d_name.len);
+ BUG_ON(ret);
- if (inode->i_nlink == 0)
+ if (inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans, inode);
+ BUG_ON(ret);
+ }
nr = trans->blocks_used;
-
- btrfs_end_transaction_throttle(trans, root);
- btrfs_unreserve_metadata_space(root, 6);
+ __unlink_end_trans(trans, root);
btrfs_btree_balance_dirty(root, nr);
return ret;
}
@@ -2587,7 +2948,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
int err = 0;
- int ret;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
unsigned long nr = 0;
@@ -2596,15 +2956,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
return -ENOTEMPTY;
- ret = btrfs_reserve_metadata_space(root, 5);
- if (ret)
- return ret;
-
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- btrfs_unreserve_metadata_space(root, 5);
+ trans = __unlink_start_trans(dir, dentry);
+ if (IS_ERR(trans))
return PTR_ERR(trans);
- }
btrfs_set_trans_block_group(trans, dir);
@@ -2627,12 +2981,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
btrfs_i_size_write(inode, 0);
out:
nr = trans->blocks_used;
- ret = btrfs_end_transaction_throttle(trans, root);
- btrfs_unreserve_metadata_space(root, 5);
+ __unlink_end_trans(trans, root);
btrfs_btree_balance_dirty(root, nr);
- if (ret && !err)
- err = ret;
return err;
}
@@ -3029,6 +3380,7 @@ out:
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
+ BUG_ON(ret);
}
btrfs_free_path(path);
return err;
@@ -3056,11 +3408,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
if ((offset & (blocksize - 1)) == 0)
goto out;
- ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
- if (ret)
- goto out;
-
- ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
+ ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret)
goto out;
@@ -3068,8 +3416,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
again:
page = grab_cache_page(mapping, index);
if (!page) {
- btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+ btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto out;
}
@@ -3132,8 +3479,7 @@ again:
out_unlock:
if (ret)
- btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
- btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+ btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
unlock_page(page);
page_cache_release(page);
out:
@@ -3145,7 +3491,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct extent_map *em;
+ struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
u64 mask = root->sectorsize - 1;
u64 hole_start = (inode->i_size + mask) & ~mask;
@@ -3183,11 +3529,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
u64 hint_byte = 0;
hole_size = last_byte - cur_offset;
- err = btrfs_reserve_metadata_space(root, 2);
- if (err)
+ trans = btrfs_start_transaction(root, 2);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
break;
-
- trans = btrfs_start_transaction(root, 1);
+ }
btrfs_set_trans_block_group(trans, inode);
err = btrfs_drop_extents(trans, inode, cur_offset,
@@ -3205,14 +3551,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
last_byte - 1, 0);
btrfs_end_transaction(trans, root);
- btrfs_unreserve_metadata_space(root, 2);
}
free_extent_map(em);
+ em = NULL;
cur_offset = last_byte;
if (cur_offset >= block_end)
break;
}
+ free_extent_map(em);
unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
GFP_NOFS);
return err;
@@ -3239,11 +3586,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
}
}
- ret = btrfs_reserve_metadata_space(root, 1);
- if (ret)
- return ret;
+ trans = btrfs_start_transaction(root, 5);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
- trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
ret = btrfs_orphan_add(trans, inode);
@@ -3251,7 +3597,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- btrfs_unreserve_metadata_space(root, 1);
btrfs_btree_balance_dirty(root, nr);
if (attr->ia_size > inode->i_size) {
@@ -3264,8 +3609,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
i_size_write(inode, attr->ia_size);
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_start_transaction(root, 0);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
+ trans->block_rsv = root->orphan_block_rsv;
+ BUG_ON(!trans->block_rsv);
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
@@ -3345,10 +3693,21 @@ void btrfs_delete_inode(struct inode *inode)
btrfs_i_size_write(inode, 0);
while (1) {
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_start_transaction(root, 0);
+ BUG_ON(IS_ERR(trans));
btrfs_set_trans_block_group(trans, inode);
- ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
+ trans->block_rsv = root->orphan_block_rsv;
+ ret = btrfs_block_rsv_check(trans, root,
+ root->orphan_block_rsv, 0, 5);
+ if (ret) {
+ BUG_ON(ret != -EAGAIN);