diff options
author | Jiri Kosina <jkosina@suse.cz> | 2010-06-16 18:08:13 +0200 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2010-06-16 18:08:13 +0200 |
commit | f1bbbb6912662b9f6070c5bfc4ca9eb1f06a9d5b (patch) | |
tree | c2c130a74be25b0b2dff992e1a195e2728bdaadd /fs/btrfs/inode.c | |
parent | fd0961ff67727482bb20ca7e8ea97b83e9de2ddb (diff) | |
parent | 7e27d6e778cd87b6f2415515d7127eba53fe5d02 (diff) |
Merge branch 'master' into for-next
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 1724 |
1 files changed, 1363 insertions, 361 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2bfdc641d4e..1bff92ad474 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, inline_len, compressed_size, compressed_pages); BUG_ON(ret); + btrfs_delalloc_release_metadata(inode, end + 1 - start); btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); return 0; } @@ -414,6 +415,7 @@ again: trans = btrfs_join_transaction(root, 1); BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; /* lets try to make an inline extent */ if (ret || total_in < (actual_end - start)) { @@ -439,7 +441,6 @@ again: start, end, NULL, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_ACCOUNTING | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); btrfs_end_transaction(trans, root); @@ -697,6 +698,38 @@ retry: return 0; } +static u64 get_extent_allocation_hint(struct inode *inode, u64 start, + u64 num_bytes) +{ + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; + u64 alloc_hint = 0; + + read_lock(&em_tree->lock); + em = search_extent_mapping(em_tree, start, num_bytes); + if (em) { + /* + * if block start isn't an actual block number then find the + * first block in this inode and use that as a hint. If that + * block is also bogus then just don't worry about it. + */ + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + free_extent_map(em); + em = search_extent_mapping(em_tree, 0, 0); + if (em && em->block_start < EXTENT_MAP_LAST_BYTE) + alloc_hint = em->block_start; + if (em) + free_extent_map(em); + } else { + alloc_hint = em->block_start; + free_extent_map(em); + } + } + read_unlock(&em_tree->lock); + + return alloc_hint; +} + /* * when extent_io.c finds a delayed allocation range in the file, * the call backs end up in this code. The basic idea is to @@ -734,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode, trans = btrfs_join_transaction(root, 1); BUG_ON(!trans); btrfs_set_trans_block_group(trans, inode); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; actual_end = min_t(u64, isize, end + 1); @@ -753,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode, EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | - EXTENT_CLEAR_ACCOUNTING | EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); @@ -769,29 +802,7 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(disk_num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); - - read_lock(&BTRFS_I(inode)->extent_tree.lock); - em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, - start, num_bytes); - if (em) { - /* - * if block start isn't an actual block number then find the - * first block in this inode and use that as a hint. If that - * block is also bogus then just don't worry about it. - */ - if (em->block_start >= EXTENT_MAP_LAST_BYTE) { - free_extent_map(em); - em = search_extent_mapping(em_tree, 0, 0); - if (em && em->block_start < EXTENT_MAP_LAST_BYTE) - alloc_hint = em->block_start; - if (em) - free_extent_map(em); - } else { - alloc_hint = em->block_start; - free_extent_map(em); - } - } - read_unlock(&BTRFS_I(inode)->extent_tree.lock); + alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); while (disk_num_bytes > 0) { @@ -1174,6 +1185,13 @@ out_check: num_bytes, num_bytes, type); BUG_ON(ret); + if (root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID) { + ret = btrfs_reloc_clone_csums(inode, cur_offset, + num_bytes); + BUG_ON(ret); + } + extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, cur_offset, cur_offset + num_bytes - 1, locked_page, EXTENT_CLEAR_UNLOCK_PAGE | @@ -1226,15 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, } static int btrfs_split_extent_hook(struct inode *inode, - struct extent_state *orig, u64 split) + struct extent_state *orig, u64 split) { + /* not delalloc, ignore it */ if (!(orig->state & EXTENT_DELALLOC)) return 0; - spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->outstanding_extents++; - spin_unlock(&BTRFS_I(inode)->accounting_lock); - + atomic_inc(&BTRFS_I(inode)->outstanding_extents); return 0; } @@ -1252,10 +1268,7 @@ static int btrfs_merge_extent_hook(struct inode *inode, if (!(other->state & EXTENT_DELALLOC)) return 0; - spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->outstanding_extents--; - spin_unlock(&BTRFS_I(inode)->accounting_lock); - + atomic_dec(&BTRFS_I(inode)->outstanding_extents); return 0; } @@ -1264,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, * bytes in this file, and to maintain the list of inodes that * have pending delalloc work to be done. */ -static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, - unsigned long old, unsigned long bits) +static int btrfs_set_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* @@ -1273,17 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, * but in this case, we are only testeing for the DELALLOC * bit, which is only set or cleared with irqs on */ - if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { + if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + u64 len = state->end + 1 - state->start; - spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->outstanding_extents++; - spin_unlock(&BTRFS_I(inode)->accounting_lock); - btrfs_delalloc_reserve_space(root, inode, end - start + 1); + if (*bits & EXTENT_FIRST_DELALLOC) + *bits &= ~EXTENT_FIRST_DELALLOC; + else + atomic_inc(&BTRFS_I(inode)->outstanding_extents); spin_lock(&root->fs_info->delalloc_lock); - BTRFS_I(inode)->delalloc_bytes += end - start + 1; - root->fs_info->delalloc_bytes += end - start + 1; + BTRFS_I(inode)->delalloc_bytes += len; + root->fs_info->delalloc_bytes += len; if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { list_add_tail(&BTRFS_I(inode)->delalloc_inodes, &root->fs_info->delalloc_inodes); @@ -1297,45 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, * extent_io.c clear_bit_hook, see set_bit_hook for why */ static int btrfs_clear_bit_hook(struct inode *inode, - struct extent_state *state, unsigned long bits) + struct extent_state *state, int *bits) { /* * set_bit and clear bit hooks normally require _irqsave/restore * but in this case, we are only testeing for the DELALLOC * bit, which is only set or cleared with irqs on */ - if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { + if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + u64 len = state->end + 1 - state->start; - if (bits & EXTENT_DO_ACCOUNTING) { - spin_lock(&BTRFS_I(inode)->accounting_lock); - WARN_ON(!BTRFS_I(inode)->outstanding_extents); - BTRFS_I(inode)->outstanding_extents--; - spin_unlock(&BTRFS_I(inode)->accounting_lock); - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); - } + if (*bits & EXTENT_FIRST_DELALLOC) + *bits &= ~EXTENT_FIRST_DELALLOC; + else if (!(*bits & EXTENT_DO_ACCOUNTING)) + atomic_dec(&BTRFS_I(inode)->outstanding_extents); + + if (*bits & EXTENT_DO_ACCOUNTING) + btrfs_delalloc_release_metadata(inode, len); + + if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) + btrfs_free_reserved_data_space(inode, len); spin_lock(&root->fs_info->delalloc_lock); - if (state->end - state->start + 1 > - root->fs_info->delalloc_bytes) { - printk(KERN_INFO "btrfs warning: delalloc account " - "%llu %llu\n", - (unsigned long long) - state->end - state->start + 1, - (unsigned long long) - root->fs_info->delalloc_bytes); - btrfs_delalloc_free_space(root, inode, (u64)-1); - root->fs_info->delalloc_bytes = 0; - BTRFS_I(inode)->delalloc_bytes = 0; - } else { - btrfs_delalloc_free_space(root, inode, - state->end - - state->start + 1); - root->fs_info->delalloc_bytes -= state->end - - state->start + 1; - BTRFS_I(inode)->delalloc_bytes -= state->end - - state->start + 1; - } + root->fs_info->delalloc_bytes -= len; + BTRFS_I(inode)->delalloc_bytes -= len; + if (BTRFS_I(inode)->delalloc_bytes == 0 && !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { list_del_init(&BTRFS_I(inode)->delalloc_inodes); @@ -1384,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, */ static int __btrfs_submit_bio_start(struct inode *inode, int rw, struct bio *bio, int mirror_num, - unsigned long bio_flags) + unsigned long bio_flags, + u64 bio_offset) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; @@ -1403,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, * are inserted into the btree */ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, - int mirror_num, unsigned long bio_flags) + int mirror_num, unsigned long bio_flags, + u64 bio_offset) { struct btrfs_root *root = BTRFS_I(inode)->root; return btrfs_map_bio(root, rw, bio, mirror_num, 1); @@ -1414,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, * on write, or reading the csums from the tree before a read */ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, - int mirror_num, unsigned long bio_flags) + int mirror_num, unsigned long bio_flags, + u64 bio_offset) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret = 0; @@ -1439,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, /* we're doing a write, do the async checksumming */ return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, - bio_flags, __btrfs_submit_bio_start, + bio_flags, bio_offset, + __btrfs_submit_bio_start, __btrfs_submit_bio_done); } @@ -1520,6 +1525,7 @@ again: goto again; } + BUG(); btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); ClearPageChecked(page); out: @@ -1650,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) { struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; @@ -1668,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); if (!ret) { trans = btrfs_join_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); - btrfs_end_transaction(trans, root); } goto out; } @@ -1680,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 0, &cached_state, GFP_NOFS); trans = btrfs_join_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + trans->block_rsv = &root->fs_info->delalloc_block_rsv; if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) compressed = 1; @@ -1711,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); - /* this also removes the ordered extent from the tree */ btrfs_ordered_update_i_size(inode, 0, ordered_extent); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); - btrfs_end_transaction(trans, root); out: + btrfs_delalloc_release_metadata(inode, ordered_extent->len); + if (trans) + btrfs_end_transaction(trans, root); /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ @@ -1838,7 +1848,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, failrec->last_mirror, - failrec->bio_flags); + failrec->bio_flags, 0); return 0; } @@ -1993,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) } /* + * calculate extra metadata reservation when snapshotting a subvolume + * contains orphan files. + */ +void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending, + u64 *bytes_to_reserve) +{ + struct btrfs_root *root; + struct btrfs_block_rsv *block_rsv; + u64 num_bytes; + int index; + + root = pending->root; + if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) + return; + + block_rsv = root->orphan_block_rsv; + + /* orphan block reservation for the snapshot */ + num_bytes = block_rsv->size; + + /* + * after the snapshot is created, COWing tree blocks may use more + * space than it frees. So we should make sure there is enough + * reserved space. + */ + index = trans->transid & 0x1; + if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { + num_bytes += block_rsv->size - + (block_rsv->reserved + block_rsv->freed[index]); + } + + *bytes_to_reserve += num_bytes; +} + +void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending) +{ + struct btrfs_root *root = pending->root; + struct btrfs_root *snap = pending->snap; + struct btrfs_block_rsv *block_rsv; + u64 num_bytes; + int index; + int ret; + + if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) + return; + + /* refill source subvolume's orphan block reservation */ + block_rsv = root->orphan_block_rsv; + index = trans->transid & 0x1; + if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { + num_bytes = block_rsv->size - + (block_rsv->reserved + block_rsv->freed[index]); + ret = btrfs_block_rsv_migrate(&pending->block_rsv, + root->orphan_block_rsv, + num_bytes); + BUG_ON(ret); + } + + /* setup orphan block reservation for the snapshot */ + block_rsv = btrfs_alloc_block_rsv(snap); + BUG_ON(!block_rsv); + + btrfs_add_durable_block_rsv(root->fs_info, block_rsv); + snap->orphan_block_rsv = block_rsv; + + num_bytes = root->orphan_block_rsv->size; + ret = btrfs_block_rsv_migrate(&pending->block_rsv, + block_rsv, num_bytes); + BUG_ON(ret); + +#if 0 + /* insert orphan item for the snapshot */ + WARN_ON(!root->orphan_item_inserted); + ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, + snap->root_key.objectid); + BUG_ON(ret); + snap->orphan_item_inserted = 1; +#endif +} + +enum btrfs_orphan_cleanup_state { + ORPHAN_CLEANUP_STARTED = 1, + ORPHAN_CLEANUP_DONE = 2, +}; + +/* + * This is called in transaction commmit time. If there are no orphan + * files in the subvolume, it removes orphan item and frees block_rsv + * structure. + */ +void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + int ret; + + if (!list_empty(&root->orphan_list) || + root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) + return; + + if (root->orphan_item_inserted && + btrfs_root_refs(&root->root_item) > 0) { + ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, + root->root_key.objectid); + BUG_ON(ret); + root->orphan_item_inserted = 0; + } + + if (root->orphan_block_rsv) { + WARN_ON(root->orphan_block_rsv->size > 0); + btrfs_free_block_rsv(root, root->orphan_block_rsv); + root->orphan_block_rsv = NULL; + } +} + +/* * This creates an orphan entry for the given inode in case something goes * wrong in the middle of an unlink/truncate. + * + * NOTE: caller of this function should reserve 5 units of metadata for + * this function. */ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - int ret = 0; + struct btrfs_block_rsv *block_rsv = NULL; + int reserve = 0; + int insert = 0; + int ret; - spin_lock(&root->list_lock); + if (!root->orphan_block_rsv) { + block_rsv = btrfs_alloc_block_rsv(root); + BUG_ON(!block_rsv); + } - /* already on the orphan list, we're good */ - if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - spin_unlock(&root->list_lock); - return 0; + spin_lock(&root->orphan_lock); + if (!root->orphan_block_rsv) { + root->orphan_block_rsv = block_rsv; + } else if (block_rsv) { + btrfs_free_block_rsv(root, block_rsv); + block_rsv = NULL; + } + + if (list_empty(&BTRFS_I(inode)->i_orphan)) { + list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); +#if 0 + /* + * For proper ENOSPC handling, we should do orphan + * cleanup when mounting. But this introduces backward + * compatibility issue. + */ + if (!xchg(&root->orphan_item_inserted, 1)) + insert = 2; + else + insert = 1; +#endif + insert = 1; + } else { + WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); } - list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); + if (!BTRFS_I(inode)->orphan_meta_reserved) { + BTRFS_I(inode)->orphan_meta_reserved = 1; + reserve = 1; + } + spin_unlock(&root->orphan_lock); - spin_unlock(&root->list_lock); + if (block_rsv) + btrfs_add_durable_block_rsv(root->fs_info, block_rsv); - /* - * insert an orphan item to track this unlinked/truncated file - */ - ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); + /* grab metadata reservation from transaction handle */ + if (reserve) { + ret = btrfs_orphan_reserve_metadata(trans, inode); + BUG_ON(ret); + } - return ret; + /* insert an orphan item to track this unlinked/truncated file */ + if (insert >= 1) { + ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); + BUG_ON(ret); + } + + /* insert an orphan item to track subvolume contains orphan files */ + if (insert >= 2) { + ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, + root->root_key.objectid); + BUG_ON(ret); + } + return 0; } /* @@ -2028,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; + int delete_item = 0; + int release_rsv = 0; int ret = 0; - spin_lock(&root->list_lock); - - if (list_empty(&BTRFS_I(inode)->i_orphan)) { - spin_unlock(&root->list_lock); - return 0; + spin_lock(&root->orphan_lock); + if (!list_empty(&BTRFS_I(inode)->i_orphan)) { + list_del_init(&BTRFS_I(inode)->i_orphan); + delete_item = 1; } - list_del_init(&BTRFS_I(inode)->i_orphan); - if (!trans) { - spin_unlock(&root->list_lock); - return 0; + if (BTRFS_I(inode)->orphan_meta_reserved) { + BTRFS_I(inode)->orphan_meta_reserved = 0; + release_rsv = 1; } + spin_unlock(&root->orphan_lock); - spin_unlock(&root->list_lock); + if (trans && delete_item) { + ret = btrfs_del_orphan_item(trans, root, inode->i_ino); + BUG_ON(ret); + } - ret = btrfs_del_orphan_item(trans, root, inode->i_ino); + if (release_rsv) + btrfs_orphan_release_metadata(inode); - return ret; + return 0; } /* @@ -2064,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) struct inode *inode; int ret = 0, nr_unlink = 0, nr_truncate = 0; - if (!xchg(&root->clean_orphans, 0)) + if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) return; path = btrfs_alloc_path(); @@ -2117,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); - if (IS_ERR(inode)) - break; + BUG_ON(IS_ERR(inode)); /* * add this inode to the orphan list so btrfs_orphan_del does * the proper thing when we hit it */ - spin_lock(&root->list_lock); + spin_lock(&root->orphan_lock); list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->list_lock); + spin_unlock(&root->orphan_lock); /* * if this is a bad inode, means we actually succeeded in @@ -2135,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * do a destroy_inode */ if (is_bad_inode(inode)) { - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); btrfs_orphan_del(trans, inode); btrfs_end_transaction(trans, root); iput(inode); @@ -2153,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) /* this will do delete_inode and everything for us */ iput(inode); } + btrfs_free_path(path); + + root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; + + if (root->orphan_block_rsv) + btrfs_block_rsv_release(root, root->orphan_block_rsv, + (u64)-1); + + if (root->orphan_block_rsv || root->orphan_item_inserted) { + trans = btrfs_join_transaction(root, 1); + btrfs_end_transaction(trans, root); + } if (nr_unlink) printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); if (nr_truncate) printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); - - btrfs_free_path(path); } /* @@ -2478,29 +2666,201 @@ out: return ret; } -static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +/* helper to check if there is any shared block in the path */ +static int check_path_shared(struct btrfs_root *root, + struct btrfs_path *path) +{ + struct extent_buffer *eb; + int level; + int ret; + u64 refs = 1; + + for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + if (!path->nodes[level]) + break; + eb = path->nodes[level]; + if (!btrfs_block_can_be_shared(root, eb)) + continue; + ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, + &refs, NULL); + if (refs > 1) + return 1; + } + return 0; +} + +/* + * helper to start transaction for unlink and rmdir. + * + * unlink and rmdir are special in btrfs, they do not always free space. + * so in enospc case, we should make sure they will free space before + * allowing them to use the global metadata reservation. + */ +static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, + struct dentry *dentry) { - struct btrfs_root *root; struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_path *path; + struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; struct inode *inode = dentry->d_inode; + u64 index; + int check_link = 1; + int err = -ENOSPC; int ret; - unsigned long nr = 0; - root = BTRFS_I(dir)->root; + trans = btrfs_start_transaction(root, 10); + if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) + return trans; - /* - * 5 items for unlink inode - * 1 for orphan - */ - ret = btrfs_reserve_metadata_space(root, 6); - if (ret) - return ret; + if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + return ERR_PTR(-ENOSPC); + + /* check if there is someone else holds reference */ + if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) + return ERR_PTR(-ENOSPC); - trans = btrfs_start_transaction(root, 1); + if (atomic_read(&inode->i_count) > 2) + return ERR_PTR(-ENOSPC); + + if (xchg(&root->fs_info->enospc_unlink, 1)) + return ERR_PTR(-ENOSPC); + + path = btrfs_alloc_path(); + if (!path) { + root->fs_info->enospc_unlink = 0; + return ERR_PTR(-ENOMEM); + } + + trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { - btrfs_unreserve_metadata_space(root, 6); - return PTR_ERR(trans); + btrfs_free_path(path); + root->fs_info->enospc_unlink = 0; + return trans; + } + + path->skip_locking = 1; + path->search_commit_root = 1; + + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(dir)->location, 0); + if (ret < 0) { + err = ret; + goto out; + } + if (ret == 0) { + if (check_path_shared(root, path)) + goto out; + } else { + check_link = 0; + } + btrfs_release_path(root, path); + + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, 0); + if (ret < 0) { + err = ret; + goto out; + } + if (ret == 0) { + if (check_path_shared(root, path)) + goto out; + } else { + check_link = 0; + } + btrfs_release_path(root, path); + + if (ret == 0 && S_ISREG(inode->i_mode)) { + ret = btrfs_lookup_file_extent(trans, root, path, + inode->i_ino, (u64)-1, 0); + if (ret < 0) { + err = ret; + goto out; + } + BUG_ON(ret == 0); + if (check_path_shared(root, path)) + goto out; + btrfs_release_path(root, path); + } + + if (!check_link) { + err = 0; + goto out; + } + + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + dentry->d_name.name, dentry->d_name.len, 0); + if (IS_ERR(di)) { + err = PTR_ERR(di); + goto out; + } + if (di) { + if (check_path_shared(root, path)) + goto out; + } else { + err = 0; + goto out; } + btrfs_release_path(root, path); + + ref = btrfs_lookup_inode_ref(trans, root, path, + dentry->d_name.name, dentry->d_name.len, + inode->i_ino, dir->i_ino, 0); + if (IS_ERR(ref)) { + err = PTR_ERR(ref); + goto out; + } + BUG_ON(!ref); + if (check_path_shared(root, path)) + goto out; + index = btrfs_inode_ref_index(path->nodes[0], ref); + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, + dentry->d_name.name, dentry->d_name.len, 0); + if (IS_ERR(di)) { + err = PTR_ERR(di); + goto out; + } + BUG_ON(ret == -ENOENT); + if (check_path_shared(root, path)) + goto out; + + err = 0; +out: + btrfs_free_path(path); + if (err) { + btrfs_end_transaction(trans, root); + root->fs_info->enospc_unlink = 0; + return ERR_PTR(err); + } + + trans->block_rsv = &root->fs_info->global_block_rsv; + return trans; +} + +static void __unlink_end_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + if (trans->block_rsv == &root->fs_info->global_block_rsv) { + BUG_ON(!root->fs_info->enospc_unlink); + root->fs_info->enospc_unlink = 0; + } + btrfs_end_transaction_throttle(trans, root); +} + +static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_trans_handle *trans; + struct inode *inode = dentry->d_inode; + int ret; + unsigned long nr = 0; + + trans = __unlink_start_trans(dir, dentry); + if (IS_ERR(trans)) + return PTR_ERR(trans); btrfs_set_trans_block_group(trans, dir); @@ -2508,14 +2868,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); + BUG_ON(ret); - if (inode->i_nlink == 0) + if (inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, inode); + BUG_ON(ret); + } nr = trans->blocks_used; - - btrfs_end_transaction_throttle(trans, root); - btrfs_unreserve_metadata_space(root, 6); + __unlink_end_trans(trans, root); btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2587,7 +2948,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; int err = 0; - int ret; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_trans_handle *trans; unsigned long nr = 0; @@ -2596,15 +2956,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -ENOTEMPTY; - ret = btrfs_reserve_metadata_space(root, 5); - if (ret) - return ret; - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_unreserve_metadata_space(root, 5); + trans = __unlink_start_trans(dir, dentry); + if (IS_ERR(trans)) return PTR_ERR(trans); - } btrfs_set_trans_block_group(trans, dir); @@ -2627,12 +2981,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) btrfs_i_size_write(inode, 0); out: nr = trans->blocks_used; - ret = btrfs_end_transaction_throttle(trans, root); - btrfs_unreserve_metadata_space(root, 5); + __unlink_end_trans(trans, root); btrfs_btree_balance_dirty(root, nr); - if (ret && !err) - err = ret; return err; } @@ -3029,6 +3380,7 @@ out: if (pending_del_nr) { ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); + BUG_ON(ret); } btrfs_free_path(path); return err; @@ -3056,11 +3408,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; - ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); - if (ret) - goto out; - - ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (ret) goto out; @@ -3068,8 +3416,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) again: page = grab_cache_page(mapping, index); if (!page) { - btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); goto out; } @@ -3132,8 +3479,7 @@ again: out_unlock: if (ret) - btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); - btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); unlock_page(page); page_cache_release(page); out: @@ -3145,7 +3491,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct extent_map *em; + struct extent_map *em = NULL; struct extent_state *cached_state = NULL; u64 mask = root->sectorsize - 1; u64 hole_start = (inode->i_size + mask) & ~mask; @@ -3183,11 +3529,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) u64 hint_byte = 0; hole_size = last_byte - cur_offset; - err = btrfs_reserve_metadata_space(root, 2); - if (err) + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); break; - - trans = btrfs_start_transaction(root, 1); + } btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, inode, cur_offset, @@ -3205,14 +3551,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) last_byte - 1, 0); btrfs_end_transaction(trans, root); - btrfs_unreserve_metadata_space(root, 2); } free_extent_map(em); + em = NULL; cur_offset = last_byte; if (cur_offset >= block_end) break; } + free_extent_map(em); unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, GFP_NOFS); return err; @@ -3239,11 +3586,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) } } - ret = btrfs_reserve_metadata_space(root, 1); - if (ret) - return ret; + trans = btrfs_start_transaction(root, 5); + if (IS_ERR(trans)) + return PTR_ERR(trans); - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_orphan_add(trans, inode); @@ -3251,7 +3597,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) nr = trans->blocks_used; btrfs_end_transaction(trans, root); - btrfs_unreserve_metadata_space(root, 1); btrfs_btree_balance_dirty(root, nr); if (attr->ia_size > inode->i_size) { @@ -3264,8 +3609,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) i_size_write(inode, attr->ia_size); btrfs_ordered_update_i_size(inode, inode->i_size, NULL); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); + BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); + trans->block_rsv = root->orphan_block_rsv; + BUG_ON(!trans->block_rsv); ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); @@ -3345,10 +3693,21 @@ void btrfs_delete_inode(struct inode *inode) btrfs_i_size_write(inode, 0); while (1) { - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); + BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); + trans->block_rsv = root->orphan_block_rsv; + ret = btrfs_block_rsv_check(trans, root, + root->orphan_block_rsv, 0, 5); + if (ret) { + BUG_ON(ret != -EAGAIN); |