diff options
38 files changed, 1630 insertions, 702 deletions
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index d61feca7945..310a7f6d09b 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -19,7 +19,7 @@ #ifndef __BTRFS_BACKREF__ #define __BTRFS_BACKREF__ -#include "ioctl.h" +#include <linux/btrfs.h> #include "ulist.h" #include "extent_io.h" diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 2a8c242bc4f..d9b97d4960e 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -40,6 +40,8 @@ #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 #define BTRFS_INODE_NEEDS_FULL_SYNC 7 #define BTRFS_INODE_COPY_EVERYTHING 8 +#define BTRFS_INODE_IN_DELALLOC_LIST 9 +#define BTRFS_INODE_READDIO_NEED_LOCK 10 /* in memory btrfs inode */ struct btrfs_inode { @@ -216,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) return 0; } +/* + * Disable DIO read nolock optimization, so new dio readers will be forced + * to grab i_mutex. It is used to avoid the endless truncate due to + * nonlocked dio read. + */ +static inline void btrfs_inode_block_unlocked_dio(struct inode *inode) +{ + set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags); + smp_mb(); +} + +static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) +{ + smp_mb__before_clear_bit(); + clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, + &BTRFS_I(inode)->runtime_flags); +} + #endif diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 11d47bfb62b..18af6f48781 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -813,8 +813,7 @@ static int btrfsic_process_superblock_dev_mirror( (bh->b_data + (dev_bytenr & 4095)); if (btrfs_super_bytenr(super_tmp) != dev_bytenr || - strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, - sizeof(super_tmp->magic)) || + super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) || memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || btrfs_super_nodesize(super_tmp) != state->metablock_size || btrfs_super_leafsize(super_tmp) != state->metablock_size || diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index eea5da7a2b9..ecd25a1b4e5 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1138,6 +1138,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, switch (tm->op) { case MOD_LOG_KEY_REMOVE_WHILE_FREEING: BUG_ON(tm->slot < n); + /* Fallthrough */ case MOD_LOG_KEY_REMOVE_WHILE_MOVING: case MOD_LOG_KEY_REMOVE: btrfs_set_node_key(eb, &tm->key, tm->slot); @@ -1222,7 +1223,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, __tree_mod_log_rewind(eb_rewin, time_seq, tm); WARN_ON(btrfs_header_nritems(eb_rewin) > - BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root)); + BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root)); return eb_rewin; } @@ -1441,7 +1442,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) */ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, - int start_slot, int cache_only, u64 *last_ret, + int start_slot, u64 *last_ret, struct btrfs_key *progress) { struct extent_buffer *cur; @@ -1461,8 +1462,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_disk_key disk_key; parent_level = btrfs_header_level(parent); - if (cache_only && parent_level != 1) - return 0; WARN_ON(trans->transaction != root->fs_info->running_transaction); WARN_ON(trans->transid != root->fs_info->generation); @@ -1508,10 +1507,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, else uptodate = 0; if (!cur || !uptodate) { - if (cache_only) { - free_extent_buffer(cur); - continue; - } if (!cur) { cur = read_tree_block(root, blocknr, blocksize, gen); @@ -4825,8 +4820,8 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) /* * A helper function to walk down the tree starting at min_key, and looking - * for nodes or leaves that are either in cache or have a minimum - * transaction id. This is used by the btree defrag code, and tree logging + * for nodes or leaves that are have a minimum transaction id. + * This is used by the btree defrag code, and tree logging * * This does not cow, but it does stuff the starting key it finds back * into min_key, so you can call btrfs_search_slot with cow=1 on the @@ -4847,7 +4842,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) */ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, struct btrfs_key *max_key, - struct btrfs_path *path, int cache_only, + struct btrfs_path *path, u64 min_trans) { struct extent_buffer *cur; @@ -4887,15 +4882,12 @@ again: if (sret && slot > 0) slot--; /* - * check this node pointer against the cache_only and - * min_trans parameters. If it isn't in cache or is too - * old, skip to the next one. + * check this node pointer against the min_trans parameters. + * If it is too old, old, skip to the next one. */ while (slot < nritems) { u64 blockptr; u64 gen; - struct extent_buffer *tmp; - struct btrfs_disk_key disk_key; blockptr = btrfs_node_blockptr(cur, slot); gen = btrfs_node_ptr_generation(cur, slot); @@ -4903,27 +4895,7 @@ again: slot++; continue; } - if (!cache_only) - break; - - if (max_key) { - btrfs_node_key(cur, &disk_key, slot); - if (comp_keys(&disk_key, max_key) >= 0) { - ret = 1; - goto out; - } - } - - tmp = btrfs_find_tree_block(root, blockptr, - btrfs_level_size(root, level - 1)); - - if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) { - free_extent_buffer(tmp); - break; - } - if (tmp) - free_extent_buffer(tmp); - slot++; + break; } find_next_key: /* @@ -4934,7 +4906,7 @@ find_next_key: path->slots[level] = slot; btrfs_set_path_blocking(path); sret = btrfs_find_next_key(root, path, min_key, level, - cache_only, min_trans); + min_trans); if (sret == 0) { btrfs_release_path(path); goto again; @@ -5399,8 +5371,7 @@ out: /* * this is similar to btrfs_next_leaf, but does not try to preserve * and fixup the path. It looks for and returns the next key in the - * tree based on the current path and the cache_only and min_trans - * parameters. + * tree based on the current path and the min_trans parameters. * * 0 is returned if another key is found, < 0 if there are any errors * and 1 is returned if there are no higher keys in the tree @@ -5409,8 +5380,7 @@ out: * calling this function. */ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, - struct btrfs_key *key, int level, - int cache_only, u64 min_trans) + struct btrfs_key *key, int level, u64 min_trans) { int slot; struct extent_buffer *c; @@ -5461,22 +5431,8 @@ next: if (level == 0) btrfs_item_key_to_cpu(c, key, slot); else { - u64 blockptr = btrfs_node_blockptr(c, slot); u64 gen = btrfs_node_ptr_generation(c, slot); - if (cache_only) { - struct extent_buffer *cur; - cur = btrfs_find_tree_block(root, blockptr, - btrfs_level_size(root, level - 1)); - if (!cur || - btrfs_buffer_uptodate(cur, gen, 1) <= 0) { - slot++; - if (cur) - free_extent_buffer(cur); - goto next; - } - free_extent_buffer(cur); - } if (gen < min_trans) { slot++; goto next; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 547b7b05727..1679051f4d3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -31,10 +31,10 @@ #include <trace/events/btrfs.h> #include <asm/kmap_types.h> #include <linux/pagemap.h> +#include <linux/btrfs.h> #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" -#include "ioctl.h" struct btrfs_trans_handle; struct btrfs_transaction; @@ -46,7 +46,7 @@ extern struct kmem_cache *btrfs_path_cachep; extern struct kmem_cache *btrfs_free_space_cachep; struct btrfs_ordered_sum; -#define BTRFS_MAGIC "_BHRfS_M" +#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ #define BTRFS_MAX_MIRRORS 3 @@ -191,6 +191,8 @@ static int btrfs_csum_sizes[] = { 4, 0 }; /* ioprio of readahead is set to idle */ #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) +#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024) + /* * The key defines the order in the tree, and so it also defines (optimal) * block layout. @@ -336,7 +338,9 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) /* * File system states */ +#define BTRFS_FS_STATE_ERROR 0 +/* Super block flags */ /* Errors detected */ #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) @@ -953,7 +957,15 @@ struct btrfs_dev_replace_item { #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE -#define BTRFS_NR_RAID_TYPES 5 + +enum btrfs_raid_types { + BTRFS_RAID_RAID10, + BTRFS_RAID_RAID1, + BTRFS_RAID_DUP, + BTRFS_RAID_RAID0, + BTRFS_RAID_SINGLE, + BTRFS_NR_RAID_TYPES +}; #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ BTRFS_BLOCK_GROUP_SYSTEM | \ @@ -1225,6 +1237,11 @@ struct seq_list { u64 seq; }; +enum btrfs_orphan_cleanup_state { + ORPHAN_CLEANUP_STARTED = 1, + ORPHAN_CLEANUP_DONE = 2, +}; + /* fs_info */ struct reloc_control; struct btrfs_device; @@ -1250,6 +1267,7 @@ struct btrfs_fs_info { /* block group cache stuff */ spinlock_t block_group_cache_lock; + u64 first_logical_byte; struct rb_root block_group_cache_tree; /* keep track of unallocated space */ @@ -1288,7 +1306,23 @@ struct btrfs_fs_info { u64 last_trans_log_full_commit; unsigned long mount_opt; unsigned long compress_type:4; + /* + * It is a suggestive number, the read side is safe even it gets a + * wrong number because we will write out the data into a regular + * extent. The write side(mount/remount) is under ->s_umount lock, + * so it is also safe. + */ u64 max_inline; + /* + * Protected by ->chunk_mutex and sb->s_umount. + * + * The reason that we use two lock to protect it is because only + * remount and mount operations can change it and these two operations + * are under sb->s_umount, but the read side (chunk allocation) can not + * acquire sb->s_umount or the deadlock would happen. So we use two + * locks to protect it. On the write side, we must acquire two locks, + * and on the read side, we just need acquire one of them. + */ u64 alloc_start; struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; @@ -1365,6 +1399,7 @@ struct btrfs_fs_info { */ struct list_head ordered_extents; + spinlock_t delalloc_lock; /* * all of the inodes that have delalloc bytes. It is possible for * this list to be empty even when there is still dirty data=ordered @@ -1373,13 +1408,6 @@ struct btrfs_fs_info { struct list_head delalloc_inodes; /* - * special rename and truncate targets that must be on disk before - * we're allowed to commit. This is basically the ext3 style - * data=ordered list. - */ - struct list_head ordered_operations; - - /* * there is a pool of worker threads for checksumming during writes * and a pool for checksumming after reads. This is because readers * can run with FS locks held, and the writers may be waiting for @@ -1423,10 +1451,12 @@ struct btrfs_fs_info { u64 total_pinned; - /* protected by the delalloc lock, used to keep from writing - * metadata until there is a nice batch - */ - u64 dirty_metadata_bytes; + /* used to keep from writing metadata until there is a nice batch */ + struct percpu_counter dirty_metadata_bytes; + struct percpu_counter delalloc_bytes; + s32 dirty_metadata_batch; + s32 delalloc_batch; + struct list_head dirty_cowonly_roots; struct btrfs_fs_devices *fs_devices; @@ -1442,9 +1472,6 @@ struct btrfs_fs_info { struct reloc_control *reloc_ctl; - spinlock_t delalloc_lock; - u64 delalloc_bytes; - /* data_alloc_cluster is only used in ssd mode */ struct btrfs_free_cluster data_alloc_cluster; @@ -1456,6 +1483,8 @@ struct btrfs_fs_info { struct rb_root defrag_inodes; atomic_t defrag_running; + /* Used to protect avail_{data, metadata, system}_alloc_bits */ + seqlock_t profiles_lock; /* * these three are in extended format (availability of single * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other @@ -1520,7 +1549,7 @@ struct btrfs_fs_info { u64 qgroup_seq; /* filesystem state */ - u64 fs_state; + unsigned long fs_state; struct btrfs_delayed_root *delayed_root; @@ -1623,6 +1652,9 @@ struct btrfs_root { struct list_head root_list; + spinlock_t log_extents_lock[2]; + struct list_head logged_list[2]; + spinlock_t orphan_lock; atomic_t orphan_inodes; struct btrfs_block_rsv *orphan_block_rsv; @@ -2936,8 +2968,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u64 num_bytes, u64 *refs, u64 *flags); int btrfs_pin_extent(struct btrfs_root *root, u64 bytenr, u64 num, int reserved); -int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, - struct btrfs_root *root, +int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, u64 bytenr, u64 num_bytes); int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -3092,10 +3123,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root); struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, int lowest_level, - int cache_only, u64 min_trans); + u64 min_trans); int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, struct btrfs_key *max_key, - struct btrfs_path *path, int cache_only, + struct btrfs_path *path, u64 min_trans); enum btrfs_compare_tree_result { BTRFS_COMPARE_TREE_NEW, @@ -3148,7 +3179,7 @@ int btrfs_search_slot_for_read(struct btrfs_root *root, int find_higher, int return_any); int btrfs_realloc_node(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *parent, - int start_slot, int cache_only, u64 *last_ret, + int start_slot, u64 *last_ret, struct btrfs_key *progress); void btrfs_release_path(struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); @@ -3543,7 +3574,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, /* tree-defrag.c */ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, - struct btrfs_root *root, int cache_only); + struct btrfs_root *root); /* sysfs.c */ int btrfs_init_sysfs(void); @@ -3620,11 +3651,14 @@ __printf(5, 6) void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, unsigned int line, int errno, const char *fmt, ...); +/* + * If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic + * will panic(). Otherwise we BUG() here. + */ #define btrfs_panic(fs_info, errno, fmt, args...) \ do { \ - struct btrfs_fs_info *_i = (fs_info); \ - __btrfs_panic(_i, __func__, __LINE__, errno, fmt, ##args); \ - BUG_ON(!(_i->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)); \ + __btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args); \ + BUG(); \ } while (0) /* acl.c */ @@ -3745,4 +3779,11 @@ static inline int is_fstree(u64 rootid) return 1; return 0; } + +static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) +{ + return signal_pending(current); +} + + #endif diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 34836036f01..0b278b117cb 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -875,7 +875,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, struct btrfs_delayed_item *delayed_item) { struct extent_buffer *leaf; - struct btrfs_item *item; char *ptr; int ret; @@ -886,7 +885,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; - item = btrfs_item_nr(leaf, path->slots[0]); ptr = btrfs_item_ptr(leaf, path->slots[0], char); write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr, @@ -1065,32 +1063,25 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) } } -static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_delayed_node *node) +static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_delayed_node *node) { struct btrfs_key key; struct btrfs_inode_item *inode_item; struct extent_buffer *leaf; int ret; - mutex_lock(&node->mutex); - if (!node->inode_dirty) { - mutex_unlock(&node->mutex); - return 0; - } - key.objectid = node->inode_id; btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; + ret = btrfs_lookup_inode(trans, root, path, &key, 1); if (ret > 0) { btrfs_release_path(path); - mutex_unlock(&node->mutex); return -ENOENT; } else if (ret < 0) { - mutex_unlock(&node->mutex); return ret; } @@ -1105,11 +1096,47 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, btrfs_delayed_inode_release_metadata(root, node); btrfs_release_delayed_inode(node); - mutex_unlock(&node->mutex); return 0; } +static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_delayed_node *node) +{ + int ret; + + mutex_lock(&node->mutex); + if (!node->inode_dirty) { + mutex_unlock(&node->mutex); + return 0; + } + + ret = __btrfs_update_delayed_inode(trans, root, path, node); + mutex_unlock(&node->mutex); + return ret; +} + +static inline int +__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_delayed_node *node) +{ + int ret; + + ret = btrfs_insert_delayed_items(trans, path, node->root, node); + if (ret) + return ret; + + ret = btrfs_delete_delayed_items(trans, path, node->root, node); + if (ret) + return ret; + + ret = btrfs_update_delayed_inode(trans, node->root, path, node); + return ret; +} + /* * Called when committing the transaction. * Returns 0 on success. @@ -1119,7 +1146,6 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, int nr) { - struct btrfs_root *curr_root = root; struct btrfs_delayed_root *delayed_root; struct btrfs_delayed_node *curr_node, *prev_node; struct btrfs_path *path; @@ -1142,15 +1168,8 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, curr_node = btrfs_first_delayed_node(delayed_root); while (curr_node && (!count || (count && nr--))) { - curr_root = curr_node->root; - ret = btrfs_insert_delayed_items(trans, path, curr_root, - curr_node); - if (!ret) - ret = btrfs_delete_delayed_items(trans, path, - curr_root, curr_node); - if (!ret) - ret = btrfs_update_delayed_inode(trans, curr_root, - path, curr_node); + ret = __btrfs_commit_inode_delayed_items(trans, path, + curr_node); if (ret) { btrfs_release_delayed_node(curr_node); curr_node = NULL; @@ -1183,51 +1202,93 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, return __btrfs_run_delayed_items(trans, root, nr); } -static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, - struct btrfs_delayed_node *node) +int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, + struct inode *inode) { + struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); struct btrfs_path *path; struct btrfs_block_rsv *block_rsv; int ret; + if (!delayed_node) + return 0; + + mutex_lock(&delayed_node->mutex); + if (!delayed_node->count) { + mutex_unlock(&delayed_node->mutex); + btrfs_release_delayed_node(delayed_node); + return 0; + } + mutex_unlock(&delayed_node->mutex); + path = btrfs_alloc_path(); if (!path) return -ENOMEM; path->leave_spinning = 1; block_rsv = trans->block_rsv; - trans->block_rsv = &node->root->fs_info->delayed_block_rsv; + trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv; - ret = btrfs_insert_delayed_items(trans, path, node->root, node); - if (!ret) - ret = btrfs_delete_delayed_items(trans, path, node->root, node); - if (!ret) - ret = btrfs_update_delayed_inode(trans, node->root, path, node); - btrfs_free_path(path); + ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node); + btrfs_release_delayed_node(delayed_node); + btrfs_free_path(path); trans->block_rsv = block_rsv; + return ret; } -int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, - struct inode *inode) +int btrfs_commit_inode_delayed_inode(struct inode *inode) { + struct btrfs_trans_handle *trans; struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); + struct btrfs_path *path; + struct btrfs_block_rsv *block_rsv; int ret; if (!delayed_node) return 0; mutex_lock(&delayed_node->mutex); - if (!delayed_node->count) { + if (!delayed_node->inode_dirty) { mutex_unlock(&delayed_node->mutex); btrfs_release_delayed_node(delayed_node); return 0; } mutex_unlock(&delayed_node->mutex); - ret = __btrfs_commit_inode_delayed_items(trans, delayed_node); + trans = btrfs_join_transaction(delayed_node->root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto trans_out; + } + path->leave_spinning = 1; + + block_rsv = trans->block_rsv; + trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv; + + mutex_lock(&delayed_node->mutex); + if (delayed_node->inode_dirty) + ret = __btrfs_update_delayed_inode(trans, delayed_node->root, + path, delayed_node); + else + ret = 0; + mutex_unlock(&delayed_node->mutex); + + btrfs_free_path(path); + trans->block_rsv = block_rsv; +trans_out: + btrfs_end_transaction(trans, delayed_node->root); + btrfs_btree_balance_dirty(delayed_node->root); +out: btrfs_release_delayed_node(delayed_node); + return ret; } @@ -1258,7 +1319,6 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) struct btrfs_root *root; struct btrfs_block_rsv *block_rsv; int need_requeue = 0; - int ret; async_node = container_of(work, struct btrfs_async_delayed_node, work); @@ -1277,14 +1337,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) block_rsv = trans->block_rsv; trans->block_rsv = &root->fs_info->delayed_block_rsv; - ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); - if (!ret) - ret = btrfs_delete_delayed_items(trans, path, root, - delayed_node); - - if (!ret) - btrfs_update_delayed_inode(trans, root, path, delayed_node); - + __btrfs_commit_inode_delayed_items(trans, path, delayed_node); /* * Maybe new delayed items have been inserted, so we need requeue * the work. Besides that, we must dequeue the empty delayed nodes diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 4f808e1baee..78b6ad0fc66 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -117,6 +117,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, /* Used for evicting the inode. */ void btrfs_remove_delayed_node(struct inode *inode); void btrfs_kill_delayed_inode_items(struct inode *inode); +int btrfs_commit_inode_delayed_inode(struct inode *inode); int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index ae941177339..b7a0641ead7 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -23,6 +23,10 @@ #include "delayed-ref.h" #include "transaction.h" +struct kmem_cache *btrfs_delayed_ref_head_cachep; +struct kmem_cache *btrfs_delayed_tree_ref_cachep; +struct kmem_cache *btrfs_delayed_data_ref_cachep; +struct kmem_cache *btrfs_delayed_extent_op_cachep; /* * delayed back reference update tracking. For subvolume trees * we queue up extent allocations and backref maintenance for @@ -422,6 +426,14 @@ again: return 1; } +void btrfs_release_ref_cluster(struct list_head *cluster) +{ + struct list_head *pos, *q; + + list_for_each_safe(pos, q, cluster) + list_del_init(pos); +} + /* * helper function to update an extent delayed ref in the * rbtree. existing and update must both have the same @@ -511,7 +523,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, ref->extent_op->flags_to_set; existing_ref->extent_op->update_flags = 1; } - kfree(ref->extent_op); + btrfs_free_delayed_extent_op(ref->extent_op); } } /* @@ -592,7 +604,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, * we've updated the existing ref, free the newly * allocated ref */ - kfree(head_ref); + kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); } else { delayed_refs->num_heads++; delayed_refs->num_heads_ready++; @@ -653,7 +665,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, * we've updated the existing ref, free the newly * allocated ref |