diff options
Diffstat (limited to 'fs')
70 files changed, 1028 insertions, 591 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index d5c1401f003..d34896cfb19 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -980,19 +980,11 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, } } -static DEFINE_MUTEX(autofs4_ioctl_mutex); - static long autofs4_root_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { - long ret; struct inode *inode = filp->f_dentry->d_inode; - - mutex_lock(&autofs4_ioctl_mutex); - ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); - mutex_unlock(&autofs4_ioctl_mutex); - - return ret; + return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); } #ifdef CONFIG_COMPAT @@ -1002,13 +994,11 @@ static long autofs4_root_compat_ioctl(struct file *filp, struct inode *inode = filp->f_path.dentry->d_inode; int ret; - mutex_lock(&autofs4_ioctl_mutex); if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); else ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, (unsigned long)compat_ptr(arg)); - mutex_unlock(&autofs4_ioctl_mutex); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c547cca26a2..51d2e4de34e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -696,6 +696,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, __btree_submit_bio_done); } +#ifdef CONFIG_MIGRATION static int btree_migratepage(struct address_space *mapping, struct page *newpage, struct page *page) { @@ -712,12 +713,9 @@ static int btree_migratepage(struct address_space *mapping, if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) return -EAGAIN; -#ifdef CONFIG_MIGRATION return migrate_page(mapping, newpage, page); -#else - return -ENOSYS; -#endif } +#endif static int btree_writepage(struct page *page, struct writeback_control *wbc) { @@ -1009,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root, blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); - BUG_ON(!root->node); + if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { + free_extent_buffer(root->node); + return -EIO; + } root->commit_root = btrfs_root_node(root); return 0; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bcd59c7dfb5..227e5815d83 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -429,6 +429,7 @@ err: static int cache_block_group(struct btrfs_block_group_cache *cache, struct btrfs_trans_handle *trans, + struct btrfs_root *root, int load_cache_only) { struct btrfs_fs_info *fs_info = cache->fs_info; @@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, /* * We can't do the read from on-disk cache during a commit since we need - * to have the normal tree locking. + * to have the normal tree locking. Also if we are currently trying to + * allocate blocks for the tree root we can't do the fast caching since + * we likely hold important locks. */ - if (!trans->transaction->in_commit) { + if (!trans->transaction->in_commit && + (root && root != root->fs_info->tree_root)) { spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { spin_unlock(&cache->lock); @@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, struct btrfs_root *root = block_group->fs_info->tree_root; struct inode *inode = NULL; u64 alloc_hint = 0; + int dcs = BTRFS_DC_ERROR; int num_pages = 0; int retries = 0; int ret = 0; @@ -2795,6 +2800,8 @@ again: spin_lock(&block_group->lock); if (block_group->cached != BTRFS_CACHE_FINISHED) { + /* We're not cached, don't bother trying to write stuff out */ + dcs = BTRFS_DC_WRITTEN; spin_unlock(&block_group->lock); goto out_put; } @@ -2821,6 +2828,8 @@ again: ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, num_pages, num_pages, &alloc_hint); + if (!ret) + dcs = BTRFS_DC_SETUP; btrfs_free_reserved_data_space(inode, num_pages); out_put: iput(inode); @@ -2828,10 +2837,7 @@ out_free: btrfs_release_path(root, path); out: spin_lock(&block_group->lock); - if (ret) - block_group->disk_cache_state = BTRFS_DC_ERROR; - else - block_group->disk_cache_state = BTRFS_DC_SETUP; + block_group->disk_cache_state = dcs; spin_unlock(&block_group->lock); return ret; @@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) { - u64 num_devices = root->fs_info->fs_devices->rw_devices; + /* + * we add in the count of missing devices because we want + * to make sure that any RAID levels on a degraded FS + * continue to be honored. + */ + u64 num_devices = root->fs_info->fs_devices->rw_devices + + root->fs_info->fs_devices->missing_devices; if (num_devices == 1) flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); @@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, * space back to the block group, otherwise we will leak space. */ if (!alloc && cache->cached == BTRFS_CACHE_NO) - cache_block_group(cache, trans, 1); + cache_block_group(cache, trans, NULL, 1); byte_in_group = bytenr - cache->key.objectid; WARN_ON(byte_in_group > cache->key.offset); @@ -4930,11 +4942,31 @@ search: btrfs_get_block_group(block_group); search_start = block_group->key.objectid; + /* + * this can happen if we end up cycling through all the + * raid types, but we want to make sure we only allocate + * for the proper type. + */ + if (!block_group_bits(block_group, data)) { + u64 extra = BTRFS_BLOCK_GROUP_DUP | + BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_RAID10; + + /* + * if they asked for extra copies and this block group + * doesn't provide them, bail. This does allow us to + * fill raid0 from raid1. + */ + if ((data & extra) && !(block_group->flags & extra)) + goto loop; + } + have_block_group: if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { u64 free_percent; - ret = cache_block_group(block_group, trans, 1); + ret = cache_block_group(block_group, trans, + orig_root, 1); if (block_group->cached == BTRFS_CACHE_FINISHED) goto have_block_group; @@ -4958,7 +4990,8 @@ have_block_group: if (loop > LOOP_CACHING_NOWAIT || (loop > LOOP_FIND_IDEAL && atomic_read(&space_info->caching_threads) < 2)) { - ret = cache_block_group(block_group, trans, 0); + ret = cache_block_group(block_group, trans, + orig_root, 0); BUG_ON(ret); } found_uncached_bg = true; @@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, u64 num_bytes = ins->offset; block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); - cache_block_group(block_group, trans, 0); + cache_block_group(block_group, trans, NULL, 0); caching_ctl = get_caching_control(block_group); if (!caching_ctl) { @@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, NULL, NULL); BUG_ON(ret < 0); if (ret > 0) { - ret = btrfs_del_orphan_item(trans, tree_root, - root->root_key.objectid); - BUG_ON(ret); + /* if we fail to delete the orphan item this time + * around, it'll get picked up the next time. + * + * The most common failure here is just -ENOENT. + */ + btrfs_del_orphan_item(trans, tree_root, + root->root_key.objectid); } } @@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; - num_devices = root->fs_info->fs_devices->rw_devices; + /* + * we add in the count of missing devices because we want + * to make sure that any RAID levels on a degraded FS + * continue to be honored. + */ + num_devices = root->fs_info->fs_devices->rw_devices + + root->fs_info->fs_devices->missing_devices; + if (num_devices == 1) { stripped |= BTRFS_BLOCK_GROUP_DUP; stripped = flags & ~stripped; @@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root) break; if (ret != 0) goto error; - leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); cache = kzalloc(sizeof(*cache), GFP_NOFS); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c1faded5fca..66836d85763 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, struct page **prepared_pages, struct iov_iter *i) { - size_t copied; + size_t copied = 0; int pg = 0; int offset = pos & (PAGE_CACHE_SIZE - 1); + int total_copied = 0; while (write_bytes > 0) { size_t count = min_t(size_t, PAGE_CACHE_SIZE - offset, write_bytes); struct page *page = prepared_pages[pg]; -again: - if (unlikely(iov_iter_fault_in_readable(i, count))) - return -EFAULT; - - /* Copy data from userspace to the current page */ - copied = iov_iter_copy_from_user(page, i, offset, count); + /* + * Copy data from userspace to the current page + * + * Disable pagefault to avoid recursive lock since + * the pages are already locked + */ + pagefault_disable(); + copied = iov_iter_copy_from_user_atomic(page, i, offset, count); + pagefault_enable(); /* Flush processor's dcache for this page */ flush_dcache_page(page); iov_iter_advance(i, copied); write_bytes -= copied; + total_copied += copied; + /* Return to btrfs_file_aio_write to fault page */ if (unlikely(copied == 0)) { - count = min_t(size_t, PAGE_CACHE_SIZE - offset, - iov_iter_single_seg_count(i)); - goto again; + break; } if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { @@ -81,7 +85,7 @@ again: offset = 0; } } - return 0; + return total_copied; } /* @@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, unsigned long last_index; int will_write; int buffered = 0; + int copied = 0; + int dirty_pages = 0; will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || (file->f_flags & O_DIRECT)); @@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, WARN_ON(num_pages > nrptrs); memset(pages, 0, sizeof(struct page *) * nrptrs); - ret = btrfs_delalloc_reserve_space(inode, write_bytes); + /* + * Fault pages before locking them in prepare_pages + * to avoid recursive lock + */ + if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) { + ret = -EFAULT; + goto out; + } + + ret = btrfs_delalloc_reserve_space(inode, + num_pages << PAGE_CACHE_SHIFT); if (ret) goto out; @@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, pos, first_index, last_index, write_bytes); if (ret) { - btrfs_delalloc_release_space(inode, write_bytes); + btrfs_delalloc_release_space(inode, + num_pages << PAGE_CACHE_SHIFT); goto out; } - ret = btrfs_copy_from_user(pos, num_pages, + copied = btrfs_copy_from_user(pos, num_pages, write_bytes, pages, &i); - if (ret == 0) { + dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + + if (num_pages > dirty_pages) { + if (copied > 0) + atomic_inc( + &BTRFS_I(inode)->outstanding_extents); + btrfs_delalloc_release_space(inode, + (num_pages - dirty_pages) << + PAGE_CACHE_SHIFT); + } + + if (copied > 0) { dirty_and_release_pages(NULL, root, file, pages, - num_pages, pos, write_bytes); + dirty_pages, pos, copied); } btrfs_drop_pages(pages, num_pages); - if (ret) { - btrfs_delalloc_release_space(inode, write_bytes); - goto out; - } - if (will_write) { - filemap_fdatawrite_range(inode->i_mapping, pos, - pos + write_bytes - 1); - } else { - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - num_pages); - if (num_pages < - (root->leafsize >> PAGE_CACHE_SHIFT) + 1) - btrfs_btree_balance_dirty(root, 1); - btrfs_throttle(root); + if (copied > 0) { + if (will_write) { + filemap_fdatawrite_range(inode->i_mapping, pos, + pos + copied - 1); + } else { + balance_dirty_pages_ratelimited_nr( + inode->i_mapping, + dirty_pages); + if (dirty_pages < + (root->leafsize >> PAGE_CACHE_SHIFT) + 1) + btrfs_btree_balance_dirty(root, 1); + btrfs_throttle(root); + } } - pos += write_bytes; - num_written += write_bytes; + pos += copied; + num_written += copied; cond_resched(); } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 22ee0dc2e6b..60d68426695 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -290,7 +290,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, (unsigned long long)BTRFS_I(inode)->generation, (unsigned long long)generation, (unsigned long long)block_group->key.objectid); - goto out; + goto free_cache; } if (!num_entries) @@ -524,6 +524,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, return 0; } + node = rb_first(&block_group->free_space_offset); + if (!node) { + iput(inode); + return 0; + } + last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; filemap_write_and_wait(inode->i_mapping); btrfs_wait_ordered_range(inode, inode->i_size & @@ -543,10 +549,6 @@ int btrfs_write_out_cache(struct btrfs_root *root, */ first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); - node = rb_first(&block_group->free_space_offset); - if (!node) - goto out_free; - /* * Lock all pages first so we can lock the extent safely. * diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8039390bd6a..72f31ecb5c9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -495,7 +495,7 @@ again: add_async_extent(async_cow, start, num_bytes, total_compressed, pages, nr_pages_ret); - if (start + num_bytes < end && start + num_bytes < actual_end) { + if (start + num_bytes < end) { start += num_bytes; pages = NULL; cond_resched(); @@ -5712,9 +5712,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) if (err) { printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " - "disk_bytenr %lu len %u err no %d\n", - dip->inode->i_ino, bio->bi_rw, bio->bi_sector, - bio->bi_size, err); + "sector %#Lx len %u err no %d\n", + dip->inode->i_ino, bio->bi_rw, + (unsigned long long)bio->bi_sector, bio->bi_size, err); dip->errors = 1; /* @@ -5934,8 +5934,7 @@ free_ordered: */ if (write) { struct btrfs_ordered_extent *ordered; - ordered = btrfs_lookup_ordered_extent(inode, - dip->logical_offset); + ordered = btrfs_lookup_ordered_extent(inode, file_offset); if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) btrfs_free_reserved_extent(root, ordered->start, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f1c9bb4079e..f87552a1d7e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -947,23 +947,42 @@ out: static noinline int btrfs_ioctl_snap_create(struct file *file, void __user *arg, int subvol, - int async) + int v2) { struct btrfs_ioctl_vol_args *vol_args = NULL; - struct btrfs_ioctl_async_vol_args *async_vol_args = NULL; + struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL; char *name; u64 fd; - u64 transid = 0; int ret; - if (async) { - async_vol_args = memdup_user(arg, sizeof(*async_vol_args)); - if (IS_ERR(async_vol_args)) - return PTR_ERR(async_vol_args); + if (v2) { |