diff options
Diffstat (limited to 'fs')
100 files changed, 1620 insertions, 1212 deletions
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index d8d8e7ba6a1..eb1cc92cd67 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -110,6 +110,7 @@ struct autofs_sb_info { int sub_version; int min_proto; int max_proto; + int compat_daemon; unsigned long exp_timeout; unsigned int type; int reghost_enabled; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 76741d8d778..85f1fcdb30e 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -385,6 +385,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, sbi->pipefd = pipefd; sbi->pipe = pipe; sbi->catatonic = 0; + sbi->compat_daemon = is_compat_task(); } out: mutex_unlock(&sbi->wq_mutex); diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 450f529a4ea..1feb68ecef9 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -124,6 +124,7 @@ start: /* Negative dentry - try next */ if (!simple_positive(q)) { spin_unlock(&p->d_lock); + lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_); p = q; goto again; } @@ -186,6 +187,7 @@ again: /* Negative dentry - try next */ if (!simple_positive(ret)) { spin_unlock(&p->d_lock); + lock_set_subclass(&ret->d_lock.dep_map, 0, _RET_IP_); p = ret; goto again; } diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index e16980b00b8..06858d95512 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -19,6 +19,7 @@ #include <linux/parser.h> #include <linux/bitops.h> #include <linux/magic.h> +#include <linux/compat.h> #include "autofs_i.h" #include <linux/module.h> @@ -224,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) set_autofs_type_indirect(&sbi->type); sbi->min_proto = 0; sbi->max_proto = 0; + sbi->compat_daemon = is_compat_task(); mutex_init(&sbi->wq_mutex); mutex_init(&sbi->pipe_mutex); spin_lock_init(&sbi->fs_lock); diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index da8876d38a7..9c098db4334 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -91,7 +91,24 @@ static int autofs4_write(struct autofs_sb_info *sbi, return (bytes > 0); } - + +/* + * The autofs_v5 packet was misdesigned. + * + * The packets are identical on x86-32 and x86-64, but have different + * alignment. Which means that 'sizeof()' will give different results. + * Fix it up for the case of running 32-bit user mode on a 64-bit kernel. + */ +static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi) +{ + size_t pktsz = sizeof(struct autofs_v5_packet); +#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT) + if (sbi->compat_daemon > 0) + pktsz -= 4; +#endif + return pktsz; +} + static void autofs4_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq, int type) @@ -155,8 +172,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, { struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; - pktsz = sizeof(*packet); - + pktsz = autofs_v5_packet_size(sbi); packet->wait_queue_token = wq->wait_queue_token; packet->len = wq->name.len; memcpy(packet->name, wq->name.name, wq->name.len); @@ -505,13 +505,9 @@ EXPORT_SYMBOL(bio_clone); int bio_get_nr_vecs(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - int nr_pages; - - nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (nr_pages > queue_max_segments(q)) - nr_pages = queue_max_segments(q); - - return nr_pages; + return min_t(unsigned, + queue_max_segments(q), + queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1); } EXPORT_SYMBOL(bio_get_nr_vecs); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index b9a843226de..98f6bf10bbd 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -297,7 +297,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct rb_node *n = &head->node.rb_node; int sgn; - int ret; + int ret = 0; if (extent_op && extent_op->update_key) btrfs_disk_key_to_cpu(info_key, &extent_op->key); @@ -392,7 +392,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, struct btrfs_key *info_key, int *info_level, struct list_head *prefs) { - int ret; + int ret = 0; int slot; struct extent_buffer *leaf; struct btrfs_key key; @@ -892,6 +892,8 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, if (eb != eb_in) free_extent_buffer(eb); ret = inode_ref_info(parent, 0, fs_root, path, &found_key); + if (ret > 0) + ret = -ENOENT; if (ret) break; next_inum = found_key.offset; diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ad0b3ba735b..d986824bb2b 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -644,7 +644,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( static int btrfsic_process_superblock(struct btrfsic_state *state, struct btrfs_fs_devices *fs_devices) { - int ret; + int ret = 0; struct btrfs_super_block *selected_super; struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; @@ -1662,7 +1662,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, &state->block_hashtable); if (NULL != block) { - u64 bytenr; + u64 bytenr = 0; struct list_head *elem_ref_to; struct list_head *tmp_ref_to; @@ -2777,9 +2777,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) printk(KERN_INFO "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," " size=%lu, data=%p, bdev=%p)\n", - rw, bh->b_blocknr, - (unsigned long long)dev_bytenr, bh->b_size, - bh->b_data, bh->b_bdev); + rw, (unsigned long)bh->b_blocknr, + (unsigned long long)dev_bytenr, + (unsigned long)bh->b_size, bh->b_data, + bh->b_bdev); btrfsic_process_written_block(dev_state, dev_bytenr, bh->b_data, bh->b_size, NULL, NULL, bh, rw); @@ -2844,7 +2845,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) printk(KERN_INFO "submit_bio(rw=0x%x, bi_vcnt=%u," " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", - rw, bio->bi_vcnt, bio->bi_sector, + rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, (unsigned long long)dev_bytenr, bio->bi_bdev); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 14f1c5a0b2d..d02c27cd14c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -588,6 +588,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, page_offset(bio->bi_io_vec->bv_page), PAGE_CACHE_SIZE); read_unlock(&em_tree->lock); + if (!em) + return -EIO; compressed_len = em->block_len; cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 27ebe61d3cc..80b6486fd5e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -886,7 +886,7 @@ struct btrfs_block_rsv { u64 reserved; struct btrfs_space_info *space_info; spinlock_t lock; - unsigned int full:1; + unsigned int full; }; /* diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7aa9cd36bf1..534266fe505 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -962,6 +962,13 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; + /* + * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing + * slab allocation from alloc_extent_state down the callchain where + * it'd hit a BUG_ON as those flags are not allowed. + */ + gfp_flags &= ~GFP_SLAB_BUG_MASK; + ret = try_release_extent_state(map, tree, page, gfp_flags); if (!ret) return 0; @@ -2253,6 +2260,12 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } + if (sectorsize < PAGE_SIZE) { + printk(KERN_WARNING "btrfs: Incompatible sector size " + "found on %s\n", sb->s_id); + goto fail_sb_buffer; + } + mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); @@ -2294,6 +2307,12 @@ int open_ctree(struct super_block *sb, btrfs_close_extra_devices(fs_devices); + if (!fs_devices->latest_bdev) { + printk(KERN_CRIT "btrfs: failed to read devices on %s\n", + sb->s_id); + goto fail_tree_roots; + } + retry_root_backup: blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 700879ed64c..37e0a800d34 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -34,23 +34,24 @@ #include "locking.h" #include "free-space-cache.h" -/* control flags for do_chunk_alloc's force field +/* + * control flags for do_chunk_alloc's force field * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk * if we really need one. * - * CHUNK_ALLOC_FORCE means it must try to allocate one - * * CHUNK_ALLOC_LIMITED means to only try and allocate one * if we have very few chunks already allocated. This is * used as part of the clustering code to help make sure * we have a good pool of storage to cluster in, without * filling the FS with empty chunks * + * CHUNK_ALLOC_FORCE means it must try to allocate one + * */ enum { CHUNK_ALLOC_NO_FORCE = 0, - CHUNK_ALLOC_FORCE = 1, - CHUNK_ALLOC_LIMITED = 2, + CHUNK_ALLOC_LIMITED = 1, + CHUNK_ALLOC_FORCE = 2, }; /* @@ -3311,7 +3312,8 @@ commit_trans: } data_sinfo->bytes_may_use += bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - (u64)data_sinfo, bytes, 1); + (u64)(unsigned long)data_sinfo, + bytes, 1); spin_unlock(&data_sinfo->lock); return 0; @@ -3332,7 +3334,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", - (u64)data_sinfo, bytes, 0); + (u64)(unsigned long)data_sinfo, + bytes, 0); spin_unlock(&data_sinfo->lock); } @@ -3414,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, again: spin_lock(&space_info->lock); - if (space_info->force_alloc) + if (force < space_info->force_alloc) force = space_info->force_alloc; if (space_info->full) { spin_unlock(&space_info->lock); @@ -3610,12 +3613,15 @@ static int may_commit_transaction(struct btrfs_root *root, if (space_info != delayed_rsv->space_info) return -ENOSPC; + spin_lock(&space_info->lock); spin_lock(&delayed_rsv->lock); - if (delayed_rsv->size < bytes) { + if (space_info->bytes_pinned + delayed_rsv->size < bytes) { spin_unlock(&delayed_rsv->lock); + spin_unlock(&space_info->lock); return -ENOSPC; } spin_unlock(&delayed_rsv->lock); + spin_unlock(&space_info->lock); commit: trans = btrfs_join_transaction(root); @@ -3694,9 +3700,9 @@ again: if (used + orig_bytes <= space_info->total_bytes) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", - (u64)space_info, - orig_bytes, 1); + "space_info", + (u64)(unsigned long)space_info, + orig_bytes, 1); ret = 0; } else { /* @@ -3765,9 +3771,9 @@ again: if (used + num_bytes < space_info->total_bytes + avail) { space_info->bytes_may_use += orig_bytes; trace_btrfs_space_reservation(root->fs_info, - "space_info", - (u64)space_info, - orig_bytes, 1); + "space_info", + (u64)(unsigned long)space_info, + orig_bytes, 1); ret = 0; } else { wait_ordered = true; @@ -3912,8 +3918,8 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, spin_lock(&space_info->lock); space_info->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)space_info, - num_bytes, 0); + (u64)(unsigned long)space_info, + num_bytes, 0); space_info->reservation_progress++; spin_unlock(&space_info->lock); } @@ -4104,7 +4110,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) num_bytes += div64_u64(data_used + meta_used, 50); if (num_bytes * 3 > meta_used) - num_bytes = div64_u64(meta_used, 3); + num_bytes = div64_u64(meta_used, 3) * 2; return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); } @@ -4131,14 +4137,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->reserved += num_bytes; sinfo->bytes_may_use += num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)sinfo, num_bytes, 1); + (u64)(unsigned long)sinfo, num_bytes, 1); } if (block_rsv->reserved >= block_rsv->size) { num_bytes = block_rsv->reserved - block_rsv->size; sinfo->bytes_may_use -= num_bytes; trace_btrfs_space_reservation(fs_info, "space_info", - (u64)sinfo, num_bytes, 0); + (u64)(unsigned long)sinfo, num_bytes, 0); sinfo->reservation_progress++; block_rsv->reserved = block_rsv->size; block_rsv->full = 1; @@ -4191,7 +4197,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, if (!trans->bytes_reserved) return; - trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans, + trace_btrfs_space_reservation(root->fs_info, "transaction", + (u64)(unsigned long)trans, trans->bytes_reserved, 0); btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); trans->bytes_reserved = 0; @@ -4709,9 +4716,9 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, space_info->bytes_reserved += num_bytes; if (reserve == RESERVE_ALLOC) { trace_btrfs_space_reservation(cache->fs_info, - "space_info", - (u64)space_info, - num_bytes, 0); + "space_info", + (u64)(unsigned long)space_info, + num_bytes, 0); space_info->bytes_may_use -= num_bytes; } } @@ -5794,6 +5801,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, u64 search_end, struct btrfs_key *ins, u64 data) { + bool final_tried = false; int ret; u64 search_start = 0; @@ -5813,22 +5821,25 @@ again: search_start, search_end, hint_byte, ins, data); - if (ret == -ENOSPC && num_bytes > min_alloc_size) { - num_bytes = num_bytes >> 1; - num_bytes = num_bytes & ~(root->sectorsize - 1); - num_bytes = max(num_bytes, min_alloc_size); - do_chunk_alloc(trans, root->fs_info->extent_root, - num_bytes, data, CHUNK_ALLOC_FORCE); - goto again; - } - if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { - struct btrfs_space_info *sinfo; - - sinfo = __find_space_info(root->fs_info, data); - printk(KERN_ERR "btrfs allocation failed flags %llu, " - "wanted %llu\n", (unsigned long long)data, - (unsigned long long)num_bytes); - dump_space_info(sinfo, num_bytes, 1); + if (ret == -ENOSPC) { + if (!final_tried) { + num_bytes = num_bytes >> 1; + num_bytes = num_bytes & ~(root->sectorsize - 1); + num_bytes = max(num_bytes, min_alloc_size); + do_chunk_alloc(trans, root->fs_info->extent_root, + num_bytes, data, CHUNK_ALLOC_FORCE); + if (num_bytes == min_alloc_size) + final_tried = true; + goto again; + } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) { + struct btrfs_space_info *sinfo; + + sinfo = __find_space_info(root->fs_info, data); + printk(KERN_ERR "btrfs allocation failed flags %llu, " + "wanted %llu\n", (unsigned long long)data, + (unsigned long long)num_bytes); + dump_space_info(sinfo, num_bytes, 1); + } } trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); @@ -7881,9 +7892,16 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) u64 start; u64 end; u64 trimmed = 0; + u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); int ret = 0; - cache = btrfs_lookup_block_group(fs_info, range->start); + /* + * try to trim all FS space, our block group may start from non-zero. + */ + if (range->len == total_bytes) + cache = btrfs_lookup_first_block_group(fs_info, range->start); + else + cache = btrfs_lookup_block_group(fs_info, range->start); while (cache) { if (cache->key.objectid >= (range->start + range->len)) { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9d09a4f8187..a55fbe6252d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -513,6 +513,15 @@ hit_next: WARN_ON(state->end < start); last_end = state->end; + if (state->end < end && !need_resched()) + next_node = rb_next(&state->rb_node); + else + next_node = NULL; + + /* the state doesn't have the wanted bits, go ahead */ + if (!(state->state & bits)) + goto next; + /* * | ---- desired range ---- | * | state | or @@ -565,20 +574,15 @@ hit_next: goto out; } - if (state->end < end && prealloc && !need_resched()) - next_node = rb_next(&state->rb_node); - else - next_node = NULL; - set |= clear_state_bit(tree, state, &bits, wake); +next: if (last_end == (u64)-1) goto out; start = last_end + 1; if (start <= end && next_node) { state = rb_entry(next_node, struct extent_state, rb_node); - if (state->start == start) - goto hit_next; + goto hit_next; } goto search_again; @@ -961,8 +965,6 @@ hit_next: set_state_bits(tree, state, &bits); clear_state_bit(tree, state, &clear_bits, 0); - - merge_state(tree, state); if (last_end == (u64)-1) goto out; @@ -1007,7 +1009,6 @@ hit_next: if (state->end <= end) { set_state_bits(tree, state, &bits); clear_state_bit(tree, state, &clear_bits, 0); - merge_state(tree, state); if (last_end == (u64)-1) goto out; start = last_end + 1; @@ -1068,8 +1069,6 @@ hit_next: set_state_bits(tree, prealloc, &bits); clear_state_bit(tree, prealloc, &clear_bits, 0); - - merge_state(tree, prealloc); prealloc = NULL; goto out; } @@ -2154,13 +2153,46 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, failrec->this_mirror, num_copies, failrec->in_validation); - tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror, - failrec->bio_flags, 0); - return 0; + ret = tree->ops->submit_bio_hook(inode, read_mode, bio, + failrec->this_mirror, + failrec->bio_flags, 0); + return ret; } /* lots and lots of room for performance fixes in the end_bio funcs */ +int end_extent_writepage(struct page *page, int err, u64 start, u64 end) +{ + int uptodate = (err == 0); + struct extent_io_tree *tree; + int ret; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + + if (tree->ops && tree->ops->writepage_end_io_hook) { + ret = tree->ops->writepage_end_io_hook(page, start, + end, NULL, uptodate); + if (ret) + uptodate = 0; + } + + if (!uptodate && tree->ops && + tree->ops->writepage_io_failed_hook) { + ret = tree->ops->writepage_io_failed_hook(NULL, page, + start, end, NULL); + /* Writeback already completed */ + if (ret == 0) + return 1; + } + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); + ClearPageUptodate(page); + SetPageError(page); + } + return 0; +} + /* * after a writepage IO is done, we need to: * clear the uptodate bits on error @@ -2172,13 +2204,11 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, */ static void end_bio_extent_writepage(struct bio *bio, int err) { - int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_io_tree *tree; u64 start; u64 end; int whole_page; - int ret; do { struct page *page = bvec->bv_page; @@ -2195,28 +2225,9 @@ static void end_bio_extent_writepage(struct bio *bio, int err) if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (tree->ops && tree->ops->writepage_end_io_hook) { - ret = tree->ops->writepage_end_io_hook(page, start, - end, NULL, uptodate); - if (ret) - uptodate = 0; - } - - if (!uptodate && tree->ops && - tree->ops->writepage_io_failed_hook) { - ret = tree->ops->writepage_io_failed_hook(bio, page, - start, end, NULL); - if (ret == 0) { - uptodate = (err == 0); - continue; - } - } - if (!uptodate) { - clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); - ClearPageUptodate(page); - SetPageError(page); - } + if (end_extent_writepage(page, err, start, end)) + continue; if (whole_page) end_page_writeback(page); @@ -2779,9 +2790,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = delalloc_end + 1; continue; } - tree->ops->fill_delalloc(inode, page, delalloc_start, - delalloc_end, &page_started, - &nr_written); + ret = tree->ops->fill_delalloc(inode, page, + delalloc_start, + delalloc_end, + &page_started, + &nr_written); + BUG_ON(ret); /* * delalloc_end is already one less than the total * length, so we don't subtract one from @@ -2818,8 +2832,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (tree->ops && tree->ops->writepage_start_hook) { ret = tree->ops->writepage_start_hook(page, start, page_end); - if (ret == -EAGAIN) { - redirty_page_for_writepage(wbc, page); + if (ret) { + /* Fixup worker will requeue */ + if (ret == -EBUSY) + wbc->pages_skipped++; + else + redirty_page_for_writepage(wbc, page); update_nr_written(page, wbc, nr_written); unlock_page(page); ret = 0; @@ -3289,7 +3307,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, len = end - start + 1; write_lock(&map->lock); em = lookup_extent_mapping(map, start, len); - if (IS_ERR_OR_NULL(em)) { + if (!em) { write_unlock(&map->lock); break; } @@ -3853,10 +3871,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); - if (eb_straddles_pages(eb)) { - clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - cached_state, GFP_NOFS); - } + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + cached_state, GFP_NOFS); + for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (page) @@ -3909,6 +3926,8 @@ int extent_range_uptodate(struct extent_io_tree *tree, while (start <= end) { index = start >> PAGE_CACHE_SHIFT; page = find_get_page(tree->mapping, index); + if (!page) + return 1; uptodate = PageUptodate(page); page_cache_release(page); if (!uptodate) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bc6a042cb6f..cecc3518c12 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -319,4 +319,5 @@ struct btrfs_mapping_tree; int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, u64 length, u64 logical, struct page *page, int mirror_num); +int end_extent_writepage(struct page *page, int err, u64 start, u64 end); #endif diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 33a7890b1f4..1195f09761f 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h @@ -26,8 +26,8 @@ struct extent_map { unsigned long flags; struct block_device *bdev; atomic_t refs; - unsigned int in_tree:1; - unsigned int compress_type:4; + unsigned int in_tree; + unsigned int compress_type; }; struct extent_map_tree { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 859ba2dd889..e8d06b6b919 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1605,6 +1605,14 @@ static long btrfs_fallocate(struct file *file, int mode, return -EOPNOTSUPP; /* + * Make sure we have enough space before we do the + * allocation. + */ + ret = btrfs_check_data_free_space(inode, len); + if (ret) + return ret; + + /* * wait for ordered IO before we have any locks. We'll loop again * below with the locks held. */ @@ -1667,27 +1675,12 @@ static long btrfs_fallocate(struct file *file, int mode, if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { - - /* - * Make sure we have enough space before we do the - * allocation. - */ - ret = btrfs_check_data_free_space(inode, last_byte - - cur_offset); - if (ret) { - free_extent_map(em); - break; - } - ret = btrfs_prealloc_file_range(inode, mode, cur_offset, last_byte - cur_offset, 1 << inode->i_blkbits, offset + len, &alloc_hint); - /* Let go of our reservation. */ - btrfs_free_reserved_data_space(inode, last_byte - - cur_offset); if (ret < 0) { free_extent_map(em); break; @@ -1715,6 +1708,8 @@ static long btrfs_fallocate(struct file *file, int mode, &cached_state, GFP_NOFS); out: mutex_unlock(&inode->i_mutex); + /* Let go of our reservation. */ + btrfs_free_reserved_data_space(inode, len); return ret; } @@ -1761,7 +1756,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) start - root->sectorsize, root->sectorsize, 0); if (IS_ERR(em)) { - ret = -ENXIO; + ret = PTR_ERR(em); goto out; } last_end = em->start + em->len; @@ -1773,7 +1768,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) while (1) { em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0); if (IS_ERR(em)) { - ret = -ENXIO; + ret = PTR_ERR(em); break; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index d20ff87ca60..710ea380c7e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -777,6 +777,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, spin_lock(&block_group->lock); if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) { spin_unlock(&block_group->lock); + btrfs_free_path(path); goto out; } spin_unlock(&block_group->lock); @@ -2242,7 +2243,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, if (entry->bitmap) { ret = btrfs_alloc_from_bitmap(block_group, cluster, entry, bytes, - min_start); + cluster->window_start); if (ret == 0) { node = rb_next(&entry->offset_index); if (!node) @@ -2251,6 +2252,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, offset_index); continue; } + cluster->window_start += bytes; } else { ret = entry->offset; @@ -2475,7 +2477,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, } list_for_each_entry(entry, bitmaps, list) { - if (entry->bytes < min_bytes) + if (entry->bytes < bytes) continue; ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, bytes, cont1_bytes, min_bytes); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 213ffa86ce1..ee15d88b33d 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -438,7 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root, trans->bytes_reserved); if (ret) goto out; - trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans, + trace_btrfs_space_reservation(root->fs_info, "ino_cache", + (u64)(unsigned long)trans, trans->bytes_reserved, 1); again: inode = lookup_free_ino_inode(root, path); @@ -500,7 +501,8 @@ again: out_put: iput(inode); out_release: - trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans, + trace_btrfs_space_reservation(root->fs_info, "ino_cache", + (u64)(unsigned long)trans, trans->bytes_reserved, 0); btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved); out: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0da19a0ea00..892b34785cc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1555,6 +1555,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) struct inode *inode; u64 page_start; u64 page_end; + int ret; fixup = container_of(work, struct btrfs_writepage_fixup, work); page = fixup->page; @@ -1582,12 +1583,21 @@ again: page_end, &cached_state, GFP_NOFS); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); goto again; } - BUG(); + ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); + if (ret) { + mapping_set_error(page->mapping, ret); + end_extent_writepage(page, ret, page_start, page_end); + ClearPageChecked(page); + goto out; + } + btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); ClearPageChecked(page); + set_page_dirty(page); out: unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state, GFP_NOFS); @@ -1630,7 +1640,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) fixup->work.func = btrfs_writepage_fixup_worker; fixup->page = page; btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); - return -EAGAIN; + return -EBUSY; } static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, @@ -4575,7 +4585,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_insert_dir_item(trans, root, name, name_len, parent_inode, &key, btrfs_inode_type(inode), index); - BUG_ON(ret); + if (ret) + goto fail_dir_item; btrfs_i_size_write(parent_inode, parent_inode->i_size + name_len * 2); @@ -4583,6 +4594,23 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, ret = btrfs_update_inode(trans, root, parent_inode); } return ret; + +fail_dir_item: + if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) { + u64 local_index; + int err; + err = btrfs_del_root_ref(trans, root->fs_info->tree_root, + key.objectid, root->root_key.objectid, + parent_ino, &local_index, name, name_len); + + } else if (add_backref) { + u64 local_index; + int err; + + err = btrfs_del_inode_ref(trans, root, name, name_len, + ino, parent_ino, &local_index); + } + return ret; } static int btrfs_add_nondir(struct btrfs_trans_handle *trans, @@ -6401,18 +6429,23 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) unsigned long zero_start; loff_t size; int ret; + int reserved = 0; u64 page_start; u64 page_end; ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); - if (!ret) + if (!ret) { ret = btrfs_update_time(vma->vm_file); + reserved = 1; + } if (ret) { if (ret == -ENOMEM) ret = VM_FAULT_OOM; else /* -ENOSPC, -EIO, etc */ ret = VM_FAULT_SIGBUS; - goto out; + if (reserved) + goto out; + goto out_noreserve; } ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ @@ -6495,6 +6528,7 @@ out_unlock: unlock_page(page); out: btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); +out_noreserve: return ret; } @@ -6690,8 +6724,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, int err; u64 index = 0; - inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, - new_dirid, S_IFDIR | 0700, &index); + inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, + new_dirid, new_dirid, + S_IFDIR | (~current_umask() & S_IRWXUGO), + &index); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &btrfs_dir_inode_operations; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ab620014bcc..d8b54715c2d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -861,6 +861,7 @@ static int cluster_pages_for_defrag(struct inode *inode, int i_done; struct btrfs_ordered_extent *ordered; struct extent_state *cached_state = NULL; + struct extent_io_tree *tree; gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); if (isize == 0) @@ -871,18 +872,34 @@ static int cluster_pages_for_defrag(struct inode *inode, num_pages << PAGE_CACHE_SHIFT); if (ret) return ret; -again: - ret = 0; i_done = 0; + tree = &BTRFS_I(inode)->io_tree; /* step one, lock all the pages */ for (i = 0; i < num_pages; i++) { struct page *page; +again: page = find_or_create_page(inode->i_mapping, - start_index + i, mask); + start_index + i, mask); if (!page) break; + page_start = page_offset(page); + page_end = page_start + PAGE_CACHE_SIZE - 1; + while (1) { + lock_extent(tree, page_start, page_end, GFP_NOFS); + ordered = btrfs_lookup_ordered_extent(inode, + page_start); + unlock_extent(tree, page_start, page_end, GFP_NOFS); + if (!ordered) + break; + + unlock_page(page); + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + lock_page(page); + } + if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); @@ -893,15 +910,22 @@ again: break; } } + isize = i_size_read(inode); file_end = (isize - 1) >> PAGE_CACHE_SHIFT; - if (!isize || page->index > file_end || - page->mapping != inode->i_mapping) { + if (!isize || page->index > file_end) { /* whoops, we blew past eof, skip this page */ unlock_page(page); page_cache_release(page); break; } + + if (page->mapping != inode->i_mapping) { + unlock_page(page); + page_cache_release(page); + goto again; + } + pages[i] = page; i_done++; } @@ -924,25 +948,6 @@ again: lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, 0, &cached_state, GFP_NOFS); - ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1); - if (ordered && - ordered->file_offset + ordered->len > page_start && - ordered->file_offset < page_end) { - btrfs_put_ordered_extent(ordered); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, - page_start, page_end - 1, - &cached_state, GFP_NOFS); - for (i = 0; i < i_done; i++) { - unlock_page(pages[i]); - page_cache_release(pages[i]); - } - btrfs_wait_ordered_range(inode, page_start, - page_end - page_start); - goto again; - } - if (ordered) - btrfs_put_ordered_extent(ordered); - clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, @@ -1065,7 +1070,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, i = range->start >> PAGE_CACHE_SHIFT; } if (!max_to_defrag) - max_to_defrag = last_index; + max_to_defrag = last_index + 1; /* * make writeback starts from i, so the defrag range can be @@ -1327,6 +1332,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, goto out; } + if (name[0] == '.' && + (namelen == 1 || (name[1] == '.' && namelen == 2))) { + ret = -EEXIST; + goto out; + } + if (subvol) { ret = btrfs_mksubvol(&file->f_path, name, namelen, NULL, transid, readonly); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9770cc5bfb7..abc0fbffa51 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1367,7 +1367,8 @@ out: } static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, - u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length) + u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length, + u64 dev_offset) { struct btrfs_mapping_tree *map_tree = &sdev->dev->dev_root->fs_info->mapping_tree; @@ -1391,7 +1392,8 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, goto out; for (i = 0; i < map->num_stripes; ++i) { - if (map->stripes[i].dev == sdev->dev) { + if (map->stripes[i].dev == sdev->dev && + map->stripes[i].physical == dev_offset) { ret = scrub_stripe(sdev, map, i, chunk_offset, length); if (ret) goto out; @@ -1487,7 +1489,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) break; } ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, - chunk_offset, length); + chunk_offset, length, found_key.offset); btrfs_put_block_group(cache); if (ret) break; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 287a6728b1a..04b77e3ceb7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -327,7 +327,8 @@ again: if (num_bytes) { trace_btrfs_space_reservation(root->fs_info, "transaction", - (u64)h, num_bytes, 1); + (u64)(unsigned long)h, + num_bytes, 1); h->block_rsv = &root->fs_info->trans_block_rsv; h->bytes_reserved = num_bytes; } @@ -915,7 +916,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, dentry->d_name.name, dentry->d_name.len, parent_inode, &key, BTRFS_FT_DIR, index); - BUG_ON(ret); + if (ret) { + pending->error = -EEXIST; + dput(parent); + goto fail; + } btrfs_i_size_write(parent_inode, parent_inode->i_size + dentry->d_name.len * 2); @@ -993,12 +998,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, { struct btrfs_pending_snapshot *pending; struct list_head *head = &trans->transaction->pending_snapshots; - int ret; - list_for_each_entry(pending, head, list) { - ret = create_pending_snapshot(trans, fs_info, pending); - BUG_ON(ret); - } + list_for_each_entry(pending, head, list) + create_pending_snapshot(trans, fs_info, pending); return 0; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cb877e0886a..966cc74f5d6 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1957,7 +1957,8 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, finish_wait(&root->log_commit_wait[index], &wait); mutex_lock(&root->log_mutex); - } while (root->log_transid < transid + 2 && + } while (root->fs_info->last_trans_log_full_commit != + trans->transid && root->log_transid < transid + 2 && atomic_read(&root->log_commit[index])); return 0; } @@ -1966,7 +1967,8 @@ static int wait_for_writer(struct btrfs_trans_handle *trans, struct btrfs_root *root) { DEFINE_WAIT(wait); - while (atomic_read(&root->log_writers)) { + while (root->fs_info->last_trans_log_full_commit != + trans->transid && atomic_read(&root->log_writers)) { prepare_to_wait(&root->log_writer_wait, &wait, TASK_UNINTERRUPTIBLE); mutex_unlock(&root->log_mutex); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0b4e2af7954..ef41f285a47 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -459,12 +459,23 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device, *next; + struct block_device *latest_bdev = NULL; + u64 latest_devid = 0; + u64 latest_transid = 0; + mutex_lock(&uuid_mutex); again: /* This is the initialized path, it is safe to release the devices. */ list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { - if (device->in_fs_metadata) + if (device->in_fs_metadata) { + if (!latest_transid || + device->generation > latest_transid) { + latest_devid = device->devid; + latest_transid = device->generation; + latest_bdev = device->bdev; + } continue; + } if (device->bdev) { blkdev_put(device->bdev, device->mode); @@ -487,6 +498,10 @@ again: goto again; } + fs_devices->latest_bdev = latest_bdev; + fs_devices->latest_devid = latest_devid; + fs_devices->latest_trans = latest_transid; + mutex_unlock(&uuid_mutex); return 0; } @@ -1953,7 +1968,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, em = lookup_extent_mapping(em_tree, chunk_offset, 1); read_unlock(&em_tree->lock); - BUG_ON(em->start > chunk_offset || + BUG_ON(!em || em->start > chunk_offset || em->start + em->len < chunk_offset); map = (struct map_lookup *)em->bdev; @@ -4356,6 +4371,20 @@ int btrfs_read_sys_array(struct btrfs_root *root) return -ENOMEM; btrfs_set_buffer_uptodate(sb); btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); + /* + * The sb extent buffer is artifical and just used to read the system array. + * btrfs_set_buffer_uptodate() call does not properly mark all it's + * pages up-to-date when the page is larger: extent does not cover the + * whole page and consequently check_page_uptodate does not find all + * the page's extents up-to-date (the hole beyond sb), + * write_extent_buffer then triggers a WARN_ON. + * + * Regular short extents go through mark_extent_buffer_dirty/writeback cycle, + * but sb spans only this function. Add an explicit SetPageUptodate call + * to silence the warning eg. on PowerPC 64. + */ + if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE) + SetPageUptodate(sb->first_page); write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b60fc8bfb3e..620daad201d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -641,10 +641,10 @@ static int __cap_is_valid(struct ceph_cap *cap) unsigned long ttl; u32 gen; - spin_lock(&cap->session->s_cap_lock); + spin_lock(&cap->session->s_gen_ttl_lock); gen = cap->session->s_cap_gen; ttl = cap->session->s_cap_ttl; - spin_unlock(&cap->session->s_cap_lock); + spin_unlock(&cap->session->s_gen_ttl_lock); if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) { dout("__cap_is_valid %p cap %p issued %s " diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 618246bc219..3e8094be460 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -975,10 +975,10 @@ static int dentry_lease_is_valid(struct dentry *dentry) di = ceph_dentry(dentry); if (di->lease_session) { s = di->lease_session; - spin_lock(&s->s_cap_lock); + spin_lock(&s->s_gen_ttl_lock); gen = s->s_cap_gen; ttl = s->s_cap_ttl; - spin_unlock(&s->s_cap_lock); + spin_unlock(&s->s_gen_ttl_lock); if (di->lease_gen == gen && time_before(jiffies, dentry->d_time) && diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 23ab6a3f182..866e8d7ca37 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -262,6 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg, /* trace */ ceph_decode_32_safe(&p, end, len, bad); if (len > 0) { + ceph_decode_need(&p, end, len, bad); err = parse_reply_info_trace(&p, p+len, info, features); if (err < 0) goto out_bad; @@ -270,6 +271,7 @@ static int parse_reply_info(struct ceph_msg *msg, /* extra */ ceph_decode_32_safe(&p, end, len, bad); if (len > 0) { + ceph_decode_need(&p, end, len, bad); err = parse_reply_info_extra(&p, p+len, info, features); if (err < 0) goto out_bad; @@ -398,9 +400,11 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; s->s_con.peer_name.num = cpu_to_le64(mds); - spin_lock_init(&s->s_cap_lock); + spin_lock_init(&s->s_gen_ttl_lock); s->s_cap_gen = 0; s->s_cap_ttl = 0; + + spin_lock_init(&s->s_cap_lock); s->s_renew_requested = 0; s->s_renew_seq = 0; INIT_LIST_HEAD(&s->s_caps); @@ -2326,10 +2330,10 @@ static void handle_session(struct ceph_mds_session *session, case CEPH_SESSION_STALE: pr_info("mds%d caps went stale, renewing\n", session->s_mds); - spin_lock(&session->s_cap_lock); + spin_lock(&session->s_gen_ttl_lock); session->s_cap_gen++; session->s_cap_ttl = 0; - spin_unlock(&session->s_cap_lock); + spin_unlock(&session->s_gen_ttl_lock); send_renew_caps(mdsc, session); break; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index a50ca0e3947..8c7c04ebb59 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -117,10 +117,13 @@ struct ceph_mds_session { void *s_authorizer_buf, *s_authorizer_reply_buf; size_t s_authorizer_buf_len, s_authorizer_reply_buf_len; - /* protected by s_cap_lock */ - spinlock_t s_cap_lock; + /* protected by s_gen_ttl_lock */ + spinlock_t s_gen_ttl_lock; u32 s_cap_gen; /* inc each time we get mds stale msg */ unsigned long s_cap_ttl; /* when session caps expire */ + + /* protected by s_cap_lock */ + spinlock_t s_cap_lock; struct list_head s_caps; /* all caps issued by this session */ int s_nr_caps, s_trim_caps; int s_num_cap_releases; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 857214ae8c0..a76f697303d 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -111,8 +111,10 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, } static struct ceph_vxattr_cb ceph_file_vxattrs[] = { + { true, "ceph.file.layout", ceph_vxattrcb_layout}, + /* The following extended attribute name is deprecated */ { true, "ceph.layout", ceph_vxattrcb_layout}, - { NULL, NULL } + { true, NULL, NULL } }; static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode) diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index f66cc162515..2b243af70aa 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -139,8 +139,7 @@ config CIFS_DFS_UPCALL points. If unsure, say N. config CIFS_FSCACHE - bool "Provide CIFS client caching support (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "Provide CIFS client caching support" depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y help Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data @@ -148,8 +147,8 @@ config CIFS_FSCACHE manager. If unsure, say N. config CIFS_ACL - bool "Provide CIFS ACL support (EXPERIMENTAL)" - depends on EXPERIMENTAL && CIFS_XATTR && KEYS + bool "Provide CIFS ACL support" + depends on CIFS_XATTR && KEYS help Allows to fetch CIFS/NTFS ACL from the server. The DACL blob is handed over to the application/caller. diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 84e8c072470..24b3dfc0528 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -676,14 +676,23 @@ static ssize_t cifs_multiuser_mount_proc_write(struct file *file, { char c; int rc; + static bool warned; rc = get_user(c, buffer); if (rc) return rc; if (c == '0' || c == 'n' || c == 'N') multiuser_mount = 0; - else if (c == '1' || c == 'y' || c == 'Y') + else if (c == '1' || c == 'y' || c == 'Y') { multiuser_mount = 1; + if (!warned) { + warned = true; + printk(KERN_WARNING "CIFS VFS: The legacy multiuser " + "mount code is scheduled to be deprecated in " + "3.5. Please switch to using the multiuser " + "mount option."); + } + } return count; } diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 2272fd5fe5b..e622863b292 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -113,9 +113,11 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) MAX_MECH_STR_LEN + UID_KEY_LEN + (sizeof(uid_t) * 2) + CREDUID_KEY_LEN + (sizeof(uid_t) * 2) + - USER_KEY_LEN + strlen(sesInfo->user_name) + PID_KEY_LEN + (sizeof(pid_t) * 2) + 1; + if (sesInfo->user_name) + desc_len += USER_KEY_LEN + strlen(sesInfo->user_name); + spnego_key = ERR_PTR(-ENOMEM); description = kzalloc(desc_len, GFP_KERNEL); if (description == NULL) @@ -152,8 +154,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo) dp = description + strlen(description); sprintf(dp, ";creduid=0x%x", sesInfo->cred_uid); - dp = description + strlen(description); - sprintf(dp, ";user=%s", sesInfo->user_name); + if (sesInfo->user_name) { + dp = description + strlen(description); + sprintf(dp, ";user=%s", sesInfo->user_name); + } dp = description + strlen(description); sprintf(dp, ";pid=0x%x", current->pid); diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 1b2e180b018..fbb9da95184 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -27,17 +27,17 @@ #include "cifs_debug.h" /* - * cifs_ucs2_bytes - how long will a string be after conversion? - * @ucs - pointer to input string + * cifs_utf16_bytes - how long will a string be after conversion? + * @utf16 - pointer to input string * @maxbytes - don't go past this many bytes of input string * @codepage - destination codepage * - * Walk a ucs2le string and return the number of bytes that the string will + * Walk a utf16le string and return the number of bytes that the string will * be after being converted to the given charset, not including any null * termination required. Don't walk past maxbytes in the source buffer. */ int -cifs_ucs2_bytes(const __le16 *from, int maxbytes, +cifs_utf16_bytes(const __le16 *from, int maxbytes, const struct nls_table *codepage) { int i; @@ -122,7 +122,7 @@ cp_convert: } /* - * cifs_from_ucs2 - convert utf16le string to local charset + * cifs_from_utf16 - convert utf16le string to local charset * @to - destination buffer * @from - source buffer * @tolen - destination buffer size (in bytes) @@ -130,7 +130,7 @@ cp_convert: * @codepage - codepage to which characters should be converted * @mapchar - should characters be remapped according to the mapchars option? * - * Convert a little-endian ucs2le string (as sent by the server) to a string + * Convert a little-endian utf16le string (as sent by the server) to a string * in the provided codepage. The tolen and fromlen parameters are to ensure * that the code doesn't walk off of the end of the buffer (which is always * a danger if the alignment of the source buffer is off). The destination @@ -139,12 +139,12 @@ cp_convert: * null terminator). * * Note that some windows versions actually send multiword UTF-16 characters - * instead of straight UCS-2. The linux nls routines however aren't able to + * instead of straight UTF16-2. The linux nls routines however aren't able to * deal with those characters properly. In the event that we get some of * those characters, they won't be translated properly. */ int -cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, +cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, const struct nls_table *codepage, bool mapchar) { int i, charlen, safelen; @@ -190,13 +190,13 @@ cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, } /* - * NAME: cifs_strtoUCS() + * NAME: cifs_strtoUTF16() * * FUNCTION: Convert character string to unicode string * */ int -cifs_strtoUCS(__le16 *to, const char *from, int len, +cifs_strtoUTF16(__le16 *to, const char *from, int len, const struct nls_table *codepage) { int charlen; @@ -206,7 +206,7 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, for (i = 0; len && *from; i++, from += charlen, len -= charlen) { charlen = codepage->char2uni(from, len, &wchar_to); if (charlen < 1) { - cERROR(1, "strtoUCS: char2uni of 0x%x returned %d", + cERROR(1, "strtoUTF16: char2uni of 0x%x returned %d", *from, charlen); /* A question mark */ wchar_to = 0x003f; @@ -220,7 +220,8 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, } /* - * cifs_strndup_from_ucs - copy a string from wire format to the local codepage + * cifs_strndup_from_utf16 - copy a string from wire format to the local + * codepage * @src - source string * @maxlen - don't walk past this many bytes in the source string * @is_unicode - is this a unicode string? @@ -231,19 +232,19 @@ cifs_strtoUCS(__le16 *to, const char *from, int len, * error. */ char * -cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode, - const struct nls_table *codepage) +cifs_strndup_from_utf16(const char *src, const int maxlen, + const bool is_unicode, const struct nls_table *codepage) { int len; char *dst; if (is_unicode) { - len = cifs_ucs2_bytes((__le16 *) src, maxlen, codepage); + len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage); len += nls_nullsize(codepage); dst = kmalloc(len, GFP_KERNEL); if (!dst) return NULL; - cifs_from_ucs2(dst, (__le16 *) src, len, maxlen, codepage, + cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, false); } else { len = strnlen(src, maxlen); @@ -264,7 +265,7 @@ cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode, * names are little endian 16 bit Unicode on the wire */ int -cifsConvertToUCS(__le16 *target, const char *source, int srclen, +cifsConvertToUTF16(__le16 *target, const char *source, int srclen, const struct nls_table *cp, int mapChars) { int i, j, charlen; @@ -273,7 +274,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen, wchar_t tmp; if (!mapChars) - return cifs_strtoUCS(target, source, PATH_MAX, cp); + return cifs_strtoUTF16(target, source, PATH_MAX, cp); for (i = 0, j = 0; i < srclen; j++) { src_char = source[i]; @@ -281,7 +282,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen, switch (src_char) { case 0: put_unaligned(0, &target[j]); - goto ctoUCS_out; + goto ctoUTF16_out; case ':': dst_char = cpu_to_le16(UNI_COLON); break; @@ -326,7 +327,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen, put_unaligned(dst_char, &target[j]); } -ctoUCS_out: +ctoUTF16_out: return i; } diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 6d02fd56056..a513a546700 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -74,16 +74,16 @@ extern const struct UniCaseRange CifsUniLowerRange[]; #endif /* UNIUPR_NOLOWER */ #ifdef __KERNEL__ -int cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen, - const struct nls_table *codepage, bool mapchar); -int cifs_ucs2_bytes(const __le16 *from, int maxbytes, - const struct nls_table *codepage); -int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); -char *cifs_strndup_from_ucs(const char *src, const int maxlen, - const bool is_unicode, - const struct nls_table *codepage); -extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen, - const struct nls_table *cp, int mapChars); +int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, + const struct nls_table *codepage, bool mapchar); +int cifs_utf16_bytes(const __le16 *from, int maxbytes, + const struct nls_table *codepage); +int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *); +char *cifs_strndup_from_utf16(const char *src, const int maxlen, + const bool is_unicode, + const struct nls_table *codepage); +extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen, + const struct nls_table *cp, int mapChars); #endif diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 72ddf23ef6f..c1b25448738 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -909,6 +909,8 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl, umode_t group_mask = S_IRWXG; umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO; + if (num_aces > ULONG_MAX / sizeof(struct cifs_ace *)) + return; ppace = kmalloc(num_aces * sizeof(struct cifs_ace *), GFP_KERNEL); if (!ppace) { diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 5d9b9acc5fc..63c460e503b 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -327,7 +327,7 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); attrptr->length = cpu_to_le16(2 * dlen); blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); - cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); + cifs_strtoUTF16((__le16 *)blobptr, ses->domainName, dlen, nls_cp); return 0; } @@ -376,7 +376,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) kmalloc(attrsize + 1, GFP_KERNEL); if (!ses->domainName) return -ENOMEM; - cifs_from_ucs2(ses->domainName, + cifs_from_utf16(ses->domainName, (__le16 *)blobptr, attrsize, attrsize, nls_cp, false); break; @@ -420,15 +420,20 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, } /* convert ses->user_name to unicode and uppercase */ - len = strlen(ses->user_name); + len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); if (user == NULL) { cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); rc = -ENOMEM; return rc; } - len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp); - UniStrupr(user); + + if (len) { + len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp); + UniStrupr(user); + } else { + memset(user, '\0', 2); + } rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, (char *)user, 2 * len); @@ -448,8 +453,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, rc = -ENOMEM; return rc; } - len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, - nls_cp); + len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len, + nls_cp); rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, (char *)domain, 2 * len); @@ -468,7 +473,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, rc = -ENOMEM; return rc; } - len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, + len = cifs_strtoUTF16((__le16 *)server, ses->serverName, len, nls_cp); rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index ba53c1c6c6c..76e7d8b6da1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -879,6 +879,8 @@ require use of the stronger protocol */ #define CIFSSEC_MASK 0xB70B7 /* current flags supported if weak */ #endif /* UPCALL */ #else /* do not allow weak pw hash */ +#define CIFSSEC_MUST_LANMAN 0 +#define CIFSSEC_MUST_PLNTXT 0 #ifdef CONFIG_CIFS_UPCALL #define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */ #else diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6600aa2d2ef..8b7794c3159 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -821,8 +821,8 @@ PsxDelete: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB add path length overrun check */ @@ -893,8 +893,8 @@ DelFileRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->fileName, fileName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->fileName, fileName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ @@ -938,8 +938,8 @@ RmDirRetry: return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = cifsConvertToUCS((__le16 *) pSMB->DirName, dirName, - PATH_MAX, nls_codepage, remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, dirName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ @@ -981,8 +981,8 @@ MkDirRetry: return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = cifsConvertToUCS((__le16 *) pSMB->DirName, name, - PATH_MAX, nls_codepage, remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ @@ -1030,8 +1030,8 @@ PsxCreat: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, name, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, name, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -1197,8 +1197,8 @@ OldOpenRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { count = 1; /* account for one byte pad to word boundary */ name_len = - cifsConvertToUCS((__le16 *) (pSMB->fileName + 1), - fileName, PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) (pSMB->fileName + 1), + fileName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve check for buffer overruns BB */ @@ -1304,8 +1304,8 @@ openRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { count = 1; /* account for one byte pad to word boundary */ name_len = - cifsConvertToUCS((__le16 *) (pSMB->fileName + 1), - fileName, PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) (pSMB->fileName + 1), + fileName, PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; pSMB->NameLength = cpu_to_le16(name_len); @@ -2649,16 +2649,16 @@ renameRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->OldFileName, fromName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; pSMB->OldFileName[name_len] = 0x04; /* pad */ /* protocol requires ASCII signature byte on Unicode string */ pSMB->OldFileName[name_len + 1] = 0x00; name_len2 = - cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], - toName, PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], + toName, PATH_MAX, nls_codepage, remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ } else { /* BB improve the check for buffer overruns BB */ @@ -2738,10 +2738,12 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifs_tcon *pTcon, /* unicode only call */ if (target_name == NULL) { sprintf(dummy_string, "cifs%x", pSMB->hdr.Mid); - len_of_str = cifsConvertToUCS((__le16 *)rename_info->target_name, + len_of_str = + cifsConvertToUTF16((__le16 *)rename_info->target_name, dummy_string, 24, nls_codepage, remap); } else { - len_of_str = cifsConvertToUCS((__le16 *)rename_info->target_name, + len_of_str = + cifsConvertToUTF16((__le16 *)rename_info->target_name, target_name, PATH_MAX, nls_codepage, remap); } @@ -2795,17 +2797,17 @@ copyRetry: pSMB->Flags = cpu_to_le16(flags & COPY_TREE); if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = cifsConvertToUCS((__le16 *) pSMB->OldFileName, - fromName, PATH_MAX, nls_codepage, - remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->OldFileName, + fromName, PATH_MAX, nls_codepage, + remap); name_len++; /* trailing null */ name_len *= 2; pSMB->OldFileName[name_len] = 0x04; /* pad */ /* protocol requires ASCII signature byte on Unicode string */ pSMB->OldFileName[name_len + 1] = 0x00; name_len2 = - cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], - toName, PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], + toName, PATH_MAX, nls_codepage, remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ } else { /* BB improve the check for buffer overruns BB */ @@ -2861,9 +2863,9 @@ createSymLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifs_strtoUCS((__le16 *) pSMB->FileName, fromName, PATH_MAX - /* find define for this maxpathcomponent */ - , nls_codepage); + cifs_strtoUTF16((__le16 *) pSMB->FileName, fromName, + /* find define for this maxpathcomponent */ + PATH_MAX, nls_codepage); name_len++; /* trailing null */ name_len *= 2; @@ -2885,9 +2887,9 @@ createSymLinkRetry: data_offset = (char *) (&pSMB->hdr.Protocol) + offset; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len_target = - cifs_strtoUCS((__le16 *) data_offset, toName, PATH_MAX - /* find define for this maxpathcomponent */ - , nls_codepage); + cifs_strtoUTF16((__le16 *) data_offset, toName, PATH_MAX + /* find define for this maxpathcomponent */ + , nls_codepage); name_len_target++; /* trailing null */ name_len_target *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -2949,8 +2951,8 @@ createHardLinkRetry: return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { - name_len = cifsConvertToUCS((__le16 *) pSMB->FileName, toName, - PATH_MAX, nls_codepage, remap); + name_len = cifsConvertToUTF16((__le16 *) pSMB->FileName, toName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -2972,8 +2974,8 @@ createHardLinkRetry: data_offset = (char *) (&pSMB->hdr.Protocol) + offset; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len_target = - cifsConvertToUCS((__le16 *) data_offset, fromName, PATH_MAX, - nls_codepage, remap); + cifsConvertToUTF16((__le16 *) data_offset, fromName, + PATH_MAX, nls_codepage, remap); name_len_target++; /* trailing null */ name_len_target *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -3042,8 +3044,8 @@ winCreateHardLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->OldFileName, fromName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->OldFileName, fromName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -3051,8 +3053,8 @@ winCreateHardLinkRetry: pSMB->OldFileName[name_len] = 0x04; pSMB->OldFileName[name_len + 1] = 0x00; /* pad */ name_len2 = - cifsConvertToUCS((__le16 *)&pSMB->OldFileName[name_len + 2], - toName, PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *)&pSMB->OldFileName[name_len+2], + toName, PATH_MAX, nls_codepage, remap); name_len2 += 1 /* trailing null */ + 1 /* Signature word */ ; name_len2 *= 2; /* convert to bytes */ } else { /* BB improve the check for buffer overruns BB */ @@ -3108,8 +3110,8 @@ querySymLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifs_strtoUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage); + cifs_strtoUTF16((__le16 *) pSMB->FileName, searchName, + PATH_MAX, nls_codepage); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -3166,8 +3168,8 @@ querySymLinkRetry: is_unicode = false; /* BB FIXME investigate remapping reserved chars here */ - *symlinkinfo = cifs_strndup_from_ucs(data_start, count, - is_unicode, nls_codepage); + *symlinkinfo = cifs_strndup_from_utf16(data_start, + count, is_unicode, nls_codepage); if (!*symlinkinfo) rc = -ENOMEM; } @@ -3450,8 +3452,9 @@ queryAclRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, + searchName, PATH_MAX, nls_codepage, + remap); name_len++; /* trailing null */ name_len *= 2; pSMB->FileName[name_len] = 0; @@ -3537,8 +3540,8 @@ setAclRetry: return rc; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -3948,8 +3951,9 @@ QInfRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, + searchName, PATH_MAX, nls_codepage, + remap); name_len++; /* trailing null */ name_len *= 2; } else { @@ -4086,8 +4090,8 @@ QPathInfoRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -4255,8 +4259,8 @@ UnixQPathInfoRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -4344,8 +4348,8 @@ findFirstRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, + PATH_MAX, nls_codepage, remap); /* We can not add the asterik earlier in case it got remapped to 0xF03A as if it were part of the directory name instead of a wildcard */ @@ -4656,8 +4660,9 @@ GetInodeNumberRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, + searchName, PATH_MAX, nls_codepage, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -4794,9 +4799,9 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, rc = -ENOMEM; goto parse_DFS_referrals_exit; } - cifsConvertToUCS((__le16 *) tmp, searchName, - PATH_MAX, nls_codepage, remap); - node->path_consumed = cifs_ucs2_bytes(tmp, + cifsConvertToUTF16((__le16 *) tmp, searchName, + PATH_MAX, nls_codepage, remap); + node->path_consumed = cifs_utf16_bytes(tmp, le16_to_cpu(pSMBr->PathConsumed), nls_codepage); kfree(tmp); @@ -4809,8 +4814,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, /* copy DfsPath */ temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); max_len = data_end - temp; - node->path_name = cifs_strndup_from_ucs(temp, max_len, - is_unicode, nls_codepage); + node->path_name = cifs_strndup_from_utf16(temp, max_len, + is_unicode, nls_codepage); if (!node->path_name) { rc = -ENOMEM; goto parse_DFS_referrals_exit; @@ -4819,8 +4824,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, /* copy link target UNC */ temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); max_len = data_end - temp; - node->node_name = cifs_strndup_from_ucs(temp, max_len, - is_unicode, nls_codepage); + node->node_name = cifs_strndup_from_utf16(temp, max_len, + is_unicode, nls_codepage); if (!node->node_name) rc = -ENOMEM; } @@ -4873,8 +4878,9 @@ getDFSRetry: if (ses->capabilities & CAP_UNICODE) { pSMB->hdr.Flags2 |= SMBFLG2_UNICODE; name_len = - cifsConvertToUCS((__le16 *) pSMB->RequestFileName, - searchName, PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->RequestFileName, + searchName, PATH_MAX, nls_codepage, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -5506,8 +5512,8 @@ SetEOFRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -5796,8 +5802,8 @@ SetTimesRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -5877,8 +5883,8 @@ SetAttrLgcyRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - ConvertToUCS((__le16 *) pSMB->fileName, fileName, - PATH_MAX, nls_codepage); + ConvertToUTF16((__le16 *) pSMB->fileName, fileName, + PATH_MAX, nls_codepage); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -6030,8 +6036,8 @@ setPermsRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -6123,8 +6129,8 @@ QAllEAsRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { list_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, searchName, + PATH_MAX, nls_codepage, remap); list_len++; /* trailing null */ list_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -6301,8 +6307,8 @@ SetEARetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifsConvertToUCS((__le16 *) pSMB->FileName, fileName, - PATH_MAX, nls_codepage, remap); + cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName, + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4666780f315..602f77c304c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -38,6 +38,7 @@ #include <asm/processor.h> #include <linux/inet.h> #include <linux/module.h> +#include <keys/user-type.h> #include <net/ipv6.h> #include "cifspdu.h" #include "cifsglob.h" @@ -225,74 +226,90 @@ static int check2ndT2(struct smb_hdr *pSMB) static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB) { - struct smb_t2_rsp *pSMB2 = (struct smb_t2_rsp *)psecond; + struct smb_t2_rsp *pSMBs = (struct smb_t2_rsp *)psecond; struct smb_t2_rsp *pSMBt = (struct smb_t2_rsp *)pTargetSMB; - char *data_area_of_target; - char *data_area_of_buf2; + char *data_area_of_tgt; + char *data_area_of_src; int remaining; - unsigned int byte_count, total_in_buf; - __u16 total_data_size, total_in_buf2; + unsigned int byte_count, total_in_tgt; + __u16 tgt_total_cnt, src_total_cnt, total_in_src; - total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); + src_total_cnt = get_unaligned_le16(&pSMBs->t2_rsp.TotalDataCount); + tgt_total_cnt = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); - if (total_data_size != - get_unaligned_le16(&pSMB2->t2_rsp.TotalDataCount)) - cFYI(1, "total data size of primary and secondary t2 differ"); + if (tgt_total_cnt != src_total_cnt) + cFYI(1, "total data count of primary and secondary t2 differ " + "source=%hu target=%hu", src_total_cnt, tgt_total_cnt); - total_in_buf = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); + total_in_tgt = get_unaligned_le16(&pSMBt->t2_rsp.DataCount); - remaining = total_data_size - total_in_buf; + remaining = tgt_total_cnt - total_in_tgt; - if (remaining < 0) + if (remaining < 0) { + cFYI(1, "Server sent too much data. tgt_total_cnt=%hu " + "total_in_tgt=%hu", tgt_total_cnt, total_in_tgt); return -EPROTO; + } - if (remaining == 0) /* nothing to do, ignore */ + if (remaining == 0) { + /* nothing to do, ignore */ + cFYI(1, "no more data remains"); return 0; + } - total_in_buf2 = get_unaligned_le16(&pSMB2->t2_rsp.DataCount); - if (remaining < total_in_buf2) { + total_in_src = get_unaligned_le16(&pSMBs->t2_rsp.DataCount); + if (remaining < total_in_src) cFYI(1, "transact2 2nd response contains too much data"); - } /* find end of first SMB data area */ - data_area_of_target = (char *)&pSMBt->hdr.Protocol + + data_area_of_tgt = (char *)&pSMBt->hdr.Protocol + get_unaligned_le16(&pSMBt->t2_rsp.DataOffset); - /* validate target area */ - data_area_of_buf2 = (char *)&pSMB2->hdr.Protocol + - get_unaligned_le16(&pSMB2->t2_rsp.DataOffset); + /* validate target area */ + data_area_of_src = (char *)&pSMBs->hdr.Protocol + + get_unaligned_le16(&pSMBs->t2_rsp.DataOffset); - data_area_of_target += total_in_buf; + data_area_of_tgt += total_in_tgt; - /* copy second buffer into end of first buffer */ - total_in_buf += total_in_buf2; + total_in_tgt += total_in_src; /* is the result too big for the field? */ - if (total_in_buf > USHRT_MAX) + if (total_in_tgt > USHRT_MAX) { + cFYI(1, "coalesced DataCount too large (%u)", total_in_tgt); return -EPROTO; - put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); + } + put_unaligned_le16(total_in_tgt, &pSMBt->t2_rsp.DataCount); /* fix up the BCC */ byte_count = get_bcc(pTargetSMB); - byte_count += total_in_buf2; + byte_count += total_in_src; /* is the result too big for the field? */ - if (byte_count > USHRT_MAX) + if (byte_count > USHRT_MAX) { + cFYI(1, "coalesced BCC too large (%u)", byte_count); return -EPROTO; + } put_bcc(byte_count, pTargetSMB); byte_count = be32_to_cpu(pTargetSMB->smb_buf_length); - byte_count += total_in_buf2; + byte_count += total_in_src; /* don't allow buffer to overflow */ - if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) + if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { + cFYI(1, "coalesced BCC exceeds buffer size (%u)", byte_count); return -ENOBUFS; + } pTargetSMB->smb_buf_length = cpu_to_be32(byte_count); - memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); + /* copy second buffer into end of first buffer */ + memcpy(data_area_of_tgt, data_area_of_src, total_in_src); - if (remaining == total_in_buf2) { - cFYI(1, "found the last secondary response"); - return 0; /* we are done */ - } else /* more responses to go */ + if (remaining != total_in_src) { + /* more responses to go */ + cFYI(1, "waiting for more secondary responses"); return 1; + } + + /* we are done */ + cFYI(1, "found the last secondary response"); + return 0; } static void @@ -756,10 +773,11 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid) cifs_dump_mem("Bad SMB: ", buf, min_t(unsigned int, server->total_read, 48)); - if (mid) - handle_mid(mid, server, smb_buffer, length); + if (!mid) + return length; - return length; + handle_mid(mid, server, smb_buffer, length); + return 0; } static int @@ -1578,11 +1596,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } } - if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) { - cERROR(1, "Multiuser mounts currently require krb5 " - "authentication!"); +#ifndef CONFIG_KEYS + /* Muliuser mounts require CONFIG_KEYS support */ + if (vol->multiuser) { + cERROR(1, "Multiuser mounts require kernels with " + "CONFIG_KEYS enabled."); goto cifs_parse_mount_err; } +#endif if (vol->UNCip == NULL) vol->UNCip = &vol->UNC[2]; @@ -1981,10 +2002,16 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol) return 0; break; default: + /* NULL username means anonymous session */ + if (ses->user_name == NULL) { + if (!vol->nullauth) + return 0; + break; + } + /* anything else takes username/password */ - if (ses->user_name == NULL) - return 0; - if (strncmp(ses->user_name, vol->username, + if (strncmp(ses->user_name, + vol->username ? vol->username : "", MAX_USERNAME_SIZE)) return 0; if (strlen(vol->username) != 0 && @@ -2039,6 +2066,132 @@ cifs_put_smb_ses(struct cifs_ses *ses) cifs_put_tcp_session(server); } +#ifdef CONFIG_KEYS + +/* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */ +#define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1) + +/* Populate username and pw fields from keyring if possible */ +static int +cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses) +{ + int rc = 0; + char *desc, *delim, *payload; + ssize_t len; + struct key *key; + struct TCP_Server_Info *server = ses->server; + struct sockaddr_in *sa; + struct sockaddr_in6 *sa6; + struct user_key_payload *upayload; + + desc = kmalloc(CIFSCREDS_DESC_SIZE, GFP_KERNEL); + if (!desc) + return -ENOMEM; + + /* try to find an address key first */ + switch (server->dstaddr.ss_family) { + case AF_INET: + sa = (struct sockaddr_in *)&server->dstaddr; + sprintf(desc, "cifs:a:%pI4", &sa->sin_addr.s_addr); + break; + case AF_INET6: + sa6 = (struct sockaddr_in6 *)&server->dstaddr; + sprintf(desc, "cifs:a:%pI6c", &sa6->sin6_addr.s6_addr); + break; + default: + cFYI(1, "Bad ss_family (%hu)", server->dstaddr.ss_family); + rc = -EINVAL; + goto out_err; + } + + cFYI(1, "%s: desc=%s", __func__, desc); + key = request_key(&key_type_logon, desc, ""); + if (IS_ERR(key)) { + if (!ses->domainName) { + cFYI(1, "domainName is NULL"); + rc = PTR_ERR(key); + goto out_err; + } + + /* didn't work, try to find a domain key */ + sprintf(desc, "cifs:d:%s", ses->domainName); + cFYI(1, "%s: desc=%s", __func__, desc); + key = request_key(&key_type_logon, desc, ""); + if (IS_ERR(key)) { + rc = PTR_ERR(key); + goto out_err; + } + } + + down_read(&key->sem); + upayload = key->payload.data; + if (IS_ERR_OR_NULL(upayload)) { + rc = upayload ? PTR_ERR(upayload) : -EINVAL; + goto out_key_put; + } + + /* find first : in payload */ + payload = (char *)upayload->data; + delim = strnchr(payload, upayload->datalen, ':'); + cFYI(1, "payload=%s", payload); + if (!delim) { + cFYI(1, "Unable to find ':' in payload (datalen=%d)", + upayload->datalen); + rc = -EINVAL; + goto out_key_put; + } + + len = delim - payload; + if (len > MAX_USERNAME_SIZE || len <= 0) { + cFYI(1, "Bad value from username search (len=%zd)", len); + rc = -EINVAL; + goto out_key_put; + } + + vol->username = kstrndup(payload, len, GFP_KERNEL); + if (!vol->username) { + cFYI(1, "Unable to allocate %zd bytes for username", len); + rc = -ENOMEM; + goto out_key_put; + } + cFYI(1, "%s: username=%s", __func__, vol->username); + + len = key->datalen - (len + 1); + if (len > MAX_PASSWORD_SIZE || len <= 0) { + cFYI(1, "Bad len for password search (len=%zd)", len); + rc = -EINVAL; + kfree(vol->username); + vol->username = NULL; + goto out_key_put; + } + + ++delim; + vol->password = kstrndup(delim, len, GFP_KERNEL); + if (!vol->password) { + cFYI(1, "Unable to allocate %zd bytes for password", len); + rc = -ENOMEM; + kfree(vol->username); + vol->username = NULL; + goto out_key_put; + } + +out_key_put: + up_read(&key->sem); + key_put(key); +out_err: + kfree(desc); + cFYI(1, "%s: returning %d", __func__, rc); + return rc; +} +#else /* ! CONFIG_KEYS */ +static inline int +cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)), + struct cifs_ses *ses __attribute__((unused))) +{ + return -ENOSYS; +} +#endif /* CONFIG_KEYS */ + static bool warned_on_ntlm; /* globals init to false automatically */ static struct cifs_ses * @@ -2914,18 +3067,33 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, #define CIFS_DEFAULT_IOSIZE (1024 * 1024) /* - * Windows only supports a max of 60k reads. Default to that when posix - * extensions aren't in force. + * Windows only supports a max of 60kb reads and 65535 byte writes. Default to + * those values when posix extensions aren't in force. In actuality here, we + * use 65536 to allow for a write that is a multiple of 4k. Most servers seem + * to be ok with the extra byte even though Windows doesn't send writes that + * are that large. + * + * Citation: + * + * http://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx */ #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) +#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) static unsigned int cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) { __u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability); struct TCP_Server_Info *server = tcon->ses->server; - unsigned int wsize = pvolume_info->wsize ? pvolume_info->wsize : - CIFS_DEFAULT_IOSIZE; + unsigned int wsize; + + /* start with specified wsize, or default */ + if (pvolume_info->wsize) + wsize = pvolume_info->wsize; + else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) + wsize = CIFS_DEFAULT_IOSIZE; + else + wsize = CIFS_DEFAULT_NON_POSIX_WSIZE; /* can server support 24-bit write sizes? (via UNIX extensions) */ if (!tcon->unix_ext || !(unix_cap & CIFS_UNIX_LARGE_WRITE_CAP)) @@ -3136,10 +3304,9 @@ cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, return -EINVAL; if (volume_info->nullauth) { - cFYI(1, "null user"); - volume_info->username = kzalloc(1, GFP_KERNEL); - if (volume_info->username == NULL) - return -ENOMEM; + cFYI(1, "Anonymous login"); + kfree(volume_info->username); + volume_info->username = NULL; } else if (volume_info->username) { /* BB fixme parse for domain name here */ cFYI(1, "Username: %s", volume_info->username); @@ -3478,7 +3645,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, if (ses->capabilities & CAP_UNICODE) { smb_buffer->Flags2 |= SMBFLG2_UNICODE; length = - cifs_strtoUCS((__le16 *) bcc_ptr, tree, + cifs_strtoUTF16((__le16 *) bcc_ptr, tree, 6 /* max utf8 char length in bytes */ * (/* server len*/ + 256 /* share len */), nls_codepage); bcc_ptr += 2 * length; /* convert num 16 bit words to bytes */ @@ -3533,7 +3700,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, /* mostly informational -- no need to fail on error here */ kfree(tcon->nativeFileSystem); - tcon->nativeFileSystem = cifs_strndup_from_ucs(bcc_ptr, + tcon->nativeFileSystem = cifs_strndup_from_utf16(bcc_ptr, bytes_left, is_unicode, nls_codepage); @@ -3657,25 +3824,43 @@ int cifs_setup_session(unsigned int xid, struct cifs_ses *ses, return rc; } +static int +cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses) +{ + switch (ses->server->secType) { + case Kerberos: + vol->secFlg = CIFSSEC_MUST_KRB5; + return 0; + case NTLMv2: + vol->secFlg = CIFSSEC_MUST_NTLMV2; + break; + case NTLM: + vol->secFlg = CIFSSEC_MUST_NTLM; + break; + case RawNTLMSSP: + vol->secFlg = CIFSSEC_MUST_NTLMSSP; + break; + case LANMAN: + vol->secFlg = CIFSSEC_MUST_LANMAN; + break; + } + + return cifs_set_cifscreds(vol, ses); +} + static struct cifs_tcon * cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) { + int rc; struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb); struct cifs_ses *ses; struct cifs_tcon *tcon = NULL; struct smb_vol *vol_info; - char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */ - /* We used to have this as MAX_USERNAME which is */ - /* way too big now (256 instead of 32) */ vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); - if (vol_info == NULL) { - tcon = ERR_PTR(-ENOMEM); - goto out; - } + if (vol_info == NULL) + return ERR_PTR(-ENOMEM); - snprintf(username, sizeof(username), "krb50x%x", fsuid); - vol_info->username = username; vol_info->local_nls = cifs_sb->local_nls; vol_info->linux_uid = fsuid; vol_info->cred_uid = fsuid; @@ -3685,8 +3870,11 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) vol_info->local_lease = master_tcon->local_lease; vol_info->no_linux_ext = !master_tcon->unix_ext; - /* FIXME: allow for other secFlg settings */ - vol_info->secFlg = CIFSSEC_MUST_KRB5; + rc = cifs_set_vol_auth(vol_info, master_tcon->ses); + if (rc) { + tcon = ERR_PTR(rc); + goto out; + } /* get a reference for the same TCP session */ spin_lock(&cifs_tcp_ses_lock); @@ -3709,6 +3897,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) if (ses->capabilities & CAP_UNIX) reset_cifs_unix_caps(0, tcon, NULL, vol_info); out: + kfree(vol_info->username); + kfree(vol_info->password); kfree(vol_info); return tcon; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index df8fecb5b99..63a196b97d5 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - __u32 oplock = 0; + __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0; __u16 fileHandle = 0; bool posix_open = false; struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index a090bbe6ee2..e2bbc683e01 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -647,10 +647,11 @@ static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, name.name = scratch_buf; name.len = - cifs_from_ucs2((char *)name.name, (__le16 *)de.name, - UNICODE_NAME_MAX, - min(de.namelen, (size_t)max_len), nlt, - cifs_sb->mnt_cifs_flags & + cifs_from_utf16((char *)name.name, (__le16 *)de.name, + UNICODE_NAME_MAX, + min_t(size_t, de.namelen, + (size_t)max_len), nlt, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); name.len -= nls_nullsize(nlt); } else { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 4ec3ee9d72c..551d0c2b973 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -167,16 +167,16 @@ unicode_oslm_strings(char **pbcc_area, const struct nls_table *nls_cp) int bytes_ret = 0; /* Copy OS version */ - bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32, - nls_cp); + bytes_ret = cifs_strtoUTF16((__le16 *)bcc_ptr, "Linux version ", 32, + nls_cp); bcc_ptr += 2 * bytes_ret; - bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release, - 32, nls_cp); + bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, init_utsname()->release, + 32, nls_cp); bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* trailing null */ - bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, - 32, nls_cp); + bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, + 32, nls_cp); bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* trailing null */ @@ -197,8 +197,8 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, *(bcc_ptr+1) = 0; bytes_ret = 0; } else - bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->domainName, - 256, nls_cp); + bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName, + 256, nls_cp); bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null terminator */ @@ -226,8 +226,8 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, *bcc_ptr = 0; *(bcc_ptr+1) = 0; } else { - bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->user_name, - MAX_USERNAME_SIZE, nls_cp); + bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->user_name, + MAX_USERNAME_SIZE, nls_cp); } bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null termination */ @@ -246,16 +246,15 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* copy user */ /* BB what about null user mounts - check that we do this BB */ /* copy user */ - if (ses->user_name != NULL) + if (ses->user_name != NULL) { strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE); + bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); + } /* else null user mount */ - - bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE); *bcc_ptr = 0; bcc_ptr++; /* account for null termination */ /* copy domain */ - if (ses->domainName != NULL) { strncpy(bcc_ptr, ses->domainName, 256); bcc_ptr += strnlen(ses->domainName, 256); @@ -287,7 +286,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, cFYI(1, "bleft %d", bleft); kfree(ses->serverOS); - ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); + ses->serverOS = cifs_strndup_from_utf16(data, bleft, true, nls_cp); cFYI(1, "serverOS=%s", ses->serverOS); len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; data += len; @@ -296,7 +295,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, return; kfree(ses->serverNOS); - ses->serverNOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); + ses->serverNOS = cifs_strndup_from_utf16(data, bleft, true, nls_cp); cFYI(1, "serverNOS=%s", ses->serverNOS); len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2; data += len; @@ -305,7 +304,7 @@ decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifs_ses *ses, return; kfree(ses->serverDomain); - ses->serverDomain = cifs_strndup_from_ucs(data, bleft, true, nls_cp); + ses->serverDomain = cifs_strndup_from_utf16(data, bleft, true, nls_cp); cFYI(1, "serverDomain=%s", ses->serverDomain); return; @@ -395,6 +394,10 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags); tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset); tilen = le16_to_cpu(pblob->TargetInfoArray.Length); + if (tioffset > blob_len || tioffset + tilen > blob_len) { + cERROR(1, "tioffset + tilen too high %u + %u", tioffset, tilen); + return -EINVAL; + } if (tilen) { ses->auth_key.response = kmalloc(tilen, GFP_KERNEL); if (!ses->auth_key.response) { @@ -502,8 +505,8 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, tmp += 2; } else { int len; - len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, - MAX_USERNAME_SIZE, nls_cp); + len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName, + MAX_USERNAME_SIZE, nls_cp); len *= 2; /* unicode is 2 bytes each */ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->DomainName.Length = cpu_to_le16(len); @@ -518,8 +521,8 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, tmp += 2; } else { int len; - len = cifs_strtoUCS((__le16 *)tmp, ses->user_name, - MAX_USERNAME_SIZE, nls_cp); + len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name, + MAX_USERNAME_SIZE, nls_cp); len *= 2; /* unicode is 2 bytes each */ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); sec_blob->UserName.Length = cpu_to_le16(len); diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 80d85088193..d5cd9aa7eac 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -213,7 +213,7 @@ E_md4hash(const unsigned char *passwd, unsigned char *p16, /* Password cannot be longer than 128 characters */ if (passwd) /* Password must be converted to NT unicode */ - len = cifs_strtoUCS(wpwd, passwd, 128, codepage); + len = cifs_strtoUTF16(wpwd, passwd, 128, codepage); else { len = 0; *wpwd = 0; /* Ensure string is null terminated */ diff --git a/fs/compat.c b/fs/compat.c index fa9d721ecfe..07880bae28a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -131,41 +131,35 @@ asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_tim static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { - compat_ino_t ino = stat->ino; - typeof(ubuf->st_uid) uid = 0; - typeof(ubuf->st_gid) gid = 0; - int err; + struct compat_stat tmp; - SET_UID(uid, stat->uid); - SET_GID(gid, stat->gid); + if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev)) + return -EOVERFLOW; - if ((u64) stat->size > MAX_NON_LFS || - !old_valid_dev(stat->dev) || - !old_valid_dev(stat->rdev)) + memset(&tmp, 0, sizeof(tmp)); + tmp.st_dev = old_encode_dev(stat->dev); + tmp.st_ino = stat->ino; + if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; - if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) + tmp.st_mode = stat->mode; + tmp.st_nlink = stat->nlink; + if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; - - if (clear_user(ubuf, sizeof(*ubuf))) - return -EFAULT; - - err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); - err |= __put_user(ino, &ubuf->st_ino); - err |= __put_user(stat->mode, &ubuf->st_mode); - err |= __put_user(stat->nlink, &ubuf->st_nlink); - err |= __put_user(uid, &ubuf->st_uid); - err |= __put_user(gid, &ubuf->st_gid); - err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); - err |= __put_user(stat->size, &ubuf->st_size); - err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); - err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); - err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); - err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec); - err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime); - err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec); - err |= __put_user(stat->blksize, &ubuf->st_blksize); - err |= __put_user(stat->blocks, &ubuf->st_blocks); - return err; + SET_UID(tmp.st_uid, stat->uid); + SET_GID(tmp.st_gid, stat->gid); + tmp.st_rdev = old_encode_dev(stat->rdev); + if ((u64) stat->size > MAX_NON_LFS) + return -EOVERFLOW; + tmp.st_size = stat->size; + tmp.st_atime = stat->atime.tv_sec; + tmp.st_atime_nsec = stat->atime.tv_nsec; + tmp.st_mtime = stat->mtime.tv_sec; + tmp.st_mtime_nsec = stat->mtime.tv_nsec; + tmp.st_ctime = stat->ctime.tv_sec; + tmp.st_ctime_nsec = stat->ctime.tv_nsec; + tmp.st_blocks = stat->blocks; + tmp.st_blksize = stat->blksize; + return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; } asmlinkage long compat_sys_newstat(const char __user * filename, diff --git a/fs/dcache.c b/fs/dcache.c index 16a53cc2cc0..fe19ac13f75 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2968,7 +2968,7 @@ __setup("dhash_entries=", set_dhash_entries); static void __init dcache_init_early(void) { - int loop; + unsigned int loop; /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. @@ -2986,13 +2986,13 @@ static void __init dcache_init_early(void) &d_hash_mask, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) + for (loop = 0; loop < (1U << d_hash_shift); loop++) INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } static void __init dcache_init(void) { - int loop; + unsigned int loop; /* * A constructor could be added for stable state like the lists, @@ -3016,7 +3016,7 @@ static void __init dcache_init(void) &d_hash_mask, 0); - for (loop = 0; loop < (1 << d_hash_shift); loop++) + for (loop = 0; loop < (1U << d_hash_shift); loop++) INIT_HLIST_BL_HEAD(dentry_hashtable + loop); } diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index f65d4455c5e..ef023eef046 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -540,7 +540,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_blob); * debugfs_print_regs32 - use seq_print to describe a set of registers * @s: the seq_file structure being used to generate output * @regs: an array if struct debugfs_reg32 structures - * @mregs: the length of the above array + * @nregs: the length of the above array * @base: the base address to be used in reading the registers * @prefix: a string to be prefixed to every output line * diff --git a/fs/direct-io.c b/fs/direct-io.c index 4a588dbd11b..f4aadd15b61 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -173,7 +173,7 @@ void inode_dio_wait(struct inode *inode) if (atomic_read(&inode->i_dio_count)) __inode_dio_wait(inode); } -EXPORT_SYMBOL_GPL(inode_dio_wait); +EXPORT_SYMBOL(inode_dio_wait); /* * inode_dio_done - signal finish of a direct I/O requests @@ -187,7 +187,7 @@ void inode_dio_done(struct inode *inode) if (atomic_dec_and_test(&inode->i_dio_count)) wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); } -EXPORT_SYMBOL_GPL(inode_dio_done); +EXPORT_SYMBOL(inode_dio_done); /* * How many pages are in the queue? diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 2a834255c75..ea993128155 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -417,17 +417,6 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, (unsigned long long)(extent_base + extent_offset), rc); goto out; } - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Encrypting extent " - "with iv:\n"); - ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); - ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " - "encryption:\n"); - ecryptfs_dump_hex((char *) - (page_address(page) - + (extent_offset * crypt_stat->extent_size)), - 8); - } rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0, page, (extent_offset * crypt_stat->extent_size), @@ -440,14 +429,6 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, goto out; } rc = 0; - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; " - "rc = [%d]\n", - (unsigned long long)(extent_base + extent_offset), rc); - ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " - "encryption:\n"); - ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8); - } out: return rc; } @@ -543,17 +524,6 @@ static int ecryptfs_decrypt_extent(struct page *page, (unsigned long long)(extent_base + extent_offset), rc); goto out; } - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Decrypting extent " - "with iv:\n"); - ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes); - ecryptfs_printk(KERN_DEBUG, "First 8 bytes before " - "decryption:\n"); - ecryptfs_dump_hex((char *) - (page_address(enc_extent_page) - + (extent_offset * crypt_stat->extent_size)), - 8); - } rc = ecryptfs_decrypt_page_offset(crypt_stat, page, (extent_offset * crypt_stat->extent_size), @@ -567,16 +537,6 @@ static int ecryptfs_decrypt_extent(struct page *page, goto out; } rc = 0; - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; " - "rc = [%d]\n", - (unsigned long long)(extent_base + extent_offset), rc); - ecryptfs_printk(KERN_DEBUG, "First 8 bytes after " - "decryption:\n"); - ecryptfs_dump_hex((char *)(page_address(page) - + (extent_offset - * crypt_stat->extent_size)), 8); - } out: return rc; } @@ -1590,8 +1550,8 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, */ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) { - int rc = 0; - char *page_virt = NULL; + int rc; + char *page_virt; struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode; struct ecryptfs_crypt_stat *crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; @@ -1616,11 +1576,13 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) ecryptfs_dentry, ECRYPTFS_VALIDATE_HEADER_SIZE); if (rc) { + /* metadata is not in the file header, so try xattrs */ memset(page_virt, 0, PAGE_CACHE_SIZE); rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode); if (rc) { printk(KERN_DEBUG "Valid eCryptfs headers not found in " - "file header region or xattr region\n"); + "file header region or xattr region, inode %lu\n", + ecryptfs_inode->i_ino); rc = -EINVAL; goto out; } @@ -1629,7 +1591,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) ECRYPTFS_DONT_VALIDATE_HEADER_SIZE); if (rc) { printk(KERN_DEBUG "Valid eCryptfs headers not found in " - "file xattr region either\n"); + "file xattr region either, inode %lu\n", + ecryptfs_inode->i_ino); rc = -EINVAL; } if (crypt_stat->mount_crypt_stat->flags @@ -1640,7 +1603,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) "crypto metadata only in the extended attribute " "region, but eCryptfs was mounted without " "xattr support enabled. eCryptfs will not treat " - "this like an encrypted file.\n"); + "this like an encrypted file, inode %lu\n", + ecryptfs_inode->i_ino); rc = -EINVAL; } } @@ -2026,6 +1990,17 @@ out: return; } +static size_t ecryptfs_max_decoded_size(size_t encoded_size) +{ + /* Not exact; conservatively long. Every block of 4 + * encoded characters decodes into a block of 3 + * decoded characters. This segment of code provides + * the caller with the maximum amount of allocated + * space that @dst will need to point to in a + * subsequent call. */ + return ((encoded_size + 1) * 3) / 4; +} + /** * ecryptfs_decode_from_filename * @dst: If NULL, this function only sets @dst_size and returns. If @@ -2044,13 +2019,7 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size, size_t dst_byte_offset = 0; if (dst == NULL) { - /* Not exact; conservatively long. Every block of 4 - * encoded characters decodes into a block of 3 - * decoded characters. This segment of code provides - * the caller with the maximum amount of allocated - * space that @dst will need to point to in a - * subsequent call. */ - (*dst_size) = (((src_size + 1) * 3) / 4); + (*dst_size) = ecryptfs_max_decoded_size(src_size); goto out; } while (src_byte_offset < src_size) { @@ -2275,3 +2244,52 @@ out_free: out: return rc; } + +#define ENC_NAME_MAX_BLOCKLEN_8_OR_16 143 + +int ecryptfs_set_f_namelen(long *namelen, long lower_namelen, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat) +{ + struct blkcipher_desc desc; + struct mutex *tfm_mutex; + size_t cipher_blocksize; + int rc; + + if (!(mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) { + (*namelen) = lower_namelen; + return 0; + } + + rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex, + mount_crypt_stat->global_default_fn_cipher_name); + if (unlikely(rc)) { + (*namelen) = 0; + return rc; + } + + mutex_lock(tfm_mutex); + cipher_blocksize = crypto_blkcipher_blocksize(desc.tfm); + mutex_unlock(tfm_mutex); + + /* Return an exact amount for the common cases */ + if (lower_namelen == NAME_MAX + && (cipher_blocksize == 8 || cipher_blocksize == 16)) { + (*namelen) = ENC_NAME_MAX_BLOCKLEN_8_OR_16; + return 0; + } + + /* Return a safe estimate for the uncommon cases */ + (*namelen) = lower_namelen; + (*namelen) -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE; + /* Since this is the max decoded size, subtract 1 "decoded block" len */ + (*namelen) = ecryptfs_max_decoded_size(*namelen) - 3; + (*namelen) -= ECRYPTFS_TAG_70_MAX_METADATA_SIZE; + (*namelen) -= ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES; + /* Worst case is that the filename is padded nearly a full block size */ + (*namelen) -= cipher_blocksize - 1; + + if ((*namelen) < 0) + (*namelen) = 0; + + return 0; +} diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index a9f29b12fbf..867b64c5d84 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -151,12 +151,21 @@ ecryptfs_get_key_payload_data(struct key *key) * dentry name */ #define ECRYPTFS_TAG_73_PACKET_TYPE 0x49 /* FEK-encrypted filename as * metadata */ +#define ECRYPTFS_MIN_PKT_LEN_SIZE 1 /* Min size to specify packet length */ +#define ECRYPTFS_MAX_PKT_LEN_SIZE 2 /* Pass at least this many bytes to + * ecryptfs_parse_packet_length() and + * ecryptfs_write_packet_length() + */ /* Constraint: ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES >= * ECRYPTFS_MAX_IV_BYTES */ #define ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES 16 #define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */ #define MD5_DIGEST_SIZE 16 #define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE +#define ECRYPTFS_TAG_70_MIN_METADATA_SIZE (1 + ECRYPTFS_MIN_PKT_LEN_SIZE \ + + ECRYPTFS_SIG_SIZE + 1 + 1) +#define ECRYPTFS_TAG_70_MAX_METADATA_SIZE (1 + ECRYPTFS_MAX_PKT_LEN_SIZE \ + + ECRYPTFS_SIG_SIZE + 1 + 1) #define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED." #define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23 #define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED." @@ -696,6 +705,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, size_t *packet_size, struct ecryptfs_mount_crypt_stat *mount_crypt_stat, char *data, size_t max_packet_size); +int ecryptfs_set_f_namelen(long *namelen, long lower_namelen, + struct ecryptfs_mount_crypt_stat *mount_crypt_stat); int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat, loff_t offset); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 19a8ca4ab1d..ab35b113003 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -822,18 +822,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, size_t num_zeros = (PAGE_CACHE_SIZE - (ia->ia_size & ~PAGE_CACHE_MASK)); - - /* - * XXX(truncate) this should really happen at the begginning - * of ->setattr. But the code is too messy to that as part - * of a larger patch. ecryptfs is also totally missing out - * on the inode_change_ok check at the beginning of - * ->setattr while would include this. - */ - rc = inode_newsize_ok(inode, ia->ia_size); - if (rc) - goto out; - if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { truncate_setsize(inode, ia->ia_size); lower_ia->ia_size = ia->ia_size; @@ -883,6 +871,28 @@ out: return rc; } +static int ecryptfs_inode_newsize_ok(struct inode *inode, loff_t offset) +{ + struct ecryptfs_crypt_stat *crypt_stat; + loff_t lower_oldsize, lower_newsize; + + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + lower_oldsize = upper_size_to_lower_size(crypt_stat, + i_size_read(inode)); + lower_newsize = upper_size_to_lower_size(crypt_stat, offset); + if (lower_newsize > lower_oldsize) { + /* + * The eCryptfs inode and the new *lower* size are mixed here + * because we may not have the lower i_mutex held and/or it may + * not be appropriate to call inode_newsize_ok() with inodes + * from other filesystems. + */ + return inode_newsize_ok(inode, lower_newsize); + } + + return 0; +} + /** * ecryptfs_truncate * @dentry: The ecryptfs layer dentry @@ -899,6 +909,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) struct iattr lower_ia = { .ia_valid = 0 }; int rc; + rc = ecryptfs_inode_newsize_ok(dentry->d_inode, new_length); + if (rc) + return rc; + rc = truncate_upper(dentry, &ia, &lower_ia); if (!rc && lower_ia.ia_valid & ATTR_SIZE) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); @@ -978,6 +992,16 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) } } mutex_unlock(&crypt_stat->cs_mutex); + + rc = inode_change_ok(inode, ia); + if (rc) + goto out; + if (ia->ia_valid & ATTR_SIZE) { + rc = ecryptfs_inode_newsize_ok(inode, ia->ia_size); + if (rc) + goto out; + } + if (S_ISREG(inode->i_mode)) { rc = filemap_write_and_wait(inode->i_mapping); if (rc) @@ -1061,6 +1085,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, } rc = vfs_setxattr(lower_dentry, name, value, size, flags); + if (!rc) + fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode); out: return rc; } diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index ac1ad48c237..2333203a120 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -109,7 +109,7 @@ int ecryptfs_parse_packet_length(unsigned char *data, size_t *size, (*size) += ((unsigned char)(data[1]) + 192); (*length_size) = 2; } else if (data[0] == 255) { - /* Five-byte length; we're not supposed to see this */ + /* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */ ecryptfs_printk(KERN_ERR, "Five-byte packet length not " "supported\n"); rc = -EINVAL; @@ -126,7 +126,7 @@ out: /** * ecryptfs_write_packet_length * @dest: The byte array target into which to write the length. Must - * have at least 5 bytes allocated. + * have at least ECRYPTFS_MAX_PKT_LEN_SIZE bytes allocated. * @size: The length to write. * @packet_size_length: The number of bytes used to encode the packet * length is written to this address. @@ -146,6 +146,7 @@ int ecryptfs_write_packet_length(char *dest, size_t size, dest[1] = ((size - 192) % 256); (*packet_size_length) = 2; } else { + /* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */ rc = -EINVAL; ecryptfs_printk(KERN_WARNING, "Unsupported packet size: [%zd]\n", size); @@ -678,10 +679,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, * Octets N3-N4: Block-aligned encrypted filename * - Consists of a minimum number of random characters, a \0 * separator, and then the filename */ - s->max_packet_size = (1 /* Tag 70 identifier */ - + 3 /* Max Tag 70 packet size */ - + ECRYPTFS_SIG_SIZE /* FNEK sig */ - + 1 /* Cipher identifier */ + s->max_packet_size = (ECRYPTFS_TAG_70_MAX_METADATA_SIZE + s->block_aligned_filename_size); if (dest == NULL) { (*packet_size) = s->max_packet_size; @@ -933,10 +931,10 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size, goto out; } s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; - if (max_packet_size < (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)) { + if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) { printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be " "at least [%d]\n", __func__, max_packet_size, - (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)); + ECRYPTFS_TAG_70_MIN_METADATA_SIZE); rc = -EINVAL; goto out; } diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 940a82e63dc..349209dc6a9 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -218,6 +218,29 @@ out_unlock: return rc; } +/* + * miscdevfs packet format: + * Octet 0: Type + * Octets 1-4: network byte order msg_ctx->counter + * Octets 5-N0: Size of struct ecryptfs_message to follow + * Octets N0-N1: struct ecryptfs_message (including data) + * + * Octets 5-N1 not written if the packet type does not include a message + */ +#define PKT_TYPE_SIZE 1 +#define PKT_CTR_SIZE 4 +#define MIN_NON_MSG_PKT_SIZE (PKT_TYPE_SIZE + PKT_CTR_SIZE) +#define MIN_MSG_PKT_SIZE (PKT_TYPE_SIZE + PKT_CTR_SIZE \ + + ECRYPTFS_MIN_PKT_LEN_SIZE) +/* 4 + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES comes from tag 65 packet format */ +#define MAX_MSG_PKT_SIZE (PKT_TYPE_SIZE + PKT_CTR_SIZE \ + + ECRYPTFS_MAX_PKT_LEN_SIZE \ + + sizeof(struct ecryptfs_message) \ + + 4 + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) +#define PKT_TYPE_OFFSET 0 +#define PKT_CTR_OFFSET PKT_TYPE_SIZE +#define PKT_LEN_OFFSET (PKT_TYPE_SIZE + PKT_CTR_SIZE) + /** * ecryptfs_miscdev_read - format and send message from queue * @file: fs/ecryptfs/euid miscdevfs handle (ignored) @@ -237,7 +260,7 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, struct ecryptfs_daemon *daemon; struct ecryptfs_msg_ctx *msg_ctx; size_t packet_length_size; - char packet_length[3]; + char packet_length[ECRYPTFS_MAX_PKT_LEN_SIZE]; size_t i; size_t total_length; uid_t euid = current_euid(); @@ -305,15 +328,8 @@ check_list: packet_length_size = 0; msg_ctx->msg_size = 0; } - /* miscdevfs packet format: - * Octet 0: Type - * Octets 1-4: network byte order msg_ctx->counter - * Octets 5-N0: Size of struct ecryptfs_message to follow - * Octets N0-N1: struct ecryptfs_message (including data) - * - * Octets 5-N1 not written if the packet type does not - * include a message */ - total_length = (1 + 4 + packet_length_size + msg_ctx->msg_size); + total_length = (PKT_TYPE_SIZE + PKT_CTR_SIZE + packet_length_size + + msg_ctx->msg_size); if (count < total_length) { rc = 0; printk(KERN_WARNING "%s: Only given user buffer of " @@ -324,9 +340,10 @@ check_list: rc = -EFAULT; if (put_user(msg_ctx->type, buf)) goto out_unlock_msg_ctx; - if (put_user(cpu_to_be32(msg_ctx->counter), (__be32 __user *)(buf + 1))) + if (put_user(cpu_to_be32(msg_ctx->counter), + (__be32 __user *)(&buf[PKT_CTR_OFFSET]))) goto out_unlock_msg_ctx; - i = 5; + i = PKT_TYPE_SIZE + PKT_CTR_SIZE; if (msg_ctx->msg) { if (copy_to_user(&buf[i], packet_length, packet_length_size)) goto out_unlock_msg_ctx; @@ -391,12 +408,6 @@ out: * @count: Amount of data in @buf * @ppos: Pointer to offset in file (ignored) * - * miscdevfs packet format: - * Octet 0: Type - * Octets 1-4: network byte order msg_ctx->counter (0's for non-response) - * Octets 5-N0: Size of struct ecryptfs_message to follow - * Octets N0-N1: struct ecryptfs_message (including data) - * * Returns the number of bytes read from @buf */ static ssize_t @@ -405,60 +416,78 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, { __be32 counter_nbo; u32 seq; - size_t packet_size, packet_size_length, i; - ssize_t sz = 0; + size_t packet_size, packet_size_length; char *data; uid_t euid = current_euid(); - int rc; + unsigned char packet_size_peek[ECRYPTFS_MAX_PKT_LEN_SIZE]; + ssize_t rc; - if (count == 0) - goto out; + if (count == 0) { + return 0; + } else if (count == MIN_NON_MSG_PKT_SIZE) { + /* Likely a harmless MSG_HELO or MSG_QUIT - no packet length */ + goto memdup; + } else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) { + printk(KERN_WARNING "%s: Acceptable packet size range is " + "[%d-%lu], but amount of data written is [%zu].", + __func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count); + return -EINVAL; + } + + if (copy_from_user(packet_size_peek, &buf[PKT_LEN_OFFSET], + sizeof(packet_size_peek))) { + printk(KERN_WARNING "%s: Error while inspecting packet size\n", + __func__); + return -EFAULT; + } + rc = ecryptfs_parse_packet_length(packet_size_peek, &packet_size, + &packet_size_length); + if (rc) { + printk(KERN_WARNING "%s: Error parsing packet length; " + "rc = [%zd]\n", __func__, rc); + return rc; + } + + if ((PKT_TYPE_SIZE + PKT_CTR_SIZE + packet_size_length + packet_size) + != count) { + printk(KERN_WARNING "%s: Invalid packet size [%zu]\n", __func__, + packet_size); + return -EINVAL; + } + +memdup: data = memdup_user(buf, count); if (IS_ERR(data)) { printk(KERN_ERR "%s: memdup_user returned error [%ld]\n", __func__, PTR_ERR(data)); - goto out; + return PTR_ERR(data); } - sz = count; - i = 0; - switch (data[i++]) { + switch (data[PKT_TYPE_OFFSET]) { case ECRYPTFS_MSG_RESPONSE: - if (count < (1 + 4 + 1 + sizeof(struct ecryptfs_message))) { + if (count < (MIN_MSG_PKT_SIZE + + sizeof(struct ecryptfs_message))) { printk(KERN_WARNING "%s: Minimum acceptable packet " "size is [%zd], but amount of data written is " "only [%zd]. Discarding response packet.\n", __func__, - (1 + 4 + 1 + sizeof(struct ecryptfs_message)), - count); + (MIN_MSG_PKT_SIZE + + sizeof(struct ecryptfs_message)), count); + rc = -EINVAL; goto out_free; } - memcpy(&counter_nbo, &data[i], 4); + memcpy(&counter_nbo, &data[PKT_CTR_OFFSET], PKT_CTR_SIZE); seq = be32_to_cpu(counter_nbo); - i += 4; - rc = ecryptfs_parse_packet_length(&data[i], &packet_size, - &packet_size_length); + rc = ecryptfs_miscdev_response( + &data[PKT_LEN_OFFSET + packet_size_length], + packet_size, euid, current_user_ns(), + task_pid(current), seq); if (rc) { - printk(KERN_WARNING "%s: Error parsing packet length; " - "rc = [%d]\n", __func__, rc); - goto out_free; - } - i += packet_size_length; - if ((1 + 4 + packet_size_length + packet_size) != count) { - printk(KERN_WARNING "%s: (1 + packet_size_length([%zd])" - " + packet_size([%zd]))([%zd]) != " - "count([%zd]). Invalid packet format.\n", - __func__, packet_size_length, packet_size, - (1 + packet_size_length + packet_size), count); - goto out_free; - } - rc = ecryptfs_miscdev_response(&data[i], packet_size, - euid, current_user_ns(), - task_pid(current), seq); - if (rc) printk(KERN_WARNING "%s: Failed to deliver miscdev " - "response to requesting operation; rc = [%d]\n", + "response to requesting operation; rc = [%zd]\n", __func__, rc); + goto out_free; + } break; case ECRYPTFS_MSG_HELO: case ECRYPTFS_MSG_QUIT: @@ -467,12 +496,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, ecryptfs_printk(KERN_WARNING, "Dropping miscdev " "message of unrecognized type [%d]\n", data[0]); - break; + rc = -EINVAL; + goto out_free; } + rc = count; out_free: kfree(data); -out: - return sz; + return rc; } diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 6a44148c5fb..a46b3a8fee1 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -57,6 +57,10 @@ struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index) * @page: Page that is locked before this call is made * * Returns zero on success; non-zero otherwise + * + * This is where we encrypt the data and pass the encrypted data to + * the lower filesystem. In OpenPGP-compatible mode, we operate on + * entire underlying packets. */ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc) { @@ -146,7 +150,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, /* This is a header extent */ char *page_virt; - page_virt = kmap_atomic(page, KM_USER0); + page_virt = kmap_atomic(page); memset(page_virt, 0, PAGE_CACHE_SIZE); /* TODO: Support more than one header extent */ if (view_extent_num == 0) { @@ -159,7 +163,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page, crypt_stat, &written); } - kunmap_atomic(page_virt, KM_USER0); + kunmap_atomic(page_virt); flush_dcache_page(page); if (rc) { printk(KERN_ERR "%s: Error reading xattr " @@ -481,10 +485,6 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode) * @copied: The amount of data copied * @page: The eCryptfs page * @fsdata: The fsdata (unused) - * - * This is where we encrypt the data and pass the encrypted data to - * the lower filesystem. In OpenPGP-compatible mode, we operate on - * entire underlying packets. */ static int ecryptfs_write_end(struct file *file, struct address_space *mapping, diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 3745f7c2b9c..b2a34a192f4 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -130,13 +130,18 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); - size_t total_remaining_bytes = ((offset + size) - pos); + loff_t total_remaining_bytes = ((offset + size) - pos); + + if (fatal_signal_pending(current)) { + rc = -EINTR; + break; + } if (num_bytes > total_remaining_bytes) num_bytes = total_remaining_bytes; if (pos < offset) { /* remaining zeros to write, up to destination offset */ - size_t total_remaining_zeros = (offset - pos); + loff_t total_remaining_zeros = (offset - pos); if (num_bytes > total_remaining_zeros) num_bytes = total_remaining_zeros; @@ -151,7 +156,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, ecryptfs_page_idx, rc); goto out; } - ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); + ecryptfs_page_virt = kmap_atomic(ecryptfs_page); /* * pos: where we're now writing, offset: where the request was @@ -174,7 +179,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, (data + data_offset), num_bytes); data_offset += num_bytes; } - kunmap_atomic(ecryptfs_page_virt, KM_USER0); + kunmap_atomic(ecryptfs_page_virt); flush_dcache_page(ecryptfs_page); SetPageUptodate(ecryptfs_page); unlock_page(ecryptfs_page); @@ -193,15 +198,19 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset, } pos += num_bytes; } - if ((offset + size) > ecryptfs_file_size) { - i_size_write(ecryptfs_inode, (offset + size)); + if (pos > ecryptfs_file_size) { + i_size_write(ecryptfs_inode, pos); if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) { - rc = ecryptfs_write_inode_size_to_metadata( + int rc2; + + rc2 = ecryptfs_write_inode_size_to_metadata( ecryptfs_inode); - if (rc) { + if (rc2) { printk(KERN_ERR "Problem with " "ecryptfs_write_inode_size_to_metadata; " - "rc = [%d]\n", rc); + "rc = [%d]\n", rc2); + if (!rc) + rc = rc2; goto out; } } @@ -273,76 +282,3 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs, flush_dcache_page(page_for_ecryptfs); return rc; } - -#if 0 -/** - * ecryptfs_read - * @data: The virtual address into which to write the data read (and - * possibly decrypted) from the lower file - * @offset: The offset in the decrypted view of the file from which to - * read into @data - * @size: The number of bytes to read into @data - * @ecryptfs_file: The eCryptfs file from which to read - * - * Read an arbitrary amount of data from an arbitrary location in the - * eCryptfs page cache. This is done on an extent-by-extent basis; - * individual extents are decrypted and read from the lower page - * cache (via VFS reads). This function takes care of all the - * address translation to locations in the lower filesystem. - * - * Returns zero on success; non-zero otherwise - */ -int ecryptfs_read(char *data, loff_t offset, size_t size, - struct file *ecryptfs_file) -{ - struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode; - struct page *ecryptfs_page; - char *ecryptfs_page_virt; - loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); - loff_t data_offset = 0; - loff_t pos; - int rc = 0; - - if ((offset + size) > ecryptfs_file_size) { - rc = -EINVAL; - printk(KERN_ERR "%s: Attempt to read data past the end of the " - "file; offset = [%lld]; size = [%td]; " - "ecryptfs_file_size = [%lld]\n", - __func__, offset, size, ecryptfs_file_size); - goto out; - } - pos = offset; - while (pos < (offset + size)) { - pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT); - size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK); - size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page); - size_t total_remaining_bytes = ((offset + size) - pos); - - if (num_bytes > total_remaining_bytes) - num_bytes = total_remaining_bytes; - ecryptfs_page = ecryptfs_get_locked_page(ecryptfs_inode, - ecryptfs_page_idx); - if (IS_ERR(ecryptfs_page)) { - rc = PTR_ERR(ecryptfs_page); - printk(KERN_ERR "%s: Error getting page at " - "index [%ld] from eCryptfs inode " - "mapping; rc = [%d]\n", __func__, - ecryptfs_page_idx, rc); - goto out; - } - ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0); - memcpy((data + data_offset), - ((char *)ecryptfs_page_virt + start_offset_in_page), - num_bytes); - kunmap_atomic(ecryptfs_page_virt, KM_USER0); - flush_dcache_page(ecryptfs_page); - SetPageUptodate(ecryptfs_page); - unlock_page(ecryptfs_page); - page_cache_release(ecryptfs_page); - pos += num_bytes; - data_offset += num_bytes; - } -out: - return rc; -} -#endif /* 0 */ diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 9df7fd6e0c3..cf152823bbf 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -30,6 +30,8 @@ #include <linux/seq_file.h> #include <linux/file.h> #include <linux/crypto.h> +#include <linux/statfs.h> +#include <linux/magic.h> #include "ecryptfs_kernel.h" struct kmem_cache *ecryptfs_inode_info_cache; @@ -102,10 +104,20 @@ static void ecryptfs_destroy_inode(struct inode *inode) static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); + int rc; if (!lower_dentry->d_sb->s_op->statfs) return -ENOSYS; - return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); + + rc = lower_dentry->d_sb->s_op->statfs(lower_dentry, buf); + if (rc) + return rc; + + buf->f_type = ECRYPTFS_SUPER_MAGIC; + rc = ecryptfs_set_f_namelen(&buf->f_namelen, buf->f_namelen, + &ecryptfs_superblock_to_private(dentry->d_sb)->mount_crypt_stat); + + return rc; } /** diff --git a/fs/eventpoll.c b/fs/eventpoll.c index aabdfc38cf2..ea54cdef04d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -320,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p) return !list_empty(p); } +static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p) +{ + return container_of(p, struct eppoll_entry, wait); +} + /* Get the "struct epitem" from a wait queue pointer */ static inline struct epitem *ep_item_from_wait(wait_queue_t *p) { @@ -467,6 +472,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq) put_cpu(); } +static void ep_remove_wait_queue(struct eppoll_entry *pwq) +{ + wait_queue_head_t *whead; + + rcu_read_lock(); + /* If it is cleared by POLLFREE, it should be rcu-safe */ + whead = rcu_dereference(pwq->whead); + if (whead) + remove_wait_queue(whead, &pwq->wait); + rcu_read_unlock(); +} + /* * This function unregisters poll callbacks from the associated file * descriptor. Must be called with "mtx" held (or "epmutex" if called from @@ -481,7 +498,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi) pwq = list_first_entry(lsthead, struct eppoll_entry, llink); list_del(&pwq->llink); - remove_wait_queue(pwq->whead, &pwq->wait); + ep_remove_wait_queue(pwq); kmem_cache_free(pwq_cache, pwq); } } @@ -842,6 +859,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; + if ((unsigned long)key & POLLFREE) { + ep_pwq_from_wait(wait)->whead = NULL; + /* + * whead = NULL above can race with ep_remove_wait_queue() + * which can do another remove_wait_queue() after us, so we + * can't use __remove_wait_queue(). whead->lock is held by + * the caller. + */ + list_del_init(&wait->task_list); + } + spin_lock_irqsave(&ep->lock, flags); /* diff --git a/fs/exec.c b/fs/exec.c index aeb135c7ff5..92ce83a11e9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf) perf_event_comm(tsk); } +static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len) +{ + int i, ch; + + /* Copies the binary name from after last slash */ + for (i = 0; (ch = *(fn++)) != '\0';) { + if (ch == '/') + i = 0; /* overwrite what we wrote */ + else + if (i < len - 1) + tcomm[i++] = ch; + } + tcomm[i] = '\0'; +} + int flush_old_exec(struct linux_binprm * bprm) { int retval; @@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm) set_mm_exe_file(bprm->mm, bprm->file); + filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm)); /* * Release all of the old mmap stuff */ @@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump); void setup_new_exec(struct linux_binprm * bprm) { - int i, ch; - const char *name; - char tcomm[sizeof(current->comm)]; - arch_pick_mmap_layout(current->mm); /* This is the point of no return */ @@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - name = bprm->filename; - - /* Copies the binary name from after last slash */ - for (i=0; (ch = *(name++)) != '\0';) { - if (ch == '/') - i = 0; /* overwrite what we wrote */ - else - if (i < (sizeof(tcomm) - 1)) - tcomm[i++] = ch; - } - tcomm[i] = '\0'; - set_task_comm(current, tcomm); + set_task_comm(current, bprm->tcomm); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 1089f760c84..2de655f5d62 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -77,10 +77,11 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) flags = flags & EXT2_FL_USER_MODIFIABLE; flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE; ei->i_flags = flags; - mutex_unlock(&inode->i_mutex); ext2_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME_SEC; + mutex_unlock(&inode->i_mutex); + mark_inode_dirty(inode); setflags_out: mnt_drop_write_file(filp); @@ -88,20 +89,29 @@ setflags_out: } case EXT2_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *) arg); - case EXT2_IOC_SETVERSION: + case EXT2_IOC_SETVERSION: { + __u32 generation; + if (!inode_owner_or_capable(inode)) return -EPERM; ret = mnt_want_write_file(filp); if (ret) return ret; - if (get_user(inode->i_generation, (int __user *) arg)) { + if (get_user(generation, (int __user *) arg)) { ret = -EFAULT; - } else { - inode->i_ctime = CURRENT_TIME_SEC; - mark_inode_dirty(inode); + goto setversion_out; } + + mutex_lock(&inode->i_mutex); + inode->i_ctime = CURRENT_TIME_SEC; + inode->i_generation = generation; + mutex_unlock(&inode->i_mutex); + + mark_inode_dirty(inode); +setversion_out: mnt_drop_write_file(filp); return ret; + } case EXT2_IOC_GETRSVSZ: if (test_opt(inode->i_sb, RESERVATION) && S_ISREG(inode->i_mode) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f855916657b..5b4a9362d5a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -53,14 +53,6 @@ struct wb_writeback_work { }; /* - * Include the creation of the trace points after defining the - * wb_writeback_work structure so that the definition remains local to this - * file. - */ -#define CREATE_TRACE_POINTS -#include <trace/events/writeback.h> - -/* * We don't actually have pdflush, but this one is exported though /proc... */ int nr_pdflush_threads; @@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head) return list_entry(head, struct inode, i_wb_list); } +/* + * Include the creation of the trace points after defining the + * wb_writeback_work structure and inline functions so that the definition + * remains local to this file. + */ +#define CREATE_TRACE_POINTS +#include <trace/events/writeback.h> + /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */ static void bdi_wakeup_flusher(struct backing_dev_info *bdi) { diff --git a/fs/inode.c b/fs/inode.c index fb10d86ffad..d3ebdbe723d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1651,7 +1651,7 @@ __setup("ihash_entries=", set_ihash_entries); */ void __init inode_init_early(void) { - int loop; + unsigned int loop; /* If hashes are distributed across NUMA nodes, defer * hash allocation until vmalloc space is available. @@ -1669,13 +1669,13 @@ void __init inode_init_early(void) &i_hash_mask, 0); - for (loop = 0; loop < (1 << i_hash_shift); loop++) + for (loop = 0; loop < (1U << i_hash_shift); loop++) INIT_HLIST_HEAD(&inode_hashtable[loop]); } void __init inode_init(void) { - int loop; + unsigned int loop; /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", @@ -1699,7 +1699,7 @@ void __init inode_init(void) &i_hash_mask, 0); - for (loop = 0; loop < (1 << i_hash_shift); loop++) + for (loop = 0; loop < (1U << i_hash_shift); loop++) INIT_HLIST_HEAD(&inode_hashtable[loop]); } diff --git a/fs/ioprio.c b/fs/ioprio.c index f84b380d65e..0f1b9515213 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE); if (ioc) { ioc_ioprio_changed(ioc, ioprio); - put_io_context(ioc, NULL); + put_io_context(ioc); } return err; diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 5d1a00a5041..05f0754f2b4 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -453,8 +453,6 @@ out: * * Return <0 on error, 0 on success, 1 if there was nothing to clean up. * - * Called with the journal lock held. - * * This is the only part of the journaling code which really needs to be * aware of transaction aborts. Checkpointing involves writing to the * main filesystem area rather than to the journal, so it can proceed @@ -472,13 +470,14 @@ int cleanup_journal_tail(journal_t *journal) if (is_journal_aborted(journal)) return 1; - /* OK, work out the oldest transaction remaining in the log, and + /* + * OK, work out the oldest transaction remaining in the log, and * the log block it starts at. * * If the log is now empty, we need to work out which is the * next transaction ID we will write, and where it will - * start. */ - + * start. + */ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); transaction = journal->j_checkpoint_transactions; @@ -504,7 +503,25 @@ int cleanup_journal_tail(journal_t *journal) spin_unlock(&journal->j_state_lock); return 1; } + spin_unlock(&journal->j_state_lock); + + /* + * We need to make sure that any blocks that were recently written out + * --- perhaps by log_do_checkpoint() --- are flushed out before we + * drop the transactions from the journal. It's unlikely this will be + * necessary, especially with an appropriately sized journal, but we + * need this to guarantee correctness. Fortunately + * cleanup_journal_tail() doesn't get called all that often. + */ + if (journal->j_flags & JFS_BARRIER) + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + spin_lock(&journal->j_state_lock); + if (!tid_gt(first_tid, journal->j_tail_sequence)) { + spin_unlock(&journal->j_state_lock); + /* Someone else cleaned up journal so return 0 */ + return 0; + } /* OK, update the superblock to recover the freed space. * Physical blocks come first: have we wrapped beyond the end of * the log? */ diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 5b43e96788e..008bf062fd2 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -20,6 +20,7 @@ #include <linux/fs.h> #include <linux/jbd.h> #include <linux/errno.h> +#include <linux/blkdev.h> #endif /* @@ -263,6 +264,9 @@ int journal_recover(journal_t *journal) err2 = sync_blockdev(journal->j_fs_dev); if (!err) err = err2; + /* Flush disk caches to get replayed data on the permanent storage */ + if (journal->j_flags & JFS_BARRIER) + blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); return err; } diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index a01cdad6aad..eafb8d37a6f 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c @@ -335,7 +335,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl void *ebuf; uint32_t ofs; size_t retlen; - int ret = -EIO; + int ret; unsigned long *wordebuf; ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen, diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index e97404d611e..9c501449450 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = 0; while (mtd_block_isbad(mtd, *ofs)) { *ofs += mtd->erasesize; @@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs) filler_t *filler = logfs_mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd_can_have_bb(mtd)) - return NULL; - *ofs = mtd->size - mtd->erasesize; while (mtd_block_isbad(mtd, *ofs)) { *ofs -= mtd->erasesize; diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 501043e8966..3de7a32cadb 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -71,7 +71,7 @@ static int write_dir(struct inode *dir, struct logfs_disk_dentry *dd, static int write_inode(struct inode *inode) { - return __logfs_write_inode(inode, WF_LOCK); + return __logfs_write_inode(inode, NULL, WF_LOCK); } static s64 dir_seek_data(struct inode *inode, s64 pos) diff --git a/fs/logfs/file.c b/fs/logfs/file.c index b548c87a86f..3886cded283 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -230,7 +230,9 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; mutex_lock(&inode->i_mutex); + logfs_get_wblocks(sb, NULL, WF_LOCK); logfs_write_anchor(sb); + logfs_put_wblocks(sb, NULL, WF_LOCK); mutex_unlock(&inode->i_mutex); return 0; diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c index caa4419285d..d4efb061bdc 100644 --- a/fs/logfs/gc.c +++ b/fs/logfs/gc.c @@ -367,7 +367,7 @@ static struct gc_candidate *get_candidate(struct super_block *sb) int i, max_dist; struct gc_candidate *cand = NULL, *this; - max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS); + max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS - 1); for (i = max_dist; i >= 0; i--) { this = first_in_list(&super->s_low_list[i]); diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 388df1aa35e..a422f42238b 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -286,7 +286,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN) return 0; - ret = __logfs_write_inode(inode, flags); + ret = __logfs_write_inode(inode, NULL, flags); LOGFS_BUG_ON(ret, inode->i_sb); return ret; } @@ -363,7 +363,9 @@ static void logfs_init_once(void *_li) static int logfs_sync_fs(struct super_block *sb, int wait) { + logfs_get_wblocks(sb, NULL, WF_LOCK); logfs_write_anchor(sb); + logfs_put_wblocks(sb, NULL, WF_LOCK); return 0; } diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 9da29706f91..1e1c369df22 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -612,7 +612,6 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type, if (len == 0) return logfs_write_header(super, header, 0, type); - BUG_ON(len > sb->s_blocksize); compr_len = logfs_compress(buf, data, len, sb->s_blocksize); if (compr_len < 0 || type == JE_ANCHOR) { memcpy(data, buf, len); diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 926373866a5..5f093760946 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -528,7 +528,7 @@ void logfs_destroy_inode_cache(void); void logfs_set_blocks(struct inode *inode, u64 no); /* these logically belong into inode.c but actually reside in readwrite.c */ int logfs_read_inode(struct inode *inode); -int __logfs_write_inode(struct inode *inode, long flags); +int __logfs_write_inode(struct inode *inode, struct page *, long flags); void logfs_evict_inode(struct inode *inode); /* journal.c */ @@ -577,6 +577,8 @@ void initialize_block_counters(struct page *page, struct logfs_block *block, __be64 *array, int page_is_empty); int logfs_exist_block(struct inode *inode, u64 bix); int get_page_reserve(struct inode *inode, struct page *page); +void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock); +void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock); extern struct logfs_block_ops indirect_block_ops; /* segment.c */ @@ -594,6 +596,7 @@ int logfs_init_mapping(struct super_block *sb); void logfs_sync_area(struct logfs_area *area); void logfs_sync_segments(struct super_block *sb); void freeseg(struct super_block *sb, u32 segno); +void free_areas(struct super_block *sb); /* area handling */ int logfs_init_areas(struct super_block *sb); diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 2ac4217b790..4153e65b014 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -244,8 +244,7 @@ static void preunlock_page(struct super_block *sb, struct page *page, int lock) * is waiting for s_write_mutex. We annotate this fact by setting PG_pre_locked * in addition to PG_locked. */ -static void logfs_get_wblocks(struct super_block *sb, struct page *page, - int lock) +void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock) { struct logfs_super *super = logfs_super(sb); @@ -260,8 +259,7 @@ static void logfs_get_wblocks(struct super_block *sb, struct page *page, } } -static void logfs_put_wblocks(struct super_block *sb, struct page *page, - int lock) +void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock) { struct logfs_super *super = logfs_super(sb); @@ -424,7 +422,7 @@ static void inode_write_block(struct logfs_block *block) if (inode->i_ino == LOGFS_INO_MASTER) logfs_write_anchor(inode->i_sb); else { - ret = __logfs_write_inode(inode, 0); + ret = __logfs_write_inode(inode, NULL, 0); /* see indirect_write_block comment */ BUG_ON(ret); } @@ -560,8 +558,13 @@ static void inode_free_block(struct super_block *sb, struct logfs_block *block) static void indirect_free_block(struct super_block *sb, struct logfs_block *block) { - ClearPagePrivate(block->page); - block->page->private = 0; + struct page *page = block->page; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + set_page_private(page, 0); + } __free_block(sb, block); } @@ -650,8 +653,11 @@ static void alloc_data_block(struct inode *inode, struct page *page) logfs_unpack_index(page->index, &bix, &level); block = __alloc_block(inode->i_sb, inode->i_ino, bix, level); block->page = page; + SetPagePrivate(page); - page->private = (unsigned long)block; + page_cache_get(page); + set_page_private(page, (unsigned long) block); + block->ops = &indirect_block_ops; } @@ -1570,11 +1576,15 @@ int logfs_write_buf(struct inode *inode, struct page *page, long flags) static int __logfs_delete(struct inode *inode, struct page *page) { long flags = WF_DELETE; + int err; inode->i_ctime = inode->i_mtime = CURRENT_TIME; if (page->index < I0_BLOCKS) return logfs_write_direct(inode, page, flags); + err = grow_inode(inode, page->index, 0); + if (err) + return err; return logfs_write_rec(inode, page, page->index, 0, flags); } @@ -1623,7 +1633,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, if (inode->i_ino == LOGFS_INO_MASTER) logfs_write_anchor(inode->i_sb); else { - err = __logfs_write_inode(inode, flags); + err = __logfs_write_inode(inode, page, flags); } } } @@ -1873,7 +1883,7 @@ int logfs_truncate(struct inode *inode, u64 target) logfs_get_wblocks(sb, NULL, 1); err = __logfs_truncate(inode, size); if (!err) - err = __logfs_write_inode(inode, 0); + err = __logfs_write_inode(inode, NULL, 0); logfs_put_wblocks(sb, NULL, 1); } @@ -1901,8 +1911,11 @@ static void move_page_to_inode(struct inode *inode, struct page *page) li->li_block = block; block->page = NULL; - page->private = 0; - ClearPagePrivate(page); + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + set_page_private(page, 0); + } } static void move_inode_to_page(struct page *page, struct inode *inode) @@ -1918,8 +1931,12 @@ static void move_inode_to_page(struct page *page, struct inode *inode) BUG_ON(PagePrivate(page)); block->ops = &indirect_block_ops; block->page = page; - page->private = (unsigned long)block; - SetPagePrivate(page); + + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + set_page_private(page, (unsigned long) block); + } block->inode = NULL; li->li_block = NULL; @@ -2106,14 +2123,14 @@ void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec) ec_level); } -int __logfs_write_inode(struct inode *inode, long flags) +int __logfs_write_inode(struct inode *inode, struct page *page, long flags) { struct super_block *sb = inode->i_sb; int ret; - logfs_get_wblocks(sb, NULL, flags & WF_LOCK); + logfs_get_wblocks(sb, page, flags & WF_LOCK); ret = do_write_inode(inode); - logfs_put_wblocks(sb, NULL, flags & WF_LOCK); + logfs_put_wblocks(sb, page, flags & WF_LOCK); return ret; } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 9d518735325..ab798ed1cc8 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -86,7 +86,11 @@ int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, BUG_ON(!page); /* FIXME: reserve a pool */ SetPageUptodate(page); memcpy(page_address(page) + offset, buf, copylen); - SetPagePrivate(page); + + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + } page_cache_release(page); buf += copylen; @@ -110,7 +114,10 @@ static void pad_partial_page(struct logfs_area *area) page = get_mapping_page(sb, index, 0); BUG_ON(!page); /* FIXME: reserve a pool */ memset(page_address(page) + offset, 0xff, len); - SetPagePrivate(page); + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + } page_cache_release(page); } } @@ -130,7 +137,10 @@ static void pad_full_pages(struct logfs_area *area) BUG_ON(!page); /* FIXME: reserve a pool */ SetPageUptodate(page); memset(page_address(page), 0xff, PAGE_CACHE_SIZE); - SetPagePrivate(page); + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + } page_cache_release(page); index++; no_indizes--; @@ -485,8 +495,12 @@ static void move_btree_to_page(struct inode *inode, struct page *page, mempool_free(item, super->s_alias_pool); } block->page = page; - SetPagePrivate(page); - page->private = (unsigned long)block; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + set_page_private(page, (unsigned long) block); + } block->ops = &indirect_block_ops; initialize_block_counters(page, block, data, 0); } @@ -536,8 +550,12 @@ void move_page_to_btree(struct page *page) list_add(&item->list, &block->item_list); } block->page = NULL; - ClearPagePrivate(page); - page->private = 0; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + set_page_private(page, 0); + } block->ops = &btree_block_ops; err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, block); @@ -702,7 +720,10 @@ void freeseg(struct super_block *sb, u32 segno) page = find_get_page(mapping, ofs >> PAGE_SHIFT); if (!page) continue; - ClearPagePrivate(page); + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + } page_cache_release(page); } } @@ -841,6 +862,16 @@ static void free_area(struct logfs_area *area) kfree(area); } +void free_areas(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int i; + + for_each_area(i) + free_area(super->s_area[i]); + free_area(super->s_journal_area); +} + static struct logfs_area *alloc_area(struct super_block *sb) { struct logfs_area *area; @@ -923,10 +954,6 @@ err: void logfs_cleanup_areas(struct super_block *sb) { struct logfs_super *super = logfs_super(sb); - int i; btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); - for_each_area(i) - free_area(super->s_area[i]); - free_area(super->s_journal_area); } diff --git a/fs/logfs/super.c b/fs/logfs/super.c index e795c234ea3..c9ee7f5d1ca 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -486,14 +486,15 @@ static void logfs_kill_sb(struct super_block *sb) /* Alias entries slow down mount, so evict as many as possible */ sync_filesystem(sb); logfs_write_anchor(sb); + free_areas(sb); /* * From this point on alias entries are simply dropped - and any * writes to the object store are considered bugs. */ - super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN; log_super("LogFS: Now in shutdown\n"); generic_shutdown_super(sb); + super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN; BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes); diff --git a/fs/namei.c b/fs/namei.c index 208c6aa4a98..a780ea515c4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1095,8 +1095,10 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr struct dentry *old; /* Don't create child dentry for a dead directory. */ - if (unlikely(IS_DEADDIR(inode))) + if (unlikely(IS_DEADDIR(inode))) { + dput(dentry); return ERR_PTR(-ENOENT); + } old = inode->i_op->lookup(inode, dentry, nd); if (unlikely(old)) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f0c849c98fe..ec9f6ef6c5d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu } if (npages > 1) { /* for decoding across pages */ - args.acl_scratch = alloc_page(GFP_KERNEL); - if (!args.acl_scratch) + res.acl_scratch = alloc_page(GFP_KERNEL); + if (!res.acl_scratch) goto out_free; } args.acl_len = npages * PAGE_SIZE; @@ -3612,8 +3612,8 @@ out_free: for (i = 0; i < npages; i++) if (pages[i]) __free_page(pages[i]); - if (args.acl_scratch) - __free_page(args.acl_scratch); + if (res.acl_scratch) + __free_page(res.acl_scratch); return ret; } @@ -4883,8 +4883,10 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->cl_rpcclient->cl_auth->au_flavor); res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); - if (unlikely(!res.server_scope)) - return -ENOMEM; + if (unlikely(!res.server_scope)) { + status = -ENOMEM; + goto out; + } status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) @@ -4901,12 +4903,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) clp->server_scope = NULL; } - if (!clp->server_scope) + if (!clp->server_scope) { clp->server_scope = res.server_scope; - else - kfree(res.server_scope); + goto out; + } } - + kfree(res.server_scope); +out: dprintk("<-- %s status= %d\n", __func__, status); return status; } @@ -5008,37 +5011,53 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) return status; } +static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags) +{ + return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags); +} + +static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl, + struct nfs4_slot *new, + u32 max_slots, + u32 ivalue) +{ + struct nfs4_slot *old = NULL; + u32 i; + + spin_lock(&tbl->slot_tbl_lock); + if (new) { + old = tbl->slots; + tbl->slots = new; + tbl->max_slots = max_slots; + } + tbl->highest_used_slotid = -1; /* no slot is currently used */ + for (i = 0; i < tbl->max_slots; i++) + tbl->slots[i].seq_nr = ivalue; + spin_unlock(&tbl->slot_tbl_lock); + kfree(old); +} + /* - * Reset a slot table + * (re)Initialise a slot table */ -static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, - int ivalue) +static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, + u32 ivalue) { struct nfs4_slot *new = NULL; - int i; - int ret = 0; + int ret = -ENOMEM; dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__, max_reqs, tbl->max_slots); /* Does the newly negotiated max_reqs match the existing slot table? */ if (max_reqs != tbl->max_slots) { - ret = -ENOMEM; - new = kmalloc(max_reqs * sizeof(struct nfs4_slot), - GFP_NOFS); + new = nfs4_alloc_slots(max_reqs, GFP_NOFS); if (!new) goto out; - ret = 0; - kfree(tbl->slots); } - spin_lock(&tbl->slot_tbl_lock); - if (new) { - tbl->slots = new; - tbl->max_slots = max_reqs; - } - for (i = 0; i < tbl->max_slots; ++i) - tbl->slots[i].seq_nr = ivalue; - spin_unlock(&tbl->slot_tbl_lock); + ret = 0; + + nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue); dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, tbl, tbl->slots, tbl->max_slots); out: @@ -5061,36 +5080,6 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session) } /* - * Initialize slot table - */ -static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, - int max_slots, int ivalue) -{ - struct nfs4_slot *slot; - int ret = -ENOMEM; - - BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE); - - dprintk("--> %s: max_reqs=%u\n", __func__, max_slots); - - slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS); - if (!slot) - goto out; - ret = 0; - - spin_lock(&tbl->slot_tbl_lock); - tbl->max_slots = max_slots; - tbl->slots = slot; - tbl->highest_used_slotid = -1; /* no slot is currently used */ - spin_unlock(&tbl->slot_tbl_lock); - dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, - tbl, tbl->slots, tbl->max_slots); -out: - dprintk("<-- %s: return %d\n", __func__, ret); - return ret; -} - -/* * Initialize or reset the forechannel and backchannel tables */ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) @@ -5101,25 +5090,16 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses) dprintk("--> %s\n", __func__); /* Fore channel */ tbl = &ses->fc_slot_table; - if (tbl->slots == NULL) { - status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1); - if (status) /* -ENOMEM */ - return status; - } else { - status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1); - if (status) - return status; - } + status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1); + if (status) /* -ENOMEM */ + return status; /* Back channel */ tbl = &ses->bc_slot_table; - if (tbl->slots == NULL) { - status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0); - if (status) - /* Fore and back channel share a connection so get - * both slot tables or neither */ - nfs4_destroy_slot_tables(ses); - } else - status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0); + status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0); + if (status && tbl->slots == NULL) + /* Fore and back channel share a connection so get + * both slot tables or neither */ + nfs4_destroy_slot_tables(ses); return status; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a53f33b4ac3..45392032e7b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1132,6 +1132,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4 { struct nfs_client *clp = server->nfs_client; + if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags)) + nfs_async_inode_return_delegation(state->inode, &state->stateid); nfs4_state_mark_reclaim_nograce(clp, state); nfs4_schedule_state_manager(clp); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 95e92e43840..33bd8d0f745 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); - xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE); encode_nops(&hdr); } @@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr, struct compound_hdr hdr; int status; + if (res->acl_scratch != NULL) { + void *p = page_address(res->acl_scratch); + xdr_set_scratch_buffer(xdr, p, PAGE_SIZE); + } status = decode_compound_hdr(xdr, &hdr); if (status) goto out; diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 886649627c3..2a70fce70c6 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -603,6 +603,8 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) goto out; + if (nsegs > UINT_MAX / sizeof(__u64)) + goto out; /* * argv[4] points to segment numbers this ioctl cleans. We diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index be244692550..a9856e3eaaf 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1053,7 +1053,7 @@ static int ocfs2_rename(struct inode *old_dir, handle_t *handle = NULL; struct buffer_head *old_dir_bh = NULL; struct buffer_head *new_dir_bh = NULL; - nlink_t old_dir_nlink = old_dir->i_nlink; + u32 old_dir_nlink = old_dir->i_nlink; struct ocfs2_dinode *old_di; struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, }; struct ocfs2_dir_lookup_result target_lookup_res = { NULL, }; diff --git a/fs/proc/base.c b/fs/proc/base.c index 9cde9edf9c4..d4548dd49b0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } -static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode) -{ - struct mm_struct *mm; - int err; - - err = mutex_lock_killable(&task->signal->cred_guard_mutex); - if (err) - return ERR_PTR(err); - - mm = get_task_mm(task); - if (mm && mm != current->mm && - !ptrace_may_access(task, mode)) { - mmput(mm); - mm = ERR_PTR(-EACCES); - } - mutex_unlock(&task->signal->cred_guard_mutex); - - return mm; -} - struct mm_struct *mm_for_maps(struct task_struct *task) { return mm_access(task, PTRACE_MODE_READ); @@ -711,6 +691,13 @@ static int mem_open(struct inode* inode, struct file* file) if (IS_ERR(mm)) return PTR_ERR(mm); + if (mm) { + /* ensure this mm_struct can't be freed */ + atomic_inc(&mm->mm_count); + /* but do not pin its memory */ + mmput(mm); + } + /* OK to pass negative loff_t, we can catch out-of-range */ file->f_mode |= FMODE_UNSIGNED_OFFSET; file->private_data = mm; @@ -718,57 +705,13 @@ static int mem_open(struct inode* inode, struct file* file) return 0; } -static ssize_t mem_read(struct file * file, char __user * buf, - size_t count, loff_t *ppos) +static ssize_t mem_rw(struct file *file, char __user *buf, + size_t count, loff_t *ppos, int write) { - int ret; - char *page; - unsigned long src = *ppos; struct mm_struct *mm = file->private_data; - - if (!mm) - return 0; - - page = (char *)__get_free_page(GFP_TEMPORARY); - if (!page) - return -ENOMEM; - - ret = 0; - - while (count > 0) { - int this_len, retval; - - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - retval = access_remote_vm(mm, src, page, this_len, 0); - if (!retval) { - if (!ret) - ret = -EIO; - break; - } - - if (copy_to_user(buf, page, retval)) { - ret = -EFAULT; - break; - } - - ret += retval; - src += retval; - buf += retval; - count -= retval; - } - *ppos = src; - - free_page((unsigned long) page); - return ret; -} - -static ssize_t mem_write(struct file * file, const char __user *buf, - size_t count, loff_t *ppos) -{ - int copied; + unsigned long addr = *ppos; + ssize_t copied; char *page; - unsigned long dst = *ppos; - struct mm_struct *mm = file->private_data; if (!mm) return 0; @@ -778,31 +721,54 @@ static ssize_t mem_write(struct file * file, const char __user *buf, return -ENOMEM; copied = 0; + if (!atomic_inc_not_zero(&mm->mm_users)) + goto free; + while (count > 0) { - int this_len, retval; + int this_len = min_t(int, count, PAGE_SIZE); - this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - if (copy_from_user(page, buf, this_len)) { + if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; break; } - retval = access_remote_vm(mm, dst, page, this_len, 1); - if (!retval) { + + this_len = access_remote_vm(mm, addr, page, this_len, write); + if (!this_len) { if (!copied) copied = -EIO; break; } - copied += retval; - buf += retval; - dst += retval; - count -= retval; + + if (!write && copy_to_user(buf, page, this_len)) { + copied = -EFAULT; + break; + } + + buf += this_len; + addr += this_len; + copied += this_len; + count -= this_len; } - *ppos = dst; + *ppos = addr; + mmput(mm); +free: free_page((unsigned long) page); return copied; } +static ssize_t mem_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, buf, count, ppos, 0); +} + +static ssize_t mem_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return mem_rw(file, (char __user*)buf, count, ppos, 1); +} + loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { @@ -822,8 +788,8 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig) static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; - - mmput(mm); + if (mm) + mmdrop(mm); return 0; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e418c5abdb0..7dcd2a25049 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -518,6 +518,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, if (!page) continue; + if (PageReserved(page)) + continue; + /* Clear accessed and referenced bits. */ ptep_test_and_clear_young(vma, addr, pte); ClearPageReferenced(page); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 5ec59b20cf7..46741970371 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2125,6 +2125,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, mutex_unlock(&dqopt->dqio_mutex); goto out_file_init; } + if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) + dqopt->info[type].dqi_flags |= DQF_SYS_FILE; mutex_unlock(&dqopt->dqio_mutex); spin_lock(&dq_state_lock); dqopt->flags |= dquot_state_flag(flags, type); @@ -2464,7 +2466,7 @@ int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) spin_lock(&dq_data_lock); ii->dqi_bgrace = mi->dqi_bgrace; ii->dqi_igrace = mi->dqi_igrace; - ii->dqi_flags = mi->dqi_flags & DQF_MASK; + ii->dqi_flags = mi->dqi_flags & DQF_GETINFO_MASK; ii->dqi_valid = IIF_ALL; spin_unlock(&dq_data_lock); mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex); @@ -2490,8 +2492,8 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) if (ii->dqi_valid & IIF_IGRACE) mi->dqi_igrace = ii->dqi_igrace; if (ii->dqi_valid & IIF_FLAGS) - mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | - (ii->dqi_flags & DQF_MASK); + mi->dqi_flags = (mi->dqi_flags & ~DQF_SETINFO_MASK) | + (ii->dqi_flags & DQF_SETINFO_MASK); spin_unlock(&dq_data_lock); mark_info_dirty(sb, type); /* Force write to disk */ diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 7898cd688a0..fc2c4388d12 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -292,11 +292,26 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, } } +/* Return 1 if 'cmd' will block on frozen filesystem */ +static int quotactl_cmd_write(int cmd) +{ + switch (cmd) { + case Q_GETFMT: + case Q_GETINFO: + case Q_SYNC: + case Q_XGETQSTAT: + case Q_XGETQUOTA: + case Q_XQUOTASYNC: + return 0; + } + return 1; +} + /* * look up a superblock on which quota ops will be performed * - use the name of a block device to find the superblock thereon */ -static struct super_block *quotactl_block(const char __user *special) +static struct super_block *quotactl_block(const char __user *special, int cmd) { #ifdef CONFIG_BLOCK struct block_device *bdev; @@ -309,7 +324,10 @@ static struct super_block *quotactl_block(const char __user *special) putname(tmp); if (IS_ERR(bdev)) return ERR_CAST(bdev); - sb = get_super(bdev); + if (quotactl_cmd_write(cmd)) + sb = get_super_thawed(bdev); + else + sb = get_super(bdev); bdput(bdev); if (!sb) return ERR_PTR(-ENODEV); @@ -361,7 +379,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, pathp = &path; } - sb = quotactl_block(special); + sb = quotactl_block(special, cmds); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto out; diff --git a/fs/select.c b/fs/select.c index d33418fdc85..e782258d0de 100644 --- a/fs/select.c +++ b/fs/select.c @@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block) } SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, - long, timeout_msecs) + int, timeout_msecs) { struct timespec end_time, *to = NULL; int ret; diff --git a/fs/signalfd.c b/fs/signalfd.c index 492465b451d..7ae2a574cb2 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -30,6 +30,21 @@ #include <linux/signalfd.h> #include <linux/syscalls.h> +void signalfd_cleanup(struct sighand_struct *sighand) +{ + wait_queue_head_t *wqh = &sighand->signalfd_wqh; + /* + * The lockless check can race with remove_wait_queue() in progress, + * but in this case its caller should run under rcu_read_lock() and + * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return. + */ + if (likely(!waitqueue_active(wqh))) + return; + + /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */ + wake_up_poll(wqh, POLLHUP | POLLFREE); +} + struct signalfd_ctx { sigset_t sigmask; }; diff --git a/fs/super.c b/fs/super.c index 6015c02296b..6277ec6cb60 100644 --- a/fs/super.c +++ b/fs/super.c @@ -634,6 +634,28 @@ rescan: EXPORT_SYMBOL(get_super); /** + * get_super_thawed - get thawed superblock of a device + * @bdev: device to get the superblock for + * + * Scans the superblock list and finds the superblock of the file system + * mounted on the device. The superblock is returned once it is thawed + * (or immediately if it was not frozen). %NULL is returned if no match + * is found. + */ +struct super_block *get_super_thawed(struct block_device *bdev) +{ + while (1) { + struct super_block *s = get_super(bdev); + if (!s || s->s_frozen == SB_UNFROZEN) + return s; + up_read(&s->s_umount); + vfs_check_frozen(s, SB_FREEZE_WRITE); + put_super(s); + } +} +EXPORT_SYMBOL(get_super_thawed); + +/** * get_active_super - get an active reference to the superblock of a device * @bdev: device to get the superblock for * diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 62f4fb37789..00012e31829 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -493,6 +493,12 @@ int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, const void *ns = NULL; int err; + if (!dir_sd) { + WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n", + kobject_name(kobj)); + return -ENOENT; + } + err = 0; if (!sysfs_ns_type(dir_sd)) goto out; diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 4a802b4a905..85eb81683a2 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -318,8 +318,11 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha struct sysfs_addrm_cxt acxt; struct sysfs_dirent *sd; - if (!dir_sd) + if (!dir_sd) { + WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", + name); return -ENOENT; + } sysfs_addrm_start(&acxt, dir_sd); diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 292eff19803..ab7c53fe346 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone) extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -static inline int -kmem_shake_allow(gfp_t gfp_mask) -{ - return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); -} - #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index b4ff40b5f91..53db20ee3e7 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -63,82 +63,6 @@ int xfs_dqerror_mod = 33; static struct lock_class_key xfs_dquot_other_class; /* - * Allocate and initialize a dquot. We don't always allocate fresh memory; - * we try to reclaim a free dquot if the number of incore dquots are above - * a threshold. - * The only field inside the core that gets initialized at this point - * is the d_id field. The idea is to fill in the entire q_core - * when we read in the on disk dquot. - */ -STATIC xfs_dquot_t * -xfs_qm_dqinit( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type) -{ - xfs_dquot_t *dqp; - boolean_t brandnewdquot; - - brandnewdquot = xfs_qm_dqalloc_incore(&dqp); - dqp->dq_flags = type; - dqp->q_core.d_id = cpu_to_be32(id); - dqp->q_mount = mp; - - /* - * No need to re-initialize these if this is a reclaimed dquot. - */ - if (brandnewdquot) { - INIT_LIST_HEAD(&dqp->q_freelist); - mutex_init(&dqp->q_qlock); - init_waitqueue_head(&dqp->q_pinwait); - - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&dqp->q_flush); - complete(&dqp->q_flush); - - trace_xfs_dqinit(dqp); - } else { - /* - * Only the q_core portion was zeroed in dqreclaim_one(). - * So, we need to reset others. - */ - dqp->q_nrefs = 0; - dqp->q_blkno = 0; - INIT_LIST_HEAD(&dqp->q_mplist); - INIT_LIST_HEAD(&dqp->q_hashlist); - dqp->q_bufoffset = 0; - dqp->q_fileoffset = 0; - dqp->q_transp = NULL; - dqp->q_gdquot = NULL; - dqp->q_res_bcount = 0; - dqp->q_res_icount = 0; - dqp->q_res_rtbcount = 0; - atomic_set(&dqp->q_pincount, 0); - dqp->q_hash = NULL; - ASSERT(list_empty(&dqp->q_freelist)); - - trace_xfs_dqreuse(dqp); - } - - /* - * In either case we need to make sure group quotas have a different - * lock class than user quotas, to make sure lockdep knows we can - * locks of one of each at the same time. - */ - if (!(type & XFS_DQ_USER)) - lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); - - /* - * log item gets initialized later - */ - return (dqp); -} - -/* * This is called to free all the memory associated with a dquot */ void @@ -215,10 +139,10 @@ xfs_qm_adjust_dqtimers( if (!d->d_btimer) { if ((d->d_blk_softlimit && - (be64_to_cpu(d->d_bcount) >= + (be64_to_cpu(d->d_bcount) > be64_to_cpu(d->d_blk_softlimit))) || (d->d_blk_hardlimit && - (be64_to_cpu(d->d_bcount) >= + (be64_to_cpu(d->d_bcount) > be64_to_cpu(d->d_blk_hardlimit)))) { d->d_btimer = cpu_to_be32(get_seconds() + mp->m_quotainfo->qi_btimelimit); @@ -227,10 +151,10 @@ xfs_qm_adjust_dqtimers( } } else { if ((!d->d_blk_softlimit || - (be64_to_cpu(d->d_bcount) < + (be64_to_cpu(d->d_bcount) <= be64_to_cpu(d->d_blk_softlimit))) && (!d->d_blk_hardlimit || - (be64_to_cpu(d->d_bcount) < + (be64_to_cpu(d->d_bcount) <= be64_to_cpu(d->d_blk_hardlimit)))) { d->d_btimer = 0; } @@ -238,10 +162,10 @@ xfs_qm_adjust_dqtimers( if (!d->d_itimer) { if ((d->d_ino_softlimit && - (be64_to_cpu(d->d_icount) >= + (be64_to_cpu(d->d_icount) > be64_to_cpu(d->d_ino_softlimit))) || (d->d_ino_hardlimit && - (be64_to_cpu(d->d_icount) >= + (be64_to_cpu(d->d_icount) > be64_to_cpu(d->d_ino_hardlimit)))) { d->d_itimer = cpu_to_be32(get_seconds() + mp->m_quotainfo->qi_itimelimit); @@ -250,10 +174,10 @@ xfs_qm_adjust_dqtimers( } } else { if ((!d->d_ino_softlimit || - (be64_to_cpu(d->d_icount) < + (be64_to_cpu(d->d_icount) <= be64_to_cpu(d->d_ino_softlimit))) && (!d->d_ino_hardlimit || - (be64_to_cpu(d->d_icount) < + (be64_to_cpu(d->d_icount) <= be64_to_cpu(d->d_ino_hardlimit)))) { d->d_itimer = 0; } @@ -261,10 +185,10 @@ xfs_qm_adjust_dqtimers( if (!d->d_rtbtimer) { if ((d->d_rtb_softlimit && - (be64_to_cpu(d->d_rtbcount) >= + (be64_to_cpu(d->d_rtbcount) > be64_to_cpu(d->d_rtb_softlimit))) || (d->d_rtb_hardlimit && - (be64_to_cpu(d->d_rtbcount) >= + (be64_to_cpu(d->d_rtbcount) > be64_to_cpu(d->d_rtb_hardlimit)))) { d->d_rtbtimer = cpu_to_be32(get_seconds() + mp->m_quotainfo->qi_rtbtimelimit); @@ -273,10 +197,10 @@ xfs_qm_adjust_dqtimers( } } else { if ((!d->d_rtb_softlimit || - (be64_to_cpu(d->d_rtbcount) < + (be64_to_cpu(d->d_rtbcount) <= be64_to_cpu(d->d_rtb_softlimit))) && (!d->d_rtb_hardlimit || - (be64_to_cpu(d->d_rtbcount) < + (be64_to_cpu(d->d_rtbcount) <= be64_to_cpu(d->d_rtb_hardlimit)))) { d->d_rtbtimer = 0; } @@ -567,7 +491,32 @@ xfs_qm_dqread( int error; int cancelflags = 0; - dqp = xfs_qm_dqinit(mp, id, type); + + dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); + + dqp->dq_flags = type; + dqp->q_core.d_id = cpu_to_be32(id); + dqp->q_mount = mp; + INIT_LIST_HEAD(&dqp->q_freelist); + mutex_init(&dqp->q_qlock); + init_waitqueue_head(&dqp->q_pinwait); + + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&dqp->q_flush); + complete(&dqp->q_flush); + + /* + * Make sure group quotas have a different lock class than user + * quotas. + */ + if (!(type & XFS_DQ_USER)) + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); + + atomic_inc(&xfs_Gqm->qm_totaldquots); trace_xfs_dqread(dqp); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 541a508adea..0ed9ee77937 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1489,7 +1489,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u); + ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP); memcpy(&ptr[old_len], dp, len); /* d, s, l */ item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; @@ -1981,7 +1981,7 @@ xfs_qm_dqcheck( if (!errs && ddq->d_id) { if (ddq->d_blk_softlimit && - be64_to_cpu(ddq->d_bcount) >= + be64_to_cpu(ddq->d_bcount) > be64_to_cpu(ddq->d_blk_softlimit)) { if (!ddq->d_btimer) { if (flags & XFS_QMOPT_DOWARN) @@ -1992,7 +1992,7 @@ xfs_qm_dqcheck( } } if (ddq->d_ino_softlimit && - be64_to_cpu(ddq->d_icount) >= + be64_to_cpu(ddq->d_icount) > be64_to_cpu(ddq->d_ino_softlimit)) { if (!ddq->d_itimer) { if (flags & XFS_QMOPT_DOWARN) @@ -2003,7 +2003,7 @@ xfs_qm_dqcheck( } } if (ddq->d_rtb_softlimit && - be64_to_cpu(ddq->d_rtbcount) >= + be64_to_cpu(ddq->d_rtbcount) > be64_to_cpu(ddq->d_rtb_softlimit)) { if (!ddq->d_rtbtimer) { if (flags & XFS_QMOPT_DOWARN) diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 671f37eae1c..c436def733b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -50,7 +50,6 @@ */ struct mutex xfs_Gqm_lock; struct xfs_qm *xfs_Gqm; -uint ndquot; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; @@ -93,7 +92,6 @@ xfs_Gqm_init(void) goto out_free_udqhash; hsize /= sizeof(xfs_dqhash_t); - ndquot = hsize << 8; xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); xqm->qm_dqhashmask = hsize - 1; @@ -137,7 +135,6 @@ xfs_Gqm_init(void) xqm->qm_dqtrxzone = qm_dqtrxzone; atomic_set(&xqm->qm_totaldquots, 0); - xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; xqm->qm_nrefs = 0; return xqm; @@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos( return 0; } +STATIC void +xfs_qm_dqfree_one( + struct xfs_dquot *dqp) +{ + struct xfs_mount *mp = dqp->q_mount; + struct xfs_quotainfo *qi = mp->m_quotainfo; + mutex_lock(&dqp->q_hash->qh_lock); + list_del_init(&dqp->q_hashlist); + dqp->q_hash->qh_version++; + mutex_unlock(&dqp->q_hash->qh_lock); -/* - * Pop the least recently used dquot off the freelist and recycle it. - */ -STATIC struct xfs_dquot * -xfs_qm_dqreclaim_one(void) + mutex_lock(&qi->qi_dqlist_lock); + list_del_init(&dqp->q_mplist); + qi->qi_dquots--; + qi->qi_dqreclaims++; + mutex_unlock(&qi->qi_dqlist_lock); + + xfs_qm_dqdestroy(dqp); +} + +STATIC void +xfs_qm_dqreclaim_one( + struct xfs_dquot *dqp, + struct list_head *dispose_list) { - struct xfs_dquot *dqp; - int restarts = 0; + struct xfs_mount *mp = dqp->q_mount; + int error; - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); -restart: - list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { - struct xfs_mount *mp = dqp->q_mount; + if (!xfs_dqlock_nowait(dqp)) + goto out_busy; - if (!xfs_dqlock_nowait(dqp)) - continue; + /* + * This dquot has acquired a reference in the meantime remove it from + * the freelist and try again. + */ + if (dqp->q_nrefs) { + xfs_dqunlock(dqp); - /* - * This dquot has already been grabbed by dqlookup. - * Remove it from the freelist and try again. - */ - if (dqp->q_nrefs) { - trace_xfs_dqreclaim_want(dqp); - XQM_STATS_INC(xqmstats.xs_qm_dqwants); - - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - restarts++; - goto dqunlock; - } + trace_xfs_dqreclaim_want(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqwants); - ASSERT(dqp->q_hash); - ASSERT(!list_empty(&dqp->q_mplist)); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + return; + } - /* - * Try to grab the flush lock. If this dquot is in the process - * of getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) - goto dqunlock; + ASSERT(dqp->q_hash); + ASSERT(!list_empty(&dqp->q_mplist)); - /* - * We have the flush lock so we know that this is not in the - * process of being flushed. So, if this is dirty, flush it - * DELWRI so that we don't get a freelist infested with - * dirty dquots. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; + /* + * Try to grab the flush lock. If this dquot is in the process of + * getting flushed to disk, we don't want to reclaim it. + */ + if (!xfs_dqflock_nowait(dqp)) + goto out_busy; - trace_xfs_dqreclaim_dirty(dqp); + /* + * We have the flush lock so we know that this is not in the + * process of being flushed. So, if this is dirty, flush it + * DELWRI so that we don't get a freelist infested with + * dirty dquots. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + trace_xfs_dqreclaim_dirty(dqp); - /* - * We flush it delayed write, so don't bother - * releasing the freelist lock. - */ - error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK); - if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - } - goto dqunlock; + /* + * We flush it delayed write, so don't bother releasing the + * freelist lock. + */ + error = xfs_qm_dqflush(dqp, 0); + if (error) { + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); } - xfs_dqfunlock(dqp); /* - * Prevent lookup now that we are going to reclaim the dquot. - * Once XFS_DQ_FREEING is set lookup won't touch the dquot, - * thus we can drop the lock now. + * Give the dquot another try on the freelist, as the + * flushing will take some time. */ - dqp->dq_flags |= XFS_DQ_FREEING; - xfs_dqunlock(dqp); - - mutex_lock(&dqp->q_hash->qh_lock); - list_del_init(&dqp->q_hashlist); - dqp->q_hash->qh_version++; - mutex_unlock(&dqp->q_hash->qh_lock); - - mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dquots--; - mp->m_quotainfo->qi_dqreclaims++; - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + goto out_busy; + } + xfs_dqfunlock(dqp); - ASSERT(dqp->q_nrefs == 0); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; + /* + * Prevent lookups now that we are past the point of no return. + */ + dqp->dq_flags |= XFS_DQ_FREEING; + xfs_dqunlock(dqp); - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return dqp; -dqunlock: - xfs_dqunlock(dqp); - if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - break; - goto restart; - } + ASSERT(dqp->q_nrefs == 0); + list_move_tail(&dqp->q_freelist, dispose_list); + xfs_Gqm->qm_dqfrlist_cnt--; - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return NULL; -} + trace_xfs_dqreclaim_done(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); + return; -/* - * Traverse the freelist of dquots and attempt to reclaim a maximum of - * 'howmany' dquots. This operation races with dqlookup(), and attempts to - * favor the lookup function ... - */ -STATIC int -xfs_qm_shake_freelist( - int howmany) -{ - int nreclaimed = 0; - xfs_dquot_t *dqp; +out_busy: + xfs_dqunlock(dqp); - if (howmany <= 0) - return 0; + /* + * Move the dquot to the tail of the list so that we don't spin on it. + */ + list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); - while (nreclaimed < howmany) { - dqp = xfs_qm_dqreclaim_one(); - if (!dqp) - return nreclaimed; - xfs_qm_dqdestroy(dqp); - nreclaimed++; - } - return nreclaimed; + trace_xfs_dqreclaim_busy(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); } -/* - * The kmem_shake interface is invoked when memory is running low. - */ -/* ARGSUSED */ STATIC int xfs_qm_shake( - struct shrinker *shrink, - struct shrink_control *sc) + struct shrinker *shrink, + struct shrink_control *sc) { - int ndqused, nfree, n; - gfp_t gfp_mask = sc->gfp_mask; - - if (!kmem_shake_allow(gfp_mask)) - return 0; - if (!xfs_Gqm) - return 0; - - nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ - /* incore dquots in all f/s's */ - ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; - - ASSERT(ndqused >= 0); + int nr_to_scan = sc->nr_to_scan; + LIST_HEAD (dispose_list); + struct xfs_dquot *dqp; - if (nfree <= ndqused && nfree < ndquot) + if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT)) return 0; + if (!nr_to_scan) + goto out; - ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ - n = nfree - ndqused - ndquot; /* # over target */ - - return xfs_qm_shake_freelist(MAX(nfree, n)); -} - - -/*------------------------------------------------------------------*/ - -/* - * Return a new incore dquot. Depending on the number of - * dquots in the system, we either allocate a new one on the kernel heap, - * or reclaim a free one. - * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed - * to reclaim an existing one from the freelist. - */ -boolean_t -xfs_qm_dqalloc_incore( - xfs_dquot_t **O_dqpp) -{ - xfs_dquot_t *dqp; - - /* - * Check against high water mark to see if we want to pop - * a nincompoop dquot off the freelist. - */ - if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) { - /* - * Try to recycle a dquot from the freelist. - */ - if ((dqp = xfs_qm_dqreclaim_one())) { - XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); - /* - * Just zero the core here. The rest will get - * reinitialized by caller. XXX we shouldn't even - * do this zero ... - */ - memset(&dqp->q_core, 0, sizeof(dqp->q_core)); - *O_dqpp = dqp; - return B_FALSE; - } - XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + while (!list_empty(&xfs_Gqm->qm_dqfrlist)) { + if (nr_to_scan-- <= 0) + break; + dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot, + q_freelist); + xfs_qm_dqreclaim_one(dqp, &dispose_list); } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - /* - * Allocate a brand new dquot on the kernel heap and return it - * to the caller to initialize. - */ - ASSERT(xfs_Gqm->qm_dqzone != NULL); - *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); - atomic_inc(&xfs_Gqm->qm_totaldquots); - - return B_TRUE; + while (!list_empty(&dispose_list)) { + dqp = list_first_entry(&dispose_list, struct xfs_dquot, + q_freelist); + list_del_init(&dqp->q_freelist); + xfs_qm_dqfree_one(dqp); + } +out: + return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure; } - /* * Start a transaction and write the incore superblock changes to * disk. flags parameter indicates which fields have changed. diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 9b4f3adefbc..9a9b997e1a0 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -26,24 +26,12 @@ struct xfs_qm; struct xfs_inode; -extern uint ndquot; extern struct mutex xfs_Gqm_lock; extern struct xfs_qm *xfs_Gqm; extern kmem_zone_t *qm_dqzone; extern kmem_zone_t *qm_dqtrxzone; /* - * Ditto, for xfs_qm_dqreclaim_one. - */ -#define XFS_QM_RECLAIM_MAX_RESTARTS 4 - -/* - * Ideal ratio of free to in use dquots. Quota manager makes an attempt - * to keep this balance. - */ -#define XFS_QM_DQFREE_RATIO 2 - -/* * Dquot hashtable constants/threshold values. */ #define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) @@ -74,7 +62,6 @@ typedef struct xfs_qm { int qm_dqfrlist_cnt; atomic_t qm_totaldquots; /* total incore dquots */ uint qm_nrefs; /* file systems with quota on */ - int qm_dqfree_ratio;/* ratio of free to inuse dquots */ kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ } xfs_qm_t; @@ -143,7 +130,6 @@ extern int xfs_qm_quotacheck(xfs_mount_t *); extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); /* dquot stuff */ -extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c index 8671a0b3264..5729ba57087 100644 --- a/fs/xfs/xfs_qm_stats.c +++ b/fs/xfs/xfs_qm_stats.c @@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v) { /* maximum; incore; ratio free to inuse; freelist */ seq_printf(m, "%d\t%d\t%d\t%u\n", - ndquot, + 0, xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, - xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, + 0, xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); return 0; } diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index eafbcff81f3..711a86e39ff 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -813,11 +813,11 @@ xfs_qm_export_dquot( (XFS_IS_OQUOTA_ENFORCED(mp) && (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && dst->d_id != 0) { - if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && + if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) && (dst->d_blk_softlimit > 0)) { ASSERT(dst->d_btimer != 0); } - if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) && + if (((int) dst->d_icount > (int) dst->d_ino_softlimit) && (dst->d_ino_softlimit > 0)) { ASSERT(dst->d_itimer != 0); } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 6b6df5802e9..bb134a81993 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \ DEFINE_DQUOT_EVENT(xfs_dqadjust); DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_done); DEFINE_DQUOT_EVENT(xfs_dqattach_found); DEFINE_DQUOT_EVENT(xfs_dqattach_get); -DEFINE_DQUOT_EVENT(xfs_dqinit); -DEFINE_DQUOT_EVENT(xfs_dqreuse); DEFINE_DQUOT_EVENT(xfs_dqalloc); DEFINE_DQUOT_EVENT(xfs_dqtobp_read); DEFINE_DQUOT_EVENT(xfs_dqread); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 329b06aba1c..7adcdf15ae0 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1151,8 +1151,8 @@ xfs_trans_add_item( { struct xfs_log_item_desc *lidp; - ASSERT(lip->li_mountp = tp->t_mountp); - ASSERT(lip->li_ailp = tp->t_mountp->m_ail); + ASSERT(lip->li_mountp == tp->t_mountp); + ASSERT(lip->li_ailp == tp->t_mountp->m_ail); lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS); diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 4d00ee67792..c4ba366d24e 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -649,12 +649,12 @@ xfs_trans_dqresv( * nblks. */ if (hardlimit > 0ULL && - hardlimit <= nblks + *resbcountp) { + hardlimit < nblks + *resbcountp) { xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); goto error_return; } if (softlimit > 0ULL && - softlimit <= nblks + *resbcountp) { + softlimit < nblks + *resbcountp) { if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { xfs_quota_warn(mp, dqp, @@ -677,11 +677,13 @@ xfs_trans_dqresv( if (!softlimit) softlimit = q->qi_isoftlimit; - if (hardlimit > 0ULL && count >= hardlimit) { + if (hardlimit > 0ULL && + hardlimit < ninos + count) { xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); goto error_return; } - if (softlimit > 0ULL && count >= softlimit) { + if (softlimit > 0ULL && + softlimit < ninos + count) { if ((timer != 0 && get_seconds() > timer) || (warns != 0 && warns >= warnlimit)) { xfs_quota_warn(mp, dqp, diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 0cf52da9d24..ebdb88840a4 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -131,7 +131,8 @@ xfs_readlink( __func__, (unsigned long long) ip->i_ino, (long long) pathlen); ASSERT(0); - return XFS_ERROR(EFSCORRUPTED); + error = XFS_ERROR(EFSCORRUPTED); + goto out; } |