aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c24
-rw-r--r--fs/autofs4/autofs_i.h1
-rw-r--r--fs/autofs4/dev-ioctl.c1
-rw-r--r--fs/autofs4/expire.c2
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/waitq.c22
-rw-r--r--fs/binfmt_aout.c14
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/bio.c10
-rw-r--r--fs/btrfs/backref.c14
-rw-r--r--fs/btrfs/check-integrity.c13
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/disk-io.c19
-rw-r--r--fs/btrfs/extent-tree.c100
-rw-r--r--fs/btrfs/extent_io.c115
-rw-r--r--fs/btrfs/extent_io.h1
-rw-r--r--fs/btrfs/extent_map.h4
-rw-r--r--fs/btrfs/file.c29
-rw-r--r--fs/btrfs/free-space-cache.c6
-rw-r--r--fs/btrfs/inode-map.c6
-rw-r--r--fs/btrfs/inode.c50
-rw-r--r--fs/btrfs/ioctl.c61
-rw-r--r--fs/btrfs/reada.c2
-rw-r--r--fs/btrfs/scrub.c8
-rw-r--r--fs/btrfs/transaction.c16
-rw-r--r--fs/btrfs/tree-log.c6
-rw-r--r--fs/btrfs/volumes.c33
-rw-r--r--fs/ceph/caps.c4
-rw-r--r--fs/ceph/dir.c4
-rw-r--r--fs/ceph/mds_client.c10
-rw-r--r--fs/ceph/mds_client.h7
-rw-r--r--fs/ceph/xattr.c4
-rw-r--r--fs/cifs/Kconfig4
-rw-r--r--fs/cifs/connect.c23
-rw-r--r--fs/cifs/dir.c22
-rw-r--r--fs/cifs/inode.c28
-rw-r--r--fs/cifs/sess.c11
-rw-r--r--fs/compat.c56
-rw-r--r--fs/dcache.c41
-rw-r--r--fs/direct-io.c4
-rw-r--r--fs/ecryptfs/crypto.c68
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h6
-rw-r--r--fs/ecryptfs/inode.c2
-rw-r--r--fs/ecryptfs/keystore.c9
-rw-r--r--fs/ecryptfs/miscdev.c2
-rw-r--r--fs/ecryptfs/mmap.c4
-rw-r--r--fs/ecryptfs/read_write.c4
-rw-r--r--fs/ecryptfs/super.c14
-rw-r--r--fs/eventpoll.c30
-rw-r--r--fs/exec.c51
-rw-r--r--fs/fs-writeback.c16
-rw-r--r--fs/gfs2/glock.c14
-rw-r--r--fs/gfs2/inode.c5
-rw-r--r--fs/gfs2/ops_fstype.c5
-rw-r--r--fs/gfs2/rgrp.c13
-rw-r--r--fs/inode.c8
-rw-r--r--fs/ioprio.c2
-rw-r--r--fs/jffs2/erase.c2
-rw-r--r--fs/logfs/dev_mtd.c6
-rw-r--r--fs/logfs/dir.c2
-rw-r--r--fs/logfs/file.c2
-rw-r--r--fs/logfs/gc.c2
-rw-r--r--fs/logfs/inode.c4
-rw-r--r--fs/logfs/journal.c1
-rw-r--r--fs/logfs/logfs.h5
-rw-r--r--fs/logfs/readwrite.c51
-rw-r--r--fs/logfs/segment.c51
-rw-r--r--fs/logfs/super.c3
-rw-r--r--fs/namei.c70
-rw-r--r--fs/nfs/nfs4proc.c130
-rw-r--r--fs/nfs/nfs4state.c2
-rw-r--r--fs/nfs/nfs4xdr.c5
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/ntfs/attrib.c6
-rw-r--r--fs/ntfs/mft.c6
-rw-r--r--fs/ntfs/super.c4
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/proc/base.c126
-rw-r--r--fs/quota/quota.c24
-rw-r--r--fs/select.c2
-rw-r--r--fs/signalfd.c15
-rw-r--r--fs/super.c22
-rw-r--r--fs/sysfs/file.c6
-rw-r--r--fs/sysfs/inode.c5
-rw-r--r--fs/xfs/kmem.h6
-rw-r--r--fs/xfs/xfs_dquot.c127
-rw-r--r--fs/xfs/xfs_log_recover.c8
-rw-r--r--fs/xfs/xfs_qm.c291
-rw-r--r--fs/xfs/xfs_qm.h14
-rw-r--r--fs/xfs/xfs_qm_stats.c4
-rw-r--r--fs/xfs/xfs_qm_syscalls.c4
-rw-r--r--fs/xfs/xfs_trace.h5
-rw-r--r--fs/xfs/xfs_trans.c4
-rw-r--r--fs/xfs/xfs_trans_dquot.c10
95 files changed, 1164 insertions, 901 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 969beb0e223..b9d64d89a04 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -228,12 +228,6 @@ static void __put_ioctx(struct kioctx *ctx)
call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
-static inline void get_ioctx(struct kioctx *kioctx)
-{
- BUG_ON(atomic_read(&kioctx->users) <= 0);
- atomic_inc(&kioctx->users);
-}
-
static inline int try_get_ioctx(struct kioctx *kioctx)
{
return atomic_inc_not_zero(&kioctx->users);
@@ -273,7 +267,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
mm = ctx->mm = current->mm;
atomic_inc(&mm->mm_count);
- atomic_set(&ctx->users, 1);
+ atomic_set(&ctx->users, 2);
spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->ring_info.ring_lock);
init_waitqueue_head(&ctx->wait);
@@ -490,6 +484,8 @@ static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
kmem_cache_free(kiocb_cachep, req);
ctx->reqs_active--;
}
+ if (unlikely(!ctx->reqs_active && ctx->dead))
+ wake_up_all(&ctx->wait);
spin_unlock_irq(&ctx->ctx_lock);
}
@@ -607,11 +603,16 @@ static void aio_fput_routine(struct work_struct *data)
fput(req->ki_filp);
/* Link the iocb into the context's free list */
+ rcu_read_lock();
spin_lock_irq(&ctx->ctx_lock);
really_put_req(ctx, req);
+ /*
+ * at that point ctx might've been killed, but actual
+ * freeing is RCU'd
+ */
spin_unlock_irq(&ctx->ctx_lock);
+ rcu_read_unlock();
- put_ioctx(ctx);
spin_lock_irq(&fput_lock);
}
spin_unlock_irq(&fput_lock);
@@ -642,7 +643,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
* this function will be executed w/out any aio kthread wakeup.
*/
if (unlikely(!fput_atomic(req->ki_filp))) {
- get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
spin_unlock(&fput_lock);
@@ -1336,10 +1336,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
- if (!ret)
+ if (!ret) {
+ put_ioctx(ioctx);
return 0;
-
- get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
+ }
io_destroy(ioctx);
}
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d8d8e7ba6a1..eb1cc92cd67 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -110,6 +110,7 @@ struct autofs_sb_info {
int sub_version;
int min_proto;
int max_proto;
+ int compat_daemon;
unsigned long exp_timeout;
unsigned int type;
int reghost_enabled;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 76741d8d778..85f1fcdb30e 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -385,6 +385,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
sbi->pipefd = pipefd;
sbi->pipe = pipe;
sbi->catatonic = 0;
+ sbi->compat_daemon = is_compat_task();
}
out:
mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 450f529a4ea..1feb68ecef9 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -124,6 +124,7 @@ start:
/* Negative dentry - try next */
if (!simple_positive(q)) {
spin_unlock(&p->d_lock);
+ lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_);
p = q;
goto again;
}
@@ -186,6 +187,7 @@ again:
/* Negative dentry - try next */
if (!simple_positive(ret)) {
spin_unlock(&p->d_lock);
+ lock_set_subclass(&ret->d_lock.dep_map, 0, _RET_IP_);
p = ret;
goto again;
}
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e16980b00b8..06858d95512 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,6 +19,7 @@
#include <linux/parser.h>
#include <linux/bitops.h>
#include <linux/magic.h>
+#include <linux/compat.h>
#include "autofs_i.h"
#include <linux/module.h>
@@ -224,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
set_autofs_type_indirect(&sbi->type);
sbi->min_proto = 0;
sbi->max_proto = 0;
+ sbi->compat_daemon = is_compat_task();
mutex_init(&sbi->wq_mutex);
mutex_init(&sbi->pipe_mutex);
spin_lock_init(&sbi->fs_lock);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7..9c098db4334 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -91,7 +91,24 @@ static int autofs4_write(struct autofs_sb_info *sbi,
return (bytes > 0);
}
-
+
+/*
+ * The autofs_v5 packet was misdesigned.
+ *
+ * The packets are identical on x86-32 and x86-64, but have different
+ * alignment. Which means that 'sizeof()' will give different results.
+ * Fix it up for the case of running 32-bit user mode on a 64-bit kernel.
+ */
+static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi)
+{
+ size_t pktsz = sizeof(struct autofs_v5_packet);
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+ if (sbi->compat_daemon > 0)
+ pktsz -= 4;
+#endif
+ return pktsz;
+}
+
static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
struct autofs_wait_queue *wq,
int type)
@@ -155,8 +172,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
{
struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
- pktsz = sizeof(*packet);
-
+ pktsz = autofs_v5_packet_size(sbi);
packet->wait_queue_token = wq->wait_queue_token;
packet->len = wq->name.len;
memcpy(packet->name, wq->name.name, wq->name.len);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index a6395bdb26a..1ff94054d35 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -259,6 +259,13 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
current->mm->free_area_cache = current->mm->mmap_base;
current->mm->cached_hole_size = 0;
+ retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
+ if (retval < 0) {
+ /* Someone check-me: is this error path enough? */
+ send_sig(SIGKILL, current, 0);
+ return retval;
+ }
+
install_exec_creds(bprm);
current->flags &= ~PF_FORKNOEXEC;
@@ -352,13 +359,6 @@ beyond_if:
return retval;
}
- retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0) {
- /* Someone check-me: is this error path enough? */
- send_sig(SIGKILL, current, 0);
- return retval;
- }
-
current->mm->start_stack =
(unsigned long) create_aout_tables((char __user *) bprm->p, bprm);
#ifdef __alpha__
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index bcb884e2d61..07d096c4992 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1421,7 +1421,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
for (i = 1; i < view->n; ++i) {
const struct user_regset *regset = &view->regsets[i];
do_thread_regset_writeback(t->task, regset);
- if (regset->core_note_type &&
+ if (regset->core_note_type && regset->get &&
(!regset->active || regset->active(t->task, regset))) {
int ret;
size_t size = regset->n * regset->size;
diff --git a/fs/bio.c b/fs/bio.c
index b1fe82cf88c..b980ecde026 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -505,13 +505,9 @@ EXPORT_SYMBOL(bio_clone);
int bio_get_nr_vecs(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
- int nr_pages;
-
- nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (nr_pages > queue_max_segments(q))
- nr_pages = queue_max_segments(q);
-
- return nr_pages;
+ return min_t(unsigned,
+ queue_max_segments(q),
+ queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
}
EXPORT_SYMBOL(bio_get_nr_vecs);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b9a843226de..0436c12da8c 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -297,7 +297,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
struct rb_node *n = &head->node.rb_node;
int sgn;
- int ret;
+ int ret = 0;
if (extent_op && extent_op->update_key)
btrfs_disk_key_to_cpu(info_key, &extent_op->key);
@@ -392,7 +392,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
struct btrfs_key *info_key, int *info_level,
struct list_head *prefs)
{
- int ret;
+ int ret = 0;
int slot;
struct extent_buffer *leaf;
struct btrfs_key key;
@@ -583,7 +583,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_key info_key = { 0 };
struct btrfs_delayed_ref_root *delayed_refs = NULL;
- struct btrfs_delayed_ref_head *head = NULL;
+ struct btrfs_delayed_ref_head *head;
int info_level = 0;
int ret;
struct list_head prefs_delayed;
@@ -607,6 +607,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
* at a specified point in time
*/
again:
+ head = NULL;
+
ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
@@ -635,8 +637,10 @@ again:
goto again;
}
ret = __add_delayed_refs(head, seq, &info_key, &prefs_delayed);
- if (ret)
+ if (ret) {
+ spin_unlock(&delayed_refs->lock);
goto out;
+ }
}
spin_unlock(&delayed_refs->lock);
@@ -892,6 +896,8 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
if (eb != eb_in)
free_extent_buffer(eb);
ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+ if (ret > 0)
+ ret = -ENOENT;
if (ret)
break;
next_inum = found_key.offset;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index ad0b3ba735b..d986824bb2b 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -644,7 +644,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
static int btrfsic_process_superblock(struct btrfsic_state *state,
struct btrfs_fs_devices *fs_devices)
{
- int ret;
+ int ret = 0;
struct btrfs_super_block *selected_super;
struct list_head *dev_head = &fs_devices->devices;
struct btrfs_device *device;
@@ -1662,7 +1662,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
&state->block_hashtable);
if (NULL != block) {
- u64 bytenr;
+ u64 bytenr = 0;
struct list_head *elem_ref_to;
struct list_head *tmp_ref_to;
@@ -2777,9 +2777,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
printk(KERN_INFO
"submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
" size=%lu, data=%p, bdev=%p)\n",
- rw, bh->b_blocknr,
- (unsigned long long)dev_bytenr, bh->b_size,
- bh->b_data, bh->b_bdev);
+ rw, (unsigned long)bh->b_blocknr,
+ (unsigned long long)dev_bytenr,
+ (unsigned long)bh->b_size, bh->b_data,
+ bh->b_bdev);
btrfsic_process_written_block(dev_state, dev_bytenr,
bh->b_data, bh->b_size, NULL,
NULL, bh, rw);
@@ -2844,7 +2845,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
printk(KERN_INFO
"submit_bio(rw=0x%x, bi_vcnt=%u,"
" bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
- rw, bio->bi_vcnt, bio->bi_sector,
+ rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
(unsigned long long)dev_bytenr,
bio->bi_bdev);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 14f1c5a0b2d..d02c27cd14c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -588,6 +588,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
page_offset(bio->bi_io_vec->bv_page),
PAGE_CACHE_SIZE);
read_unlock(&em_tree->lock);
+ if (!em)
+ return -EIO;
compressed_len = em->block_len;
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 27ebe61d3cc..80b6486fd5e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -886,7 +886,7 @@ struct btrfs_block_rsv {
u64 reserved;
struct btrfs_space_info *space_info;
spinlock_t lock;
- unsigned int full:1;
+ unsigned int full;
};
/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7aa9cd36bf1..534266fe505 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -962,6 +962,13 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
tree = &BTRFS_I(page->mapping->host)->io_tree;
map = &BTRFS_I(page->mapping->host)->extent_tree;
+ /*
+ * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
+ * slab allocation from alloc_extent_state down the callchain where
+ * it'd hit a BUG_ON as those flags are not allowed.
+ */
+ gfp_flags &= ~GFP_SLAB_BUG_MASK;
+
ret = try_release_extent_state(map, tree, page, gfp_flags);
if (!ret)
return 0;
@@ -2253,6 +2260,12 @@ int open_ctree(struct super_block *sb,
goto fail_sb_buffer;
}
+ if (sectorsize < PAGE_SIZE) {
+ printk(KERN_WARNING "btrfs: Incompatible sector size "
+ "found on %s\n", sb->s_id);
+ goto fail_sb_buffer;
+ }
+
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
@@ -2294,6 +2307,12 @@ int open_ctree(struct super_block *sb,
btrfs_close_extra_devices(fs_devices);
+ if (!fs_devices->latest_bdev) {
+ printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
+ sb->s_id);
+ goto fail_tree_roots;
+ }
+
retry_root_backup:
blocksize = btrfs_level_size(tree_root,
btrfs_super_root_level(disk_super));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 700879ed64c..37e0a800d34 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -34,23 +34,24 @@
#include "locking.h"
#include "free-space-cache.h"
-/* control flags for do_chunk_alloc's force field
+/*
+ * control flags for do_chunk_alloc's force field
* CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
* if we really need one.
*
- * CHUNK_ALLOC_FORCE means it must try to allocate one
- *
* CHUNK_ALLOC_LIMITED means to only try and allocate one
* if we have very few chunks already allocated. This is
* used as part of the clustering code to help make sure
* we have a good pool of storage to cluster in, without
* filling the FS with empty chunks
*
+ * CHUNK_ALLOC_FORCE means it must try to allocate one
+ *
*/
enum {
CHUNK_ALLOC_NO_FORCE = 0,
- CHUNK_ALLOC_FORCE = 1,
- CHUNK_ALLOC_LIMITED = 2,
+ CHUNK_ALLOC_LIMITED = 1,
+ CHUNK_ALLOC_FORCE = 2,
};
/*
@@ -3311,7 +3312,8 @@ commit_trans:
}
data_sinfo->bytes_may_use += bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
- (u64)data_sinfo, bytes, 1);
+ (u64)(unsigned long)data_sinfo,
+ bytes, 1);
spin_unlock(&data_sinfo->lock);
return 0;
@@ -3332,7 +3334,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
- (u64)data_sinfo, bytes, 0);
+ (u64)(unsigned long)data_sinfo,
+ bytes, 0);
spin_unlock(&data_sinfo->lock);
}
@@ -3414,7 +3417,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
again:
spin_lock(&space_info->lock);
- if (space_info->force_alloc)
+ if (force < space_info->force_alloc)
force = space_info->force_alloc;
if (space_info->full) {
spin_unlock(&space_info->lock);
@@ -3610,12 +3613,15 @@ static int may_commit_transaction(struct btrfs_root *root,
if (space_info != delayed_rsv->space_info)
return -ENOSPC;
+ spin_lock(&space_info->lock);
spin_lock(&delayed_rsv->lock);
- if (delayed_rsv->size < bytes) {
+ if (space_info->bytes_pinned + delayed_rsv->size < bytes) {
spin_unlock(&delayed_rsv->lock);
+ spin_unlock(&space_info->lock);
return -ENOSPC;
}
spin_unlock(&delayed_rsv->lock);
+ spin_unlock(&space_info->lock);
commit:
trans = btrfs_join_transaction(root);
@@ -3694,9 +3700,9 @@ again:
if (used + orig_bytes <= space_info->total_bytes) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info,
- "space_info",
- (u64)space_info,
- orig_bytes, 1);
+ "space_info",
+ (u64)(unsigned long)space_info,
+ orig_bytes, 1);
ret = 0;
} else {
/*
@@ -3765,9 +3771,9 @@ again:
if (used + num_bytes < space_info->total_bytes + avail) {
space_info->bytes_may_use += orig_bytes;
trace_btrfs_space_reservation(root->fs_info,
- "space_info",
- (u64)space_info,
- orig_bytes, 1);
+ "space_info",
+ (u64)(unsigned long)space_info,
+ orig_bytes, 1);
ret = 0;
} else {
wait_ordered = true;
@@ -3912,8 +3918,8 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
- (u64)space_info,
- num_bytes, 0);
+ (u64)(unsigned long)space_info,
+ num_bytes, 0);
space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
@@ -4104,7 +4110,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
num_bytes += div64_u64(data_used + meta_used, 50);
if (num_bytes * 3 > meta_used)
- num_bytes = div64_u64(meta_used, 3);
+ num_bytes = div64_u64(meta_used, 3) * 2;
return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
}
@@ -4131,14 +4137,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->reserved += num_bytes;
sinfo->bytes_may_use += num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
- (u64)sinfo, num_bytes, 1);
+ (u64)(unsigned long)sinfo, num_bytes, 1);
}
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
- (u64)sinfo, num_bytes, 0);
+ (u64)(unsigned long)sinfo, num_bytes, 0);
sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
@@ -4191,7 +4197,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
if (!trans->bytes_reserved)
return;
- trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans,
+ trace_btrfs_space_reservation(root->fs_info, "transaction",
+ (u64)(unsigned long)trans,
trans->bytes_reserved, 0);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
trans->bytes_reserved = 0;
@@ -4709,9 +4716,9 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_reserved += num_bytes;
if (reserve == RESERVE_ALLOC) {
trace_btrfs_space_reservation(cache->fs_info,
- "space_info",
- (u64)space_info,
- num_bytes, 0);
+ "space_info",
+ (u64)(unsigned long)space_info,
+ num_bytes, 0);
space_info->bytes_may_use -= num_bytes;
}
}
@@ -5794,6 +5801,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
u64 search_end, struct btrfs_key *ins,
u64 data)
{
+ bool final_tried = false;
int ret;
u64 search_start = 0;
@@ -5813,22 +5821,25 @@ again:
search_start, search_end, hint_byte,
ins, data);
- if (ret == -ENOSPC && num_bytes > min_alloc_size) {
- num_bytes = num_bytes >> 1;
- num_bytes = num_bytes & ~(root->sectorsize - 1);
- num_bytes = max(num_bytes, min_alloc_size);
- do_chunk_alloc(trans, root->fs_info->extent_root,
- num_bytes, data, CHUNK_ALLOC_FORCE);
- goto again;
- }
- if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
- struct btrfs_space_info *sinfo;
-
- sinfo = __find_space_info(root->fs_info, data);
- printk(KERN_ERR "btrfs allocation failed flags %llu, "
- "wanted %llu\n", (unsigned long long)data,
- (unsigned long long)num_bytes);
- dump_space_info(sinfo, num_bytes, 1);
+ if (ret == -ENOSPC) {
+ if (!final_tried) {
+ num_bytes = num_bytes >> 1;
+ num_bytes = num_bytes & ~(root->sectorsize - 1);
+ num_bytes = max(num_bytes, min_alloc_size);
+ do_chunk_alloc(trans, root->fs_info->extent_root,
+ num_bytes, data, CHUNK_ALLOC_FORCE);
+ if (num_bytes == min_alloc_size)
+ final_tried = true;
+ goto again;
+ } else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ struct btrfs_space_info *sinfo;
+
+ sinfo = __find_space_info(root->fs_info, data);
+ printk(KERN_ERR "btrfs allocation failed flags %llu, "
+ "wanted %llu\n", (unsigned long long)data,
+ (unsigned long long)num_bytes);
+ dump_space_info(sinfo, num_bytes, 1);
+ }
}
trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
@@ -7881,9 +7892,16 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
u64 start;
u64 end;
u64 trimmed = 0;
+ u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
int ret = 0;
- cache = btrfs_lookup_block_group(fs_info, range->start);
+ /*
+ * try to trim all FS space, our block group may start from non-zero.
+ */
+ if (range->len == total_bytes)
+ cache = btrfs_lookup_first_block_group(fs_info, range->start);
+ else
+ cache = btrfs_lookup_block_group(fs_info, range->start);
while (cache) {
if (cache->key.objectid >= (range->start + range->len)) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9d09a4f8187..a55fbe6252d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -513,6 +513,15 @@ hit_next:
WARN_ON(state->end < start);
last_end = state->end;
+ if (state->end < end && !need_resched())
+ next_node = rb_next(&state->rb_node);
+ else
+ next_node = NULL;
+
+ /* the state doesn't have the wanted bits, go ahead */
+ if (!(state->state & bits))
+ goto next;
+
/*
* | ---- desired range ---- |
* | state | or
@@ -565,20 +574,15 @@ hit_next:
goto out;
}
- if (state->end < end && prealloc && !need_resched())
- next_node = rb_next(&state->rb_node);
- else
- next_node = NULL;
-
set |= clear_state_bit(tree, state, &bits, wake);
+next:
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
if (start <= end && next_node) {
state = rb_entry(next_node, struct extent_state,
rb_node);
- if (state->start == start)
- goto hit_next;
+ goto hit_next;
}
goto search_again;
@@ -961,8 +965,6 @@ hit_next:
set_state_bits(tree, state, &bits);
clear_state_bit(tree, state, &clear_bits, 0);
-
- merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
@@ -1007,7 +1009,6 @@ hit_next:
if (state->end <= end) {
set_state_bits(tree, state, &bits);
clear_state_bit(tree, state, &clear_bits, 0);
- merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
@@ -1068,8 +1069,6 @@ hit_next:
set_state_bits(tree, prealloc, &bits);
clear_state_bit(tree, prealloc, &clear_bits, 0);
-
- merge_state(tree, prealloc);
prealloc = NULL;
goto out;
}
@@ -2154,13 +2153,46 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
"this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
failrec->this_mirror, num_copies, failrec->in_validation);
- tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror,
- failrec->bio_flags, 0);
- return 0;
+ ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
+ failrec->this_mirror,
+ failrec->bio_flags, 0);
+ return ret;
}
/* lots and lots of room for performance fixes in the end_bio funcs */
+int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
+{
+ int uptodate = (err == 0);
+ struct extent_io_tree *tree;
+ int ret;
+
+ tree = &BTRFS_I(page->mapping->host)->io_tree;
+
+ if (tree->ops && tree->ops->writepage_end_io_hook) {
+ ret = tree->ops->writepage_end_io_hook(page, start,
+ end, NULL, uptodate);
+ if (ret)
+ uptodate = 0;
+ }
+
+ if (!uptodate && tree->ops &&
+ tree->ops->writepage_io_failed_hook) {
+ ret = tree->ops->writepage_io_failed_hook(NULL, page,
+ start, end, NULL);
+ /* Writeback already completed */
+ if (ret == 0)
+ return 1;
+ }
+
+ if (!uptodate) {
+ clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
+ ClearPageUptodate(page);
+ SetPageError(page);
+ }
+ return 0;
+}
+
/*
* after a writepage IO is done, we need to:
* clear the uptodate bits on error
@@ -2172,13 +2204,11 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
*/
static void end_bio_extent_writepage(struct bio *bio, int err)
{
- int uptodate = err == 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct extent_io_tree *tree;
u64 start;
u64 end;
int whole_page;
- int ret;
do {
struct page *page = bvec->bv_page;
@@ -2195,28 +2225,9 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
- if (tree->ops && tree->ops->writepage_end_io_hook) {
- ret = tree->ops->writepage_end_io_hook(page, start,
- end, NULL, uptodate);
- if (ret)
- uptodate = 0;
- }
-
- if (!uptodate && tree->ops &&
- tree->ops->writepage_io_failed_hook) {
- ret = tree->ops->writepage_io_failed_hook(bio, page,
- start, end, NULL);
- if (ret == 0) {
- uptodate = (err == 0);
- continue;
- }
- }
- if (!uptodate) {
- clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
- ClearPageUptodate(page);
- SetPageError(page);
- }
+ if (end_extent_writepage(page, err, start, end))
+ continue;
if (whole_page)
end_page_writeback(page);
@@ -2779,9 +2790,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
delalloc_start = delalloc_end + 1;
continue;
}
- tree->ops->fill_delalloc(inode, page, delalloc_start,
- delalloc_end, &page_started,
- &nr_written);
+ ret = tree->ops->fill_delalloc(inode, page,
+ delalloc_start,
+ delalloc_end,
+ &page_started,
+ &nr_written);
+ BUG_ON(ret);
/*
* delalloc_end is already one less than the total
* length, so we don't subtract one from
@@ -2818,8 +2832,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (tree->ops && tree->ops->writepage_start_hook) {
ret = tree->ops->writepage_start_hook(page, start,
page_end);
- if (ret == -EAGAIN) {
- redirty_page_for_writepage(wbc, page);
+ if (ret) {
+ /* Fixup worker will requeue */
+ if (ret == -EBUSY)
+ wbc->pages_skipped++;
+ else
+ redirty_page_for_writepage(wbc, page);
update_nr_written(page, wbc, nr_written);
unlock_page(page);
ret = 0;
@@ -3289,7 +3307,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
len = end - start + 1;
write_lock(&map->lock);
em = lookup_extent_mapping(map, start, len);
- if (IS_ERR_OR_NULL(em)) {
+ if (!em) {
write_unlock(&map->lock);
break;
}
@@ -3853,10 +3871,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
num_pages = num_extent_pages(eb->start, eb->len);
clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
- if (eb_straddles_pages(eb)) {
- clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
- cached_state, GFP_NOFS);
- }
+ clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+ cached_state, GFP_NOFS);
+
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
if (page)
@@ -3909,6 +3926,8 @@ int extent_range_uptodate(struct extent_io_tree *tree,
while (start <= end) {
index = start >> PAGE_CACHE_SHIFT;
page = find_get_page(tree->mapping, index);
+ if (!page)
+ return 1;
uptodate = PageUptodate(page);
page_cache_release(page);
if (!uptodate) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bc6a042cb6f..cecc3518c12 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -319,4 +319,5 @@ struct btrfs_mapping_tree;
int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
u64 length, u64 logical, struct page *page,
int mirror_num);
+int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
#endif
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 33a7890b1f4..1195f09761f 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,8 +26,8 @@ struct extent_map {
unsigned long flags;
struct block_device *bdev;
atomic_t refs;
- unsigned int in_tree:1;
- unsigned int compress_type:4;
+ unsigned int in_tree;
+ unsigned int compress_type;
};
struct extent_map_tree {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 859ba2dd889..e8d06b6b919 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1605,6 +1605,14 @@ static long btrfs_fallocate(struct file *file, int mode,
return -EOPNOTSUPP;
/*
+ * Make sure we have enough space before we do the
+ * allocation.
+ */
+ ret = btrfs_check_data_free_space(inode, len);
+ if (ret)
+ return ret;
+
+ /*
* wait for ordered IO before we have any locks. We'll loop again
* below with the locks held.
*/
@@ -1667,27 +1675,12 @@ static long btrfs_fallocate(struct file *file, int mode,
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-
- /*
- * Make sure we have enough space before we do the
- * allocation.
- */
- ret = btrfs_check_data_free_space(inode, last_byte -
- cur_offset);
- if (ret) {
- free_extent_map(em);
- break;
- }
-
ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
last_byte - cur_offset,
1 << inode->i_blkbits,
offset + len,
&alloc_hint);
- /* Let go of our reservation. */
- btrfs_free_reserved_data_space(inode, last_byte -
- cur_offset);
if (ret < 0) {
free_extent_map(em);
break;
@@ -1715,6 +1708,8 @@ static long btrfs_fallocate(struct file *file, int mode,
&cached_state, GFP_NOFS);
out:
mutex_unlock(&inode->i_mutex);
+ /* Let go of our reservation. */
+ btrfs_free_reserved_data_space(inode, len);
return ret;
}
@@ -1761,7 +1756,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
start - root->sectorsize,
root->sectorsize, 0);
if (IS_ERR(em)) {
- ret = -ENXIO;
+ ret = PTR_ERR(em);
goto out;
}
last_end = em->start + em->len;
@@ -1773,7 +1768,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
while (1) {
em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
if (IS_ERR(em)) {
- ret = -ENXIO;
+ ret = PTR_ERR(em);
break;
}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d20ff87ca60..710ea380c7e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -777,6 +777,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
spin_lock(&block_group->lock);
if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
spin_unlock(&block_group->lock);
+ btrfs_free_path(path);
goto out;
}
spin_unlock(&block_group->lock);
@@ -2242,7 +2243,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
if (entry->bitmap) {
ret = btrfs_alloc_from_bitmap(block_group,
cluster, entry, bytes,
- min_start);
+ cluster->window_start);
if (ret == 0) {
node = rb_next(&entry->offset_index);
if (!node)
@@ -2251,6 +2252,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
offset_index);
continue;
}
+ cluster->window_start += bytes;
} else {
ret = entry->offset;
@@ -2475,7 +2477,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
}
list_for_each_entry(entry, bitmaps, list) {
- if (entry->bytes < min_bytes)
+ if (entry->bytes < bytes)
continue;
ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
bytes, cont1_bytes, min_bytes);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 213ffa86ce1..ee15d88b33d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -438,7 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
trans->bytes_reserved);
if (ret)
goto out;
- trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+ trace_btrfs_space_reservation(root->fs_info, "ino_cache",
+ (u64)(unsigned long)trans,
trans->bytes_reserved, 1);
again:
inode = lookup_free_ino_inode(root, path);
@@ -500,7 +501,8 @@ again:
out_put:
iput(inode);
out_release:
- trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+ trace_btrfs_space_reservation(root->fs_info, "ino_cache",
+ (u64)(unsigned long)trans,
trans->bytes_reserved, 0);
btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
out:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0da19a0ea00..892b34785cc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1555,6 +1555,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
struct inode *inode;
u64 page_start;
u64 page_end;
+ int ret;
fixup = container_of(work, struct btrfs_writepage_fixup, work);
page = fixup->page;
@@ -1582,12 +1583,21 @@ again:
page_end, &cached_state, GFP_NOFS);
unlock_page(page);
btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
goto again;
}
- BUG();
+ ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+ if (ret) {
+ mapping_set_error(page->mapping, ret);
+ end_extent_writepage(page, ret, page_start, page_end);
+ ClearPageChecked(page);
+ goto out;
+ }
+
btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
ClearPageChecked(page);
+ set_page_dirty(page);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
@@ -1630,7 +1640,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
fixup->work.func = btrfs_writepage_fixup_worker;
fixup->page = page;
btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
- return -EAGAIN;
+ return -EBUSY;
}
static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -4575,7 +4585,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
ret = btrfs_insert_dir_item(trans, root, name, name_len,
parent_inode, &key,
btrfs_inode_type(inode), index);
- BUG_ON(ret);
+ if (ret)
+ goto fail_dir_item;
btrfs_i_size_write(parent_inode, parent_inode->i_size +
name_len * 2);
@@ -4583,6 +4594,23 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
ret = btrfs_update_inode(trans, root, parent_inode);
}
return ret;
+
+fail_dir_item:
+ if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ u64 local_index;
+ int err;
+ err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
+ key.objectid, root->root_key.objectid,
+ parent_ino, &local_index, name, name_len);
+
+ } else if (add_backref) {
+ u64 local_index;
+ int err;
+
+ err = btrfs_del_inode_ref(trans, root, name, name_len,
+ ino, parent_ino, &local_index);
+ }
+ return ret;
}
static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
@@ -6401,18 +6429,23 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned long zero_start;
loff_t size;
int ret;
+ int reserved = 0;
u64 page_start;
u64 page_end;
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
- if (!ret)
+ if (!ret) {
ret = btrfs_update_time(vma->vm_file);
+ reserved = 1;
+ }
if (ret) {
if (ret == -ENOMEM)
ret = VM_FAULT_OOM;
else /* -ENOSPC, -EIO, etc */
ret = VM_FAULT_SIGBUS;
- goto out;
+ if (reserved)
+ goto out;
+ goto out_noreserve;
}
ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
@@ -6495,6 +6528,7 @@ out_unlock:
unlock_page(page);
out:
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+out_noreserve:
return ret;
}
@@ -6690,8 +6724,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
int err;
u64 index = 0;
- inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
- new_dirid, S_IFDIR | 0700, &index);
+ inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
+ new_dirid, new_dirid,
+ S_IFDIR | (~current_umask() & S_IRWXUGO),
+ &index);
if (IS_ERR(inode))
return PTR_ERR(inode);
inode->i_op = &btrfs_dir_inode_operations;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ab620014bcc..d8b54715c2d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -861,6 +861,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
int i_done;
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
+ struct extent_io_tree *tree;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
if (isize == 0)
@@ -871,18 +872,34 @@ static int cluster_pages_for_defrag(struct inode *inode,
num_pages << PAGE_CACHE_SHIFT);
if (ret)
return ret;
-again:
- ret = 0;
i_done = 0;
+ tree = &BTRFS_I(inode)->io_tree;
/* step one, lock all the pages */
for (i = 0; i < num_pages; i++) {
struct page *page;
+again:
page = find_or_create_page(inode->i_mapping,
- start_index + i, mask);
+ start_index + i, mask);
if (!page)
break;
+ page_start = page_offset(page);
+ page_end = page_start + PAGE_CACHE_SIZE - 1;
+ while (1) {
+ lock_extent(tree, page_start, page_end, GFP_NOFS);
+ ordered = btrfs_lookup_ordered_extent(inode,
+ page_start);
+ unlock_extent(tree, page_start, page_end, GFP_NOFS);
+ if (!ordered)
+ break;
+
+ unlock_page(page);
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ lock_page(page);
+ }
+
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
lock_page(page);
@@ -893,15 +910,22 @@ again:
break;
}
}
+
isize = i_size_read(inode);
file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
- if (!isize || page->index > file_end ||
- page->mapping != inode->i_mapping) {
+ if (!isize || page->index > file_end) {
/* whoops, we blew past eof, skip this page */
unlock_page(page);
page_cache_release(page);
break;
}
+
+ if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
+ page_cache_release(page);
+ goto again;
+ }
+
pages[i] = page;
i_done++;
}
@@ -924,25 +948,6 @@ again:
lock_extent_bits(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1, 0, &cached_state,
GFP_NOFS);
- ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
- if (ordered &&
- ordered->file_offset + ordered->len > page_start &&
- ordered->file_offset < page_end) {
- btrfs_put_ordered_extent(ordered);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree,
- page_start, page_end - 1,
- &cached_state, GFP_NOFS);
- for (i = 0; i < i_done; i++) {
- unlock_page(pages[i]);
- page_cache_release(pages[i]);
- }
- btrfs_wait_ordered_range(inode, page_start,
- page_end - page_start);
- goto again;
- }
- if (ordered)
- btrfs_put_ordered_extent(ordered);
-
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
@@ -1065,7 +1070,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
i = range->start >> PAGE_CACHE_SHIFT;
}
if (!max_to_defrag)
- max_to_defrag = last_index;
+ max_to_defrag = last_index + 1;
/*
* make writeback starts from i, so the defrag range can be
@@ -1327,6 +1332,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
goto out;
}
+ if (name[0] == '.' &&
+ (namelen == 1 || (name[1] == '.' && namelen == 2))) {
+ ret = -EEXIST;
+ goto out;
+ }
+
if (subvol) {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
NULL, transid, readonly);
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 2373b39a132..22db04550f6 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -305,7 +305,7 @@ again:
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&dev->reada_zones,
- (unsigned long)zone->end >> PAGE_CACHE_SHIFT,
+ (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
zone);
spin_unlock(&fs_info->reada_lock);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9770cc5bfb7..abc0fbffa51 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1367,7 +1367,8 @@ out:
}
static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
- u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
+ u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length,
+ u64 dev_offset)
{
struct btrfs_mapping_tree *map_tree =
&sdev->dev->dev_root->fs_info->mapping_tree;
@@ -1391,7 +1392,8 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
goto out;
for (i = 0; i < map->num_stripes; ++i) {
- if (map->stripes[i].dev == sdev->dev) {
+ if (map->stripes[i].dev == sdev->dev &&
+ map->stripes[i].physical == dev_offset) {
ret = scrub_stripe(sdev, map, i, chunk_offset, length);
if (ret)
goto out;
@@ -1487,7 +1489,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
break;
}
ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
- chunk_offset, length);
+ chunk_offset, length, found_key.offset);
btrfs_put_block_group(cache);
if (ret)
break;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 287a6728b1a..04b77e3ceb7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -327,7 +327,8 @@ again:
if (num_bytes) {
trace_btrfs_space_reservation(root->fs_info, "transaction",
- (u64)h, num_bytes, 1);
+ (u64)(unsigned long)h,
+ num_bytes, 1);
h->block_rsv = &root->fs_info->trans_block_rsv;
h->bytes_reserved = num_bytes;
}
@@ -915,7 +916,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
dentry->d_name.name, dentry->d_name.len,
parent_inode, &key,
BTRFS_FT_DIR, index);
- BUG_ON(ret);
+ if (ret) {
+ pending->error = -EEXIST;
+ dput(parent);
+ goto fail;
+ }
btrfs_i_size_write(parent_inode, parent_inode->i_size +
dentry->d_name.len * 2);
@@ -993,12 +998,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
{
struct btrfs_pending_snapshot *pending;
struct list_head *head = &trans->transaction->pending_snapshots;
- int ret;
- list_for_each_entry(pending, head, list) {
- ret = create_pending_snapshot(trans, fs_info, pending);
- BUG_ON(ret);
- }
+ list_for_each_entry(pending, head, list)
+ create_pending_snapshot(trans, fs_info, pending);
return 0;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index cb877e0886a..966cc74f5d6 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1957,7 +1957,8 @@ static int wait_log_commit(struct btrfs_trans_handle *trans,
finish_wait(&root->log_commit_wait[index], &wait);
mutex_lock(&root->log_mutex);
- } while (root->log_transid < transid + 2 &&
+ } while (root->fs_info->last_trans_log_full_commit !=
+ trans->transid && root->log_transid < transid + 2 &&
atomic_read(&root->log_commit[index]));
return 0;
}
@@ -1966,7 +1967,8 @@ static int wait_for_writer(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
DEFINE_WAIT(wait);
- while (atomic_read(&root->log_writers)) {
+ while (root->fs_info->last_trans_log_full_commit !=
+ trans->transid && atomic_read(&root->log_writers)) {
prepare_to_wait(&root->log_writer_wait,
&wait, TASK_UNINTERRUPTIBLE);
mutex_unlock(&root->log_mutex);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0b4e2af7954..ef41f285a47 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -459,12 +459,23 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device, *next;
+ struct block_device *latest_bdev = NULL;
+ u64 latest_devid = 0;
+ u64 latest_transid = 0;
+
mutex_lock(&uuid_mutex);
again:
/* This is the initialized path, it is safe to release the devices. */
list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
- if (device->in_fs_metadata)
+ if (device->in_fs_metadata) {
+ if (!latest_transid ||
+ device->generation > latest_transid) {
+ latest_devid = device->devid;
+ latest_transid = device->generation;
+ latest_bdev = device->bdev;
+ }
continue;
+ }
if (device->bdev) {
blkdev_put(device->bdev, device->mode);
@@ -487,6 +498,10 @@ again:
goto again;
}
+ fs_devices->latest_bdev = latest_bdev;
+ fs_devices->latest_devid = latest_devid;
+ fs_devices->latest_trans = latest_transid;
+
mutex_unlock(&uuid_mutex);
return 0;
}
@@ -1953,7 +1968,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
em = lookup_extent_mapping(em_tree, chunk_offset, 1);
read_unlock(&em_tree->lock);
- BUG_ON(em->start > chunk_offset ||
+ BUG_ON(!em || em->start > chunk_offset ||
em->start + em->len < chunk_offset);
map = (struct map_lookup *)em->bdev;
@@ -4356,6 +4371,20 @@ int btrfs_read_sys_array(struct btrfs_root *root)
return -ENOMEM;
btrfs_set_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
+ /*
+ * The sb extent buffer is artificial and just used to read the system array.
+ * btrfs_set_buffer_uptodate() call does not properly mark all its
+ * pages up-to-date when the page is larger: extent does not cover the
+ * whole page and consequently check_page_uptodate does not find all
+ * the page's extents up-to-date (the hole beyond sb),
+ * write_extent_buffer then triggers a WARN_ON.
+ *
+ * Regular short extents go through mark_extent_buffer_dirty/writeback cycle,
+ * but sb spans only this function. Add an explicit SetPageUptodate call
+ * to silence the warning e.g. on PowerPC 64.
+ */
+ if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
+ SetPageUptodate(sb->first_page);
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index b60fc8bfb3e..620daad201d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -641,10 +641,10 @@ static int __cap_is_valid(struct ceph_cap *cap)
unsigned long ttl;
u32 gen;
- spin_lock(&cap->session->s_cap_lock);
+ spin_lock(&cap->session->s_gen_ttl_lock);
gen = cap->session->s_cap_gen;
ttl = cap->session->s_cap_ttl;
- spin_unlock(&cap->session->s_cap_lock);
+ spin_unlock(&cap->session->s_gen_ttl_lock);
if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
dout("__cap_is_valid %p cap %p issued %s "
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 618246bc219..3e8094be460 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -975,10 +975,10 @@ static int dentry_lease_is_valid(struct dentry *dentry)
di = ceph_dentry(dentry);
if (di->lease_session) {
s = di->lease_session;
- spin_lock(&s->s_cap_lock);
+ spin_lock(&s->s_gen_ttl_lock);
gen = s->s_cap_gen;
ttl = s->s_cap_ttl;
- spin_unlock(&s->s_cap_lock);
+ spin_unlock(&s->s_gen_ttl_lock);
if (di->lease_gen == gen &&
time_before(jiffies, dentry->d_time) &&
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 23ab6a3f182..866e8d7ca37 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -262,6 +262,7 @@ static int parse_reply_info(struct ceph_msg *msg,
/* trace */
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
+ ceph_decode_need(&p, end, len, bad);
err = parse_reply_info_trace(&p, p+len, info, features);
if (err < 0)
goto out_bad;
@@ -270,6 +271,7 @@ static int parse_reply_info(struct ceph_msg *msg,
/* extra */
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
+ ceph_decode_need(&p, end, len, bad);
err = parse_reply_info_extra(&p, p+len, info, features);
if (err < 0)
goto out_bad;
@@ -398,9 +400,11 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
s->s_con.peer_name.num = cpu_to_le64(mds);
- spin_lock_init(&s->s_cap_lock);
+ spin_lock_init(&s->s_gen_ttl_lock);
s->s_cap_gen = 0;
s->s_cap_ttl = 0;
+
+ spin_lock_init(&s->s_cap_lock);
s->s_renew_requested = 0;
s->s_renew_seq = 0;
INIT_LIST_HEAD(&s->s_caps);
@@ -2326,10 +2330,10 @@ static void handle_session(struct ceph_mds_session *session,
case CEPH_SESSION_STALE:
pr_info("mds%d caps went stale, renewing\n",
session->s_mds);
- spin_lock(&session->s_cap_lock);
+ spin_lock(&session->s_gen_ttl_lock);
session->s_cap_gen++;
session->s_cap_ttl = 0;
- spin_unlock(&session->s_cap_lock);
+ spin_unlock(&session->s_gen_ttl_lock);
send_renew_caps(mdsc, session);
break;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index a50ca0e3947..8c7c04ebb59 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -117,10 +117,13 @@ struct ceph_mds_session {
void *s_authorizer_buf, *s_authorizer_reply_buf;
size_t s_authorizer_buf_len, s_authorizer_reply_buf_len;
- /* protected by s_cap_lock */
- spinlock_t s_cap_lock;
+ /* protected by s_gen_ttl_lock */
+ spinlock_t s_gen_ttl_lock;
u32 s_cap_gen; /* inc each time we get mds stale msg */
unsigned long s_cap_ttl; /* when session caps expire */
+
+ /* protected by s_cap_lock */
+ spinlock_t s_cap_lock;
struct list_head s_caps; /* all caps issued by this session */
int s_nr_caps, s_trim_caps;
int s_num_cap_releases;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 857214ae8c0..a76f697303d 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -111,8 +111,10 @@ static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
}
static struct ceph_vxattr_cb ceph_file_vxattrs[] = {
+ { true, "ceph.file.layout", ceph_vxattrcb_layout},
+ /* The following extended attribute name is deprecated */
{ true, "ceph.layout", ceph_vxattrcb_layout},
- { NULL, NULL }
+ { true, NULL, NULL }
};
static struct ceph_vxattr_cb *ceph_inode_vxattrs(struct inode *inode)
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 0554b00a7b3..2b243af70aa 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -139,7 +139,7 @@ config CIFS_DFS_UPCALL
points. If unsure, say N.
config CIFS_FSCACHE
- bool "Provide CIFS client caching support (EXPERIMENTAL)"
+ bool "Provide CIFS client caching support"
depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y
help
Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
@@ -147,7 +147,7 @@ config CIFS_FSCACHE
manager. If unsure, say N.
config CIFS_ACL
- bool "Provide CIFS ACL support (EXPERIMENTAL)"
+ bool "Provide CIFS ACL support"
depends on CIFS_XATTR && KEYS
help
Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 986709a8d90..602f77c304c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,10 +773,11 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
cifs_dump_mem("Bad SMB: ", buf,
min_t(unsigned int, server->total_read, 48));
- if (mid)
- handle_mid(mid, server, smb_buffer, length);
+ if (!mid)
+ return length;
- return length;
+ handle_mid(mid, server, smb_buffer, length);
+ return 0;
}
static int
@@ -2125,7 +2126,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
down_read(&key->sem);
upayload = key->payload.data;
if (IS_ERR_OR_NULL(upayload)) {
- rc = PTR_ERR(key);
+ rc = upayload ? PTR_ERR(upayload) : -EINVAL;
goto out_key_put;
}
@@ -2142,14 +2143,14 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
len = delim - payload;
if (len > MAX_USERNAME_SIZE || len <= 0) {
- cFYI(1, "Bad value from username search (len=%ld)", len);
+ cFYI(1, "Bad value from username search (len=%zd)", len);
rc = -EINVAL;
goto out_key_put;
}
vol->username = kstrndup(payload, len, GFP_KERNEL);
if (!vol->username) {
- cFYI(1, "Unable to allocate %ld bytes for username", len);
+ cFYI(1, "Unable to allocate %zd bytes for username", len);
rc = -ENOMEM;
goto out_key_put;
}
@@ -2157,7 +2158,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
len = key->datalen - (len + 1);
if (len > MAX_PASSWORD_SIZE || len <= 0) {
- cFYI(1, "Bad len for password search (len=%ld)", len);
+ cFYI(1, "Bad len for password search (len=%zd)", len);
rc = -EINVAL;
kfree(vol->username);
vol->username = NULL;
@@ -2167,7 +2168,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
++delim;
vol->password = kstrndup(delim, len, GFP_KERNEL);
if (!vol->password) {
- cFYI(1, "Unable to allocate %ld bytes for password", len);
+ cFYI(1, "Unable to allocate %zd bytes for password", len);
rc = -ENOMEM;
kfree(vol->username);
vol->username = NULL;
@@ -3857,10 +3858,8 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
struct smb_vol *vol_info;
vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
- if (vol_info == NULL) {
- tcon = ERR_PTR(-ENOMEM);
- goto out;
- }
+ if (vol_info == NULL)
+ return ERR_PTR(-ENOMEM);
vol_info->local_nls = cifs_sb->local_nls;
vol_info->linux_uid = fsuid;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index df8fecb5b99..bc7e24420ac 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
{
int xid;
int rc = 0; /* to get around spurious gcc warning, set to zero here */
- __u32 oplock = 0;
+ __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0;
__u16 fileHandle = 0;
bool posix_open = false;
struct cifs_sb_info *cifs_sb;
@@ -584,10 +584,26 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
* If either that or op not supported returned, follow
* the normal lookup.
*/
- if ((rc == 0) || (rc == -ENOENT))
+ switch (rc) {
+ case 0:
+ /*
+ * The server may allow us to open things like
+ * FIFOs, but the client isn't set up to deal
+ * with that. If it's not a regular file, just
+ * close it and proceed as if it were a normal
+ * lookup.
+ */
+ if (newInode && !S_ISREG(newInode->i_mode)) {
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ break;
+ }
+ case -ENOENT:
posix_open = true;
- else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP))
+ case -EOPNOTSUPP:
+ break;
+ default:
pTcon->broken_posix_open = true;
+ }
}
if (!posix_open)
rc = cifs_get_inode_info_unix(&newInode, full_path,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a5f54b7d982..745da3d0653 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -534,6 +534,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
fattr->cf_dtype = DT_DIR;
+ /*
+ * Server can return wrong NumberOfLinks value for directories
+ * when Unix extensions are disabled - fake it.
+ */
+ fattr->cf_nlink = 2;
} else {
fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
fattr->cf_dtype = DT_REG;
@@ -541,9 +546,9 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
/* clear write bits if ATTR_READONLY is set */
if (fattr->cf_cifsattrs & ATTR_READONLY)
fattr->cf_mode &= ~(S_IWUGO);
- }
- fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
+ fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
+ }
fattr->cf_uid = cifs_sb->mnt_uid;
fattr->cf_gid = cifs_sb->mnt_gid;
@@ -1322,7 +1327,6 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
}
/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
to set uid/gid */
- inc_nlink(inode);
cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1355,7 +1359,6 @@ mkdir_retry_old:
d_drop(direntry);
} else {
mkdir_get_info:
- inc_nlink(inode);
if (pTcon->unix_ext)
rc = cifs_get_inode_info_unix(&newinode, full_path,
inode->i_sb, xid);
@@ -1436,6 +1439,11 @@ mkdir_get_info:
}
}
mkdir_out:
+ /*
+ * Force revalidate to get parent dir info when needed since cached
+ * attributes are invalid now.
+ */
+ CIFS_I(inode)->time = 0;
kfree(full_path);
FreeXid(xid);
cifs_put_tlink(tlink);
@@ -1475,7 +1483,6 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
cifs_put_tlink(tlink);
if (!rc) {
- drop_nlink(inode);
spin_lock(&direntry->d_inode->i_lock);
i_size_write(direntry->d_inode, 0);
clear_nlink(direntry->d_inode);
@@ -1483,12 +1490,15 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
}
cifsInode = CIFS_I(direntry->d_inode);
- cifsInode->time = 0; /* force revalidate to go get info when
- needed */
+ /* force revalidate to go get info when needed */
+ cifsInode->time = 0;
cifsInode = CIFS_I(inode);
- cifsInode->time = 0; /* force revalidate to get parent dir info
- since cached search results now invalid */
+ /*
+ * Force revalidate to get parent dir info when needed since cached
+ * attributes are invalid now.
+ */
+ cifsInode->time = 0;
direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
current_fs_time(inode->i_sb);
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d85efad5765..551d0c2b973 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -246,16 +246,15 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
/* copy user */
/* BB what about null user mounts - check that we do this BB */
/* copy user */
- if (ses->user_name != NULL)
+ if (ses->user_name != NULL) {
strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
+ bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
+ }
/* else null user mount */
-
- bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
*bcc_ptr = 0;
bcc_ptr++; /* account for null termination */
/* copy domain */
-
if (ses->domainName != NULL) {
strncpy(bcc_ptr, ses->domainName, 256);
bcc_ptr += strnlen(ses->domainName, 256);
@@ -395,6 +394,10 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
tioffset = le32_to_cpu(pblob->TargetInfoArray.BufferOffset);
tilen = le16_to_cpu(pblob->TargetInfoArray.Length);
+ if (tioffset > blob_len || tioffset + tilen > blob_len) {
+ cERROR(1, "tioffset + tilen too high %u + %u", tioffset, tilen);
+ return -EINVAL;
+ }
if (tilen) {
ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
if (!ses->auth_key.response) {
diff --git a/fs/compat.c b/fs/compat.c
index fa9d721ecfe..07880bae28a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -131,41 +131,35 @@ asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_tim
static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
{
- compat_ino_t ino = stat->ino;
- typeof(ubuf->st_uid) uid = 0;
- typeof(ubuf->st_gid) gid = 0;
- int err;
+ struct compat_stat tmp;
- SET_UID(uid, stat->uid);
- SET_GID(gid, stat->gid);
+ if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
+ return -EOVERFLOW;
- if ((u64) stat->size > MAX_NON_LFS ||
- !old_valid_dev(stat->dev) ||
- !old_valid_dev(stat->rdev))
+ memset(&tmp, 0, sizeof(tmp));
+ tmp.st_dev = old_encode_dev(stat->dev);
+ tmp.st_ino = stat->ino;
+ if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
return -EOVERFLOW;
- if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
+ tmp.st_mode = stat->mode;
+ tmp.st_nlink = stat->nlink;
+ if (tmp.st_nlink != stat->nlink)
return -EOVERFLOW;
-
- if (clear_user(ubuf, sizeof(*ubuf)))
- return -EFAULT;
-
- err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
- err |= __put_user(ino, &ubuf->st_ino);
- err |= __put_user(stat->mode, &ubuf->st_mode);
- err |= __put_user(stat->nlink, &ubuf->st_nlink);
- err |= __put_user(uid, &ubuf->st_uid);
- err |= __put_user(gid, &ubuf->st_gid);
- err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
- err |= __put_user(stat->size, &ubuf->st_size);
- err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
- err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
- err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
- err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
- err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
- err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
- err |= __put_user(stat->blksize, &ubuf->st_blksize);
- err |= __put_user(stat->blocks, &ubuf->st_blocks);
- return err;
+ SET_UID(tmp.st_uid, stat->uid);
+ SET_GID(tmp.st_gid, stat->gid);
+ tmp.st_rdev = old_encode_dev(stat->rdev);
+ if ((u64) stat->size > MAX_NON_LFS)
+ return -EOVERFLOW;
+ tmp.st_size = stat->size;
+ tmp.st_atime = stat->atime.tv_sec;
+ tmp.st_atime_nsec = stat->atime.tv_nsec;
+ tmp.st_mtime = stat->mtime.tv_sec;
+ tmp.st_mtime_nsec = stat->mtime.tv_nsec;
+ tmp.st_ctime = stat->ctime.tv_sec;
+ tmp.st_ctime_nsec = stat->ctime.tv_nsec;
+ tmp.st_blocks = stat->blocks;
+ tmp.st_blksize = stat->blksize;
+ return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
asmlinkage long compat_sys_newstat(const char __user * filename,
diff --git a/fs/dcache.c b/fs/dcache.c
index 16a53cc2cc0..bcbdb33fcc2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -104,7 +104,7 @@ static unsigned int d_hash_shift __read_mostly;
static struct hlist_bl_head *dentry_hashtable __read_mostly;
-static inline struct hlist_bl_head *d_hash(struct dentry *parent,
+static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
unsigned long hash)
{
hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
@@ -137,6 +137,26 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
}
#endif
+/*
+ * Compare 2 name strings, return 0 if they match, otherwise non-zero.
+ * Strings of differing length never match; equal lengths must be non-zero.
+ */
+static inline int dentry_cmp(const unsigned char *cs, size_t scount,
+ const unsigned char *ct, size_t tcount)
+{
+ if (scount != tcount)
+ return 1;
+
+ do {
+ if (*cs != *ct)
+ return 1;
+ cs++;
+ ct++;
+ tcount--;
+ } while (tcount);
+ return 0;
+}
+
static void __d_free(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
@@ -1717,8 +1737,9 @@ EXPORT_SYMBOL(d_add_ci);
* child is looked up. Thus, an interlocking stepping of sequence lock checks
* is formed, giving integrity down the path walk.
*/
-struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
- unsigned *seq, struct inode **inode)
+struct dentry *__d_lookup_rcu(const struct dentry *parent,
+ const struct qstr *name,
+ unsigned *seqp, struct inode **inode)
{
unsigned int len = name->len;
unsigned int hash = name->hash;
@@ -1748,6 +1769,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
* See Documentation/filesystems/path-lookup.txt for more details.
*/
hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
+ unsigned seq;
struct inode *i;
const char *tname;
int tlen;
@@ -1756,7 +1778,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
continue;
seqretry:
- *seq = read_seqcount_begin(&dentry->d_seq);
+ seq = read_seqcount_begin(&dentry->d_seq);
if (dentry->d_parent != parent)
continue;
if (d_unhashed(dentry))
@@ -1771,7 +1793,7 @@ seqretry:
* edge of memory when walking. If we could load this
* atomically some other way, we could drop this check.
*/
- if (read_seqcount_retry(&dentry->d_seq, *seq))
+ if (read_seqcount_retry(&dentry->d_seq, seq))
goto seqretry;
if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
if (parent->d_op->d_compare(parent, *inode,
@@ -1788,6 +1810,7 @@ seqretry:
* order to do anything useful with the returned dentry
* anyway.
*/
+ *seqp = seq;
*inode = i;
return dentry;
}
@@ -2968,7 +2991,7 @@ __setup("dhash_entries=", set_dhash_entries);
static void __init dcache_init_early(void)
{
- int loop;
+ unsigned int loop;
/* If hashes are distributed across NUMA nodes, defer
* hash allocation until vmalloc space is available.
@@ -2986,13 +3009,13 @@ static void __init dcache_init_early(void)
&d_hash_mask,
0);
- for (loop = 0; loop < (1 << d_hash_shift); loop++)
+ for (loop = 0; loop < (1U << d_hash_shift); loop++)
INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}
static void __init dcache_init(void)
{
- int loop;
+ unsigned int loop;
/*
* A constructor could be added for stable state like the lists,
@@ -3016,7 +3039,7 @@ static void __init dcache_init(void)
&d_hash_mask,
0);
- for (loop = 0; loop < (1 << d_hash_shift); loop++)
+ for (loop = 0; loop < (1U << d_hash_shift); loop++)
INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
}
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4a588dbd11b..f4aadd15b61 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -173,7 +173,7 @@ void inode_dio_wait(struct inode *inode)
if (atomic_read(&inode->i_dio_count))
__inode_dio_wait(inode);
}
-EXPORT_SYMBOL_GPL(inode_dio_wait);
+EXPORT_SYMBOL(inode_dio_wait);
/*
* inode_dio_done - signal finish of a direct I/O requests
@@ -187,7 +187,7 @@ void inode_dio_done(struct inode *inode)
if (atomic_dec_and_test(&inode->i_dio_count))
wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
}
-EXPORT_SYMBOL_GPL(inode_dio_done);
+EXPORT_SYMBOL(inode_dio_done);
/*
* How many pages are in the queue?
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 63ab2451064..ea993128155 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1990,6 +1990,17 @@ out:
return;
}
+static size_t ecryptfs_max_decoded_size(size_t encoded_size)
+{
+ /* Not exact; conservatively long. Every block of 4
+ * encoded characters decodes into a block of 3
+ * decoded characters. This segment of code provides
+ * the caller with the maximum amount of allocated
+ * space that @dst will need to point to in a
+ * subsequent call. */
+ return ((encoded_size + 1) * 3) / 4;
+}
+
/**
* ecryptfs_decode_from_filename
* @dst: If NULL, this function only sets @dst_size and returns. If
@@ -2008,13 +2019,7 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
size_t dst_byte_offset = 0;
if (dst == NULL) {
- /* Not exact; conservatively long. Every block of 4
- * encoded characters decodes into a block of 3
- * decoded characters. This segment of code provides
- * the caller with the maximum amount of allocated
- * space that @dst will need to point to in a
- * subsequent call. */
- (*dst_size) = (((src_size + 1) * 3) / 4);
+ (*dst_size) = ecryptfs_max_decoded_size(src_size);
goto out;
}
while (src_byte_offset < src_size) {
@@ -2239,3 +2244,52 @@ out_free:
out:
return rc;
}
+
+#define ENC_NAME_MAX_BLOCKLEN_8_OR_16 143
+
+int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
+{
+ struct blkcipher_desc desc;
+ struct mutex *tfm_mutex;
+ size_t cipher_blocksize;
+ int rc;
+
+ if (!(mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) {
+ (*namelen) = lower_namelen;
+ return 0;
+ }
+
+ rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+ mount_crypt_stat->global_default_fn_cipher_name);
+ if (unlikely(rc)) {
+ (*namelen) = 0;
+ return rc;
+ }
+
+ mutex_lock(tfm_mutex);
+ cipher_blocksize = crypto_blkcipher_blocksize(desc.tfm);
+ mutex_unlock(tfm_mutex);
+
+ /* Return an exact amount for the common cases */
+ if (lower_namelen == NAME_MAX
+ && (cipher_blocksize == 8 || cipher_blocksize == 16)) {
+ (*namelen) = ENC_NAME_MAX_BLOCKLEN_8_OR_16;
+ return 0;
+ }
+
+ /* Return a safe estimate for the uncommon cases */
+ (*namelen) = lower_namelen;
+ (*namelen) -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE;
+ /* Since this is the max decoded size, subtract 1 "decoded block" len */
+ (*namelen) = ecryptfs_max_decoded_size(*namelen) - 3;
+ (*namelen) -= ECRYPTFS_TAG_70_MAX_METADATA_SIZE;
+ (*namelen) -= ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES;
+ /* Worst case is that the filename is padded nearly a full block size */
+ (*namelen) -= cipher_blocksize - 1;
+
+ if ((*namelen) < 0)
+ (*namelen) = 0;
+
+ return 0;
+}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index a2362df58ae..867b64c5d84 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -162,6 +162,10 @@ ecryptfs_get_key_payload_data(struct key *key)
#define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */
#define MD5_DIGEST_SIZE 16
#define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE
+#define ECRYPTFS_TAG_70_MIN_METADATA_SIZE (1 + ECRYPTFS_MIN_PKT_LEN_SIZE \
+ + ECRYPTFS_SIG_SIZE + 1 + 1)
+#define ECRYPTFS_TAG_70_MAX_METADATA_SIZE (1 + ECRYPTFS_MAX_PKT_LEN_SIZE \
+ + ECRYPTFS_SIG_SIZE + 1 + 1)
#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED."
#define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23
#define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED."
@@ -701,6 +705,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
size_t *packet_size,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
char *data, size_t max_packet_size);
+int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
loff_t offset);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 19892d7d2ed..ab35b113003 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1085,6 +1085,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
}
rc = vfs_setxattr(lower_dentry, name, value, size, flags);
+ if (!rc)
+ fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
out:
return rc;
}
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 8e3b943e330..2333203a120 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -679,10 +679,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
* Octets N3-N4: Block-aligned encrypted filename
* - Consists of a minimum number of random characters, a \0
* separator, and then the filename */
- s->max_packet_size = (1 /* Tag 70 identifier */
- + 3 /* Max Tag 70 packet size */
- + ECRYPTFS_SIG_SIZE /* FNEK sig */
- + 1 /* Cipher identifier */
+ s->max_packet_size = (ECRYPTFS_TAG_70_MAX_METADATA_SIZE
+ s->block_aligned_filename_size);
if (dest == NULL) {
(*packet_size) = s->max_packet_size;
@@ -934,10 +931,10 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
goto out;
}
s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- if (max_packet_size < (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)) {
+ if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) {
printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
"at least [%d]\n", __func__, max_packet_size,
- (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1));
+ ECRYPTFS_TAG_70_MIN_METADATA_SIZE);
rc = -EINVAL;
goto out;
}
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 349209dc6a9..3a06f4043df 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -429,7 +429,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
goto memdup;
} else if (count < MIN_MSG_PKT_SIZE || count > MAX_MSG_PKT_SIZE) {
printk(KERN_WARNING "%s: Acceptable packet size range is "
- "[%d-%lu], but amount of data written is [%zu].",
+ "[%d-%zu], but amount of data written is [%zu].",
__func__, MIN_MSG_PKT_SIZE, MAX_MSG_PKT_SIZE, count);
return -EINVAL;
}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 10ec695ccd6..a46b3a8fee1 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -150,7 +150,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
/* This is a header extent */
char *page_virt;
- page_virt = kmap_atomic(page, KM_USER0);
+ page_virt = kmap_atomic(page);
memset(page_virt, 0, PAGE_CACHE_SIZE);
/* TODO: Support more than one header extent */
if (view_extent_num == 0) {
@@ -163,7 +163,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
crypt_stat,
&written);
}
- kunmap_atomic(page_virt, KM_USER0);
+ kunmap_atomic(page_virt);
flush_dcache_page(page);
if (rc) {
printk(KERN_ERR "%s: Error reading xattr "
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 5c0106f7577..b2a34a192f4 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -156,7 +156,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
ecryptfs_page_idx, rc);
goto out;
}
- ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
+ ecryptfs_page_virt = kmap_atomic(ecryptfs_page);
/*
* pos: where we're now writing, offset: where the request was
@@ -179,7 +179,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
(data + data_offset), num_bytes);
data_offset += num_bytes;
}
- kunmap_atomic(ecryptfs_page_virt, KM_USER0);
+ kunmap_atomic(ecryptfs_page_virt);
flush_dcache_page(ecryptfs_page);
SetPageUptodate(ecryptfs_page);
unlock_page(ecryptfs_page);
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 9df7fd6e0c3..cf152823bbf 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -30,6 +30,8 @@
#include <linux/seq_file.h>
#include <linux/file.h>
#include <linux/crypto.h>
+#include <linux/statfs.h>
+#include <linux/magic.h>
#include "ecryptfs_kernel.h"
struct kmem_cache *ecryptfs_inode_info_cache;
@@ -102,10 +104,20 @@ static void ecryptfs_destroy_inode(struct inode *inode)
static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ int rc;
if (!lower_dentry->d_sb->s_op->statfs)
return -ENOSYS;
- return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
+
+ rc = lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
+ if (rc)
+ return rc;
+
+ buf->f_type = ECRYPTFS_SUPER_MAGIC;
+ rc = ecryptfs_set_f_namelen(&buf->f_namelen, buf->f_namelen,
+ &ecryptfs_superblock_to_private(dentry->d_sb)->mount_crypt_stat);
+
+ return rc;
}
/**
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aabdfc38cf2..ea54cdef04d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -320,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p)
return !list_empty(p);
}
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+{
+ return container_of(p, struct eppoll_entry, wait);
+}
+
/* Get the "struct epitem" from a wait queue pointer */
static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
{
@@ -467,6 +472,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
put_cpu();
}
+static void ep_remove_wait_queue(struct eppoll_entry *pwq)
+{
+ wait_queue_head_t *whead;
+
+ rcu_read_lock();
+ /* If it is cleared by POLLFREE, it should be rcu-safe */
+ whead = rcu_dereference(pwq->whead);
+ if (whead)
+ remove_wait_queue(whead, &pwq->wait);
+ rcu_read_unlock();
+}
+
/*
* This function unregisters poll callbacks from the associated file
* descriptor. Must be called with "mtx" held (or "epmutex" if called from
@@ -481,7 +498,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
list_del(&pwq->llink);
- remove_wait_queue(pwq->whead, &pwq->wait);
+ ep_remove_wait_queue(pwq);
kmem_cache_free(pwq_cache, pwq);
}
}
@@ -842,6 +859,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
+ if ((unsigned long)key & POLLFREE) {
+ ep_pwq_from_wait(wait)->whead = NULL;
+ /*
+ * whead = NULL above can race with ep_remove_wait_queue()
+ * which can do another remove_wait_queue() after us, so we
+ * can't use __remove_wait_queue(). whead->lock is held by
+ * the caller.
+ */
+ list_del_init(&wait->task_list);
+ }
+
spin_lock_irqsave(&ep->lock, flags);
/*
diff --git a/fs/exec.c b/fs/exec.c
index aeb135c7ff5..153dee14fe5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1071,6 +1071,21 @@ void set_task_comm(struct task_struct *tsk, char *buf)
perf_event_comm(tsk);
}
+static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
+{
+ int i, ch;
+
+ /* Copies the binary name from after last slash */
+ for (i = 0; (ch = *(fn++)) != '\0';) {
+ if (ch == '/')
+ i = 0; /* overwrite what we wrote */
+ else
+ if (i < len - 1)
+ tcomm[i++] = ch;
+ }
+ tcomm[i] = '\0';
+}
+
int flush_old_exec(struct linux_binprm * bprm)
{
int retval;
@@ -1085,6 +1100,7 @@ int flush_old_exec(struct linux_binprm * bprm)
set_mm_exe_file(bprm->mm, bprm->file);
+ filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
/*
* Release all of the old mmap stuff
*/
@@ -1116,10 +1132,6 @@ EXPORT_SYMBOL(would_dump);
void setup_new_exec(struct linux_binprm * bprm)
{
- int i, ch;
- const char *name;
- char tcomm[sizeof(current->comm)];
-
arch_pick_mmap_layout(current->mm);
/* This is the point of no return */
@@ -1130,18 +1142,7 @@ void setup_new_exec(struct linux_binprm * bprm)
else
set_dumpable(current->mm, suid_dumpable);
- name = bprm->filename;
-
- /* Copies the binary name from after last slash */
- for (i=0; (ch = *(name++)) != '\0';) {
- if (ch == '/')
- i = 0; /* overwrite what we wrote */
- else
- if (i < (sizeof(tcomm) - 1))
- tcomm[i++] = ch;
- }
- tcomm[i] = '\0';
- set_task_comm(current, tcomm);
+ set_task_comm(current, bprm->tcomm);
/* Set the new mm task size. We have to do that late because it may
* depend on TIF_32BIT which is only updated in flush_thread() on
@@ -1914,7 +1915,6 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
- struct completion *vfork_done;
int core_waiters = -EBUSY;
init_completion(&core_state->startup);
@@ -1926,22 +1926,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
core_waiters = zap_threads(tsk, mm, core_state, exit_code);
up_write(&mm->mmap_sem);
- if (unlikely(core_waiters < 0))
- goto fail;
-
- /*
- * Make sure nobody is waiting for us to release the VM,
- * otherwise we can deadlock when we wait on each other
- */
- vfork_done = tsk->vfork_done;
- if (vfork_done) {
- tsk->vfork_done = NULL;
- complete(vfork_done);
- }
-
- if (core_waiters)
+ if (core_waiters > 0)
wait_for_completion(&core_state->startup);
-fail:
+
return core_waiters;
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f855916657b..5b4a9362d5a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,14 +53,6 @@ struct wb_writeback_work {
};
/*
- * Include the creation of the trace points after defining the
- * wb_writeback_work structure so that the definition remains local to this
- * file.
- */
-#define CREATE_TRACE_POINTS
-#include <trace/events/writeback.h>
-
-/*
* We don't actually have pdflush, but this one is exported though /proc...
*/
int nr_pdflush_threads;
@@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head)
return list_entry(head, struct inode, i_wb_list);
}
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure and inline functions so that the definition
+ * remains local to this file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
{
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 376816fcd04..351a3e79778 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -167,14 +167,19 @@ void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
spin_unlock(&lru_lock);
}
-static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
+static void __gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
- spin_lock(&lru_lock);
if (!list_empty(&gl->gl_lru)) {
list_del_init(&gl->gl_lru);
atomic_dec(&lru_count);
clear_bit(GLF_LRU, &gl->gl_flags);
}
+}
+
+static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
+{
+ spin_lock(&lru_lock);
+ __gfs2_glock_remove_from_lru(gl);
spin_unlock(&lru_lock);
}
@@ -217,11 +222,12 @@ void gfs2_glock_put(struct gfs2_glock *gl)
struct gfs2_sbd *sdp = gl->gl_sbd;
struct address_space *mapping = gfs2_glock2aspace(gl);
- if (atomic_dec_and_test(&gl->gl_ref)) {
+ if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) {
+ __gfs2_glock_remove_from_lru(gl);
+ spin_unlock(&lru_lock);
spin_lock_bucket(gl->gl_hash);
hlist_bl_del_rcu(&gl->gl_list);
spin_unlock_bucket(gl->gl_hash);
- gfs2_glock_remove_from_lru(gl);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
trace_gfs2_glock_put(gl);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a7d611b93f0..56987460cda 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -391,10 +391,6 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
int error;
int dblocks = 1;
- error = gfs2_rindex_update(sdp);
- if (error)
- fs_warn(sdp, "rindex update returns %d\n", error);
-
error = gfs2_inplace_reserve(dip, RES_DINODE);
if (error)
goto out;
@@ -1043,6 +1039,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
if (!rgd)
goto out_inodes;
+
gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 6aacf3f230a..24f609c9ef9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -800,6 +800,11 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
fs_err(sdp, "can't get quota file inode: %d\n", error);
goto fail_rindex;
}
+
+ error = gfs2_rindex_update(sdp);
+ if (error)
+ goto fail_qinode;
+
return 0;
fail_qinode:
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 981bfa32121..49ada95209d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -683,16 +683,21 @@ int gfs2_rindex_update(struct gfs2_sbd *sdp)
struct gfs2_glock *gl = ip->i_gl;
struct gfs2_holder ri_gh;
int error = 0;
+ int unlock_required = 0;
/* Read new copy from disk if we don't have the latest */
if (!sdp->sd_rindex_uptodate) {
mutex_lock(&sdp->sd_rindex_mutex);
- error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
- if (error)
- return error;
+ if (!gfs2_glock_is_locked_by_me(gl)) {
+ error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
+ if (error)
+ return error;
+ unlock_required = 1;
+ }
if (!sdp->sd_rindex_uptodate)
error = gfs2_ri_update(ip);
- gfs2_glock_dq_uninit(&ri_gh);
+ if (unlock_required)
+ gfs2_glock_dq_uninit(&ri_gh);
mutex_unlock(&sdp->sd_rindex_mutex);
}
diff --git a/fs/inode.c b/fs/inode.c
index fb10d86ffad..d3ebdbe723d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1651,7 +1651,7 @@ __setup("ihash_entries=", set_ihash_entries);
*/
void __init inode_init_early(void)
{
- int loop;
+ unsigned int loop;
/* If hashes are distributed across NUMA nodes, defer
* hash allocation until vmalloc space is available.
@@ -1669,13 +1669,13 @@ void __init inode_init_early(void)
&i_hash_mask,
0);
- for (loop = 0; loop < (1 << i_hash_shift); loop++)
+ for (loop = 0; loop < (1U << i_hash_shift); loop++)
INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
void __init inode_init(void)
{
- int loop;
+ unsigned int loop;
/* inode slab cache */
inode_cachep = kmem_cache_create("inode_cache",
@@ -1699,7 +1699,7 @@ void __init inode_init(void)
&i_hash_mask,
0);
- for (loop = 0; loop < (1 << i_hash_shift); loop++)
+ for (loop = 0; loop < (1U << i_hash_shift); loop++)
INIT_HLIST_HEAD(&inode_hashtable[loop]);
}
diff --git a/fs/ioprio.c b/fs/ioprio.c
index f84b380d65e..0f1b9515213 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
if (ioc) {
ioc_ioprio_changed(ioc, ioprio);
- put_io_context(ioc, NULL);
+ put_io_context(ioc);
}
return err;
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index a01cdad6aad..eafb8d37a6f 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -335,7 +335,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
void *ebuf;
uint32_t ofs;
size_t retlen;
- int ret = -EIO;
+ int ret;
unsigned long *wordebuf;
ret = mtd_point(c->mtd, jeb->offset, c->sector_size, &retlen,
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index e97404d611e..9c501449450 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -152,9 +152,6 @@ static struct page *logfs_mtd_find_first_sb(struct super_block *sb, u64 *ofs)
filler_t *filler = logfs_mtd_readpage;
struct mtd_info *mtd = super->s_mtd;
- if (!mtd_can_have_bb(mtd))
- return NULL;
-
*ofs = 0;
while (mtd_block_isbad(mtd, *ofs)) {
*ofs += mtd->erasesize;
@@ -172,9 +169,6 @@ static struct page *logfs_mtd_find_last_sb(struct super_block *sb, u64 *ofs)
filler_t *filler = logfs_mtd_readpage;
struct mtd_info *mtd = super->s_mtd;
- if (!mtd_can_have_bb(mtd))
- return NULL;
-
*ofs = mtd->size - mtd->erasesize;
while (mtd_block_isbad(mtd, *ofs)) {
*ofs -= mtd->erasesize;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 501043e8966..3de7a32cadb 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -71,7 +71,7 @@ static int write_dir(struct inode *dir, struct logfs_disk_dentry *dd,
static int write_inode(struct inode *inode)
{
- return __logfs_write_inode(inode, WF_LOCK);
+ return __logfs_write_inode(inode, NULL, WF_LOCK);
}
static s64 dir_seek_data(struct inode *inode, s64 pos)
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index b548c87a86f..3886cded283 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -230,7 +230,9 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
return ret;
mutex_lock(&inode->i_mutex);
+ logfs_get_wblocks(sb, NULL, WF_LOCK);
logfs_write_anchor(sb);
+ logfs_put_wblocks(sb, NULL, WF_LOCK);
mutex_unlock(&inode->i_mutex);
return 0;
diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c
index caa4419285d..d4efb061bdc 100644
--- a/fs/logfs/gc.c
+++ b/fs/logfs/gc.c
@@ -367,7 +367,7 @@ static struct gc_candidate *get_candidate(struct super_block *sb)
int i, max_dist;
struct gc_candidate *cand = NULL, *this;
- max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS);
+ max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS - 1);
for (i = max_dist; i >= 0; i--) {
this = first_in_list(&super->s_low_list[i]);
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 388df1aa35e..a422f42238b 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -286,7 +286,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN)
return 0;
- ret = __logfs_write_inode(inode, flags);
+ ret = __logfs_write_inode(inode, NULL, flags);
LOGFS_BUG_ON(ret, inode->i_sb);
return ret;
}
@@ -363,7 +363,9 @@ static void logfs_init_once(void *_li)
static int logfs_sync_fs(struct super_block *sb, int wait)
{
+ logfs_get_wblocks(sb, NULL, WF_LOCK);
logfs_write_anchor(sb);
+ logfs_put_wblocks(sb, NULL, WF_LOCK);
return 0;
}
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 9da29706f91..1e1c369df22 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -612,7 +612,6 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type,
if (len == 0)
return logfs_write_header(super, header, 0, type);
- BUG_ON(len > sb->s_blocksize);
compr_len = logfs_compress(buf, data, len, sb->s_blocksize);
if (compr_len < 0 || type == JE_ANCHOR) {
memcpy(data, buf, len);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 926373866a5..5f093760946 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -528,7 +528,7 @@ void logfs_destroy_inode_cache(void);
void logfs_set_blocks(struct inode *inode, u64 no);
/* these logically belong into inode.c but actually reside in readwrite.c */
int logfs_read_inode(struct inode *inode);
-int __logfs_write_inode(struct inode *inode, long flags);
+int __logfs_write_inode(struct inode *inode, struct page *, long flags);
void logfs_evict_inode(struct inode *inode);
/* journal.c */
@@ -577,6 +577,8 @@ void initialize_block_counters(struct page *page, struct logfs_block *block,
__be64 *array, int page_is_empty);
int logfs_exist_block(struct inode *inode, u64 bix);
int get_page_reserve(struct inode *inode, struct page *page);
+void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock);
+void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock);
extern struct logfs_block_ops indirect_block_ops;
/* segment.c */
@@ -594,6 +596,7 @@ int logfs_init_mapping(struct super_block *sb);
void logfs_sync_area(struct logfs_area *area);
void logfs_sync_segments(struct super_block *sb);
void freeseg(struct super_block *sb, u32 segno);
+void free_areas(struct super_block *sb);
/* area handling */
int logfs_init_areas(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 2ac4217b790..4153e65b014 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -244,8 +244,7 @@ static void preunlock_page(struct super_block *sb, struct page *page, int lock)
* is waiting for s_write_mutex. We annotate this fact by setting PG_pre_locked
* in addition to PG_locked.
*/
-static void logfs_get_wblocks(struct super_block *sb, struct page *page,
- int lock)
+void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock)
{
struct logfs_super *super = logfs_super(sb);
@@ -260,8 +259,7 @@ static void logfs_get_wblocks(struct super_block *sb, struct page *page,
}
}
-static void logfs_put_wblocks(struct super_block *sb, struct page *page,
- int lock)
+void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock)
{
struct logfs_super *super = logfs_super(sb);
@@ -424,7 +422,7 @@ static void inode_write_block(struct logfs_block *block)
if (inode->i_ino == LOGFS_INO_MASTER)
logfs_write_anchor(inode->i_sb);
else {
- ret = __logfs_write_inode(inode, 0);
+ ret = __logfs_write_inode(inode, NULL, 0);
/* see indirect_write_block comment */
BUG_ON(ret);
}
@@ -560,8 +558,13 @@ static void inode_free_block(struct super_block *sb, struct logfs_block *block)
static void indirect_free_block(struct super_block *sb,
struct logfs_block *block)
{
- ClearPagePrivate(block->page);
- block->page->private = 0;
+ struct page *page = block->page;
+
+ if (PagePrivate(page)) {
+ ClearPagePrivate(page);
+ page_cache_release(page);
+ set_page_private(page, 0);
+ }
__free_block(sb, block);
}
@@ -650,8 +653,11 @@ static void alloc_data_block(struct inode *inode, struct page *page)
logfs_unpack_index(page->index, &bix, &level);
block = __alloc_block(inode->i_sb, inode->i_ino, bix, level);
block->page = page;
+
SetPagePrivate(page);
- page->private = (unsigned long)block;
+ page_cache_get(page);
+ set_page_private(page, (unsigned long) block);
+
block->ops = &indirect_block_ops;
}
@@ -1570,11 +1576,15 @@ int logfs_write_buf(struct inode *inode, struct page *page, long flags)
static int __logfs_delete(struct inode *inode, struct page *page)
{
long flags = WF_DELETE;
+ int err;
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
if (page->index < I0_BLOCKS)
return logfs_write_direct(inode, page, flags);
+ err = grow_inode(inode, page->index, 0);
+ if (err)
+ return err;
return logfs_write_rec(inode, page, page->index, 0, flags);
}
@@ -1623,7 +1633,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs,
if (inode->i_ino == LOGFS_INO_MASTER)
logfs_write_anchor(inode->i_sb);
else {
- err = __logfs_write_inode(inode, flags);
+ err = __logfs_write_inode(inode, page, flags);
}
}
}
@@ -1873,7 +1883,7 @@ int logfs_truncate(struct inode *inode, u64 target)
logfs_get_wblocks(sb, NULL, 1);
err = __logfs_truncate(inode, size);
if (!err)
- err = __logfs_write_inode(inode, 0);
+ err = __logfs_write_inode(inode, NULL, 0);
logfs_put_wblocks(sb, NULL, 1);
}
@@ -1901,8 +1911,11 @@ static void move_page_to_inode(struct inode *inode, struct page *page)
li->li_block = block;
block->page = NULL;
- page->private = 0;
- ClearPagePrivate(page);
+ if (PagePrivate(page)) {
+ ClearPagePrivate(page);
+ page_cache_release(page);
+ set_page_private(page, 0);
+ }
}
static void move_inode_to_page(struct page *page, struct inode *inode)
@@ -1918,8 +1931,12 @@ static void move_inode_to_page(struct page *page, struct inode *inode)
BUG_ON(PagePrivate(page));
block->ops = &indirect_block_ops;
block->page = page;
- page->private = (unsigned long)block;
- SetPagePrivate(page);
+
+ if (!PagePrivate(page)) {
+ SetPagePrivate(page);
+ page_cache_get(page);
+ set_page_private(page, (unsigned long) block);
+ }
block->inode = NULL;
li->li_block = NULL;
@@ -2106,14 +2123,14 @@ void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec)
ec_level);
}
-int __logfs_write_inode(struct inode *inode, long flags)
+int __logfs_write_inode(struct inode *inode, struct page *page, long flags)
{
struct super_block *sb = inode->i_sb;
int ret;
- logfs_get_wblocks(sb, NULL, flags & WF_LOCK);
+ logfs_get_wblocks(sb, page, flags & WF_LOCK);
ret = do_write_inode(inode);
- logfs_put_wblocks(sb, NULL, flags & WF_LOCK);
+ logfs_put_wblocks(sb, page, flags & WF_LOCK);
return ret;
}
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index 9d518735325..ab798ed1cc8 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -86,7 +86,11 @@ int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
BUG_ON(!page); /* FIXME: reserve a pool */
SetPageUptodate(page);
memcpy(page_address(page) + offset, buf, copylen);
- SetPagePrivate(page);
+
+ if (!PagePrivate(page)) {
+ SetPagePrivate(page);
+ page_cache_get(page);
+ }
page_cache_release(page);
buf += copylen;
@@ -110,7 +114,10 @@ static void pad_partial_page(struct logfs_area *area)
page = get_mapping_page(sb, index, 0);
BUG_ON(!page); /* FIXME: reserve a pool */
memset(page_address(page) + offset, 0xff, len);
- SetPagePrivate(page);
+ if (!PagePrivate(page)) {
+ SetPagePrivate(page);
+ page_cache_get(page);
+ }
page_cache_release(page);
}
}
@@ -130,7 +137,10 @@ static void pad_full_pages(struct logfs_area *area)
BUG_ON(!page); /* FIXME: reserve a pool */
SetPageUptodate(page);
memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
- SetPagePrivate(page);
+ if (!PagePrivate(page)) {
+ SetPagePrivate(page);
+ page_cache_get(page);
+ }
page_cache_release(page);
index++;
no_indizes--;
@@ -485,8 +495,12 @@ static void move_btree_to_page(struct inode *inode, struct page *page,
mempool_free(item, super->s_alias_pool);
}
block->page = page;
- SetPagePrivate(page);
- page->private = (unsigned long)block;
+
+ if (!PagePrivate(page)) {
+ SetPagePrivate(page);
+ page_cache_get(page);
+ set_page_private(page, (unsigned long) block);
+ }
block->ops = &indirect_block_ops;
initialize_block_counters(page, block, data, 0);
}
@@ -536,8 +550,12 @@ void move_page_to_btree(struct page *page)
list_add(&item->list, &block->item_list);
}
block->page = NULL;
- ClearPagePrivate(page);
- page->private = 0;
+
+ if (PagePrivate(page)) {
+ ClearPagePrivate(page);
+ page_cache_release(page);
+ set_page_private(page, 0);
+ }
block->ops = &btree_block_ops;
err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
block);
@@ -702,7 +720,10 @@ void freeseg(struct super_block *sb, u32 segno)
page = find_get_page(mapping, ofs >> PAGE_SHIFT);
if (!page)
continue;
- ClearPagePrivate(page);
+ if (PagePrivate(page)) {
+ ClearPagePrivate(page);
+ page_cache_release(page);
+ }
page_cache_release(page);
}
}
@@ -841,6 +862,16 @@ static void free_area(struct logfs_area *area)
kfree(area);
}
+void free_areas(struct super_block *sb)
+{
+ struct logfs_super *super = logfs_super(sb);
+ int i;
+
+ for_each_area(i)
+ free_area(super->s_area[i]);
+ free_area(super->s_journal_area);
+}
+
static struct logfs_area *alloc_area(struct super_block *sb)
{
struct logfs_area *area;
@@ -923,10 +954,6 @@ err:
void logfs_cleanup_areas(struct super_block *sb)
{
struct logfs_super *super = logfs_super(sb);
- int i;
btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
- for_each_area(i)
- free_area(super->s_area[i]);
- free_area(super->s_journal_area);
}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index e795c234ea3..c9ee7f5d1ca 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -486,14 +486,15 @@ static void logfs_kill_sb(struct super_block *sb)
/* Alias entries slow down mount, so evict as many as possible */
sync_filesystem(sb);
logfs_write_anchor(sb);
+ free_areas(sb);
/*
* From this point on alias entries are simply dropped - and any
* writes to the object store are considered bugs.
*/
- super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
log_super("LogFS: Now in shutdown\n");
generic_shutdown_super(sb);
+ super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
diff --git a/fs/namei.c b/fs/namei.c
index 208c6aa4a98..e2ba62820a0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1095,8 +1095,10 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr
struct dentry *old;
/* Don't create child dentry for a dead directory. */
- if (unlikely(IS_DEADDIR(inode)))
+ if (unlikely(IS_DEADDIR(inode))) {
+ dput(dentry);
return ERR_PTR(-ENOENT);
+ }
old = inode->i_op->lookup(inode, dentry, nd);
if (unlikely(old)) {
@@ -1372,6 +1374,34 @@ static inline int can_lookup(struct inode *inode)
return 1;
}
+unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+{
+ unsigned long hash = init_name_hash();
+ while (len--)
+ hash = partial_name_hash(*name++, hash);
+ return end_name_hash(hash);
+}
+EXPORT_SYMBOL(full_name_hash);
+
+/*
+ * We know there's a real path component here of at least
+ * one character.
+ */
+static inline unsigned long hash_name(const char *name, unsigned int *hashp)
+{
+ unsigned long hash = init_name_hash();
+ unsigned long len = 0, c;
+
+ c = (unsigned char)*name;
+ do {
+ len++;
+ hash = partial_name_hash(c, hash);
+ c = (unsigned char)name[len];
+ } while (c && c != '/');
+ *hashp = end_name_hash(hash);
+ return len;
+}
+
/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
@@ -1392,31 +1422,22 @@ static int link_path_walk(const char *name, struct nameidata *nd)
/* At this point we know we have a real path component. */
for(;;) {
- unsigned long hash;
struct qstr this;
- unsigned int c;
+ long len;
int type;
err = may_lookup(nd);
if (err)
break;
+ len = hash_name(name, &this.hash);
this.name = name;
- c = *(const unsigned char *)name;
-
- hash = init_name_hash();
- do {
- name++;
- hash = partial_name_hash(c, hash);
- c = *(const unsigned char *)name;
- } while (c && (c != '/'));
- this.len = name - (const char *) this.name;
- this.hash = end_name_hash(hash);
+ this.len = len;
type = LAST_NORM;
- if (this.name[0] == '.') switch (this.len) {
+ if (name[0] == '.') switch (len) {
case 2:
- if (this.name[1] == '.') {
+ if (name[1] == '.') {
type = LAST_DOTDOT;
nd->flags |= LOOKUP_JUMPED;
}
@@ -1435,12 +1456,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
}
}
- /* remove trailing slashes? */
- if (!c)
+ if (!name[len])
goto last_component;
- while (*++name == '/');
- if (!*name)
+ /*
+ * If it wasn't NUL, we know it was '/'. Skip that
+ * slash, and continue until no more slashes.
+ */
+ do {
+ len++;
+ } while (unlikely(name[len] == '/'));
+ if (!name[len])
goto last_component;
+ name += len;
err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
if (err < 0)
@@ -1773,24 +1800,21 @@ static struct dentry *lookup_hash(struct nameidata *nd)
struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
{
struct qstr this;
- unsigned long hash;
unsigned int c;
WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
this.name = name;
this.len = len;
+ this.hash = full_name_hash(name, len);
if (!len)
return ERR_PTR(-EACCES);
- hash = init_name_hash();
while (len--) {
c = *(const unsigned char *)name++;
if (c == '/' || c == '\0')
return ERR_PTR(-EACCES);
- hash = partial_name_hash(c, hash);
}
- this.hash = end_name_hash(hash);
/*
* See if the low-level filesystem might want
* to use its own hash..
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f0c849c98fe..ec9f6ef6c5d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
}
if (npages > 1) {
/* for decoding across pages */
- args.acl_scratch = alloc_page(GFP_KERNEL);
- if (!args.acl_scratch)
+ res.acl_scratch = alloc_page(GFP_KERNEL);
+ if (!res.acl_scratch)
goto out_free;
}
args.acl_len = npages * PAGE_SIZE;
@@ -3612,8 +3612,8 @@ out_free:
for (i = 0; i < npages; i++)
if (pages[i])
__free_page(pages[i]);
- if (args.acl_scratch)
- __free_page(args.acl_scratch);
+ if (res.acl_scratch)
+ __free_page(res.acl_scratch);
return ret;
}
@@ -4883,8 +4883,10 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
clp->cl_rpcclient->cl_auth->au_flavor);
res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
- if (unlikely(!res.server_scope))
- return -ENOMEM;
+ if (unlikely(!res.server_scope)) {
+ status = -ENOMEM;
+ goto out;
+ }
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
if (!status)
@@ -4901,12 +4903,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
clp->server_scope = NULL;
}
- if (!clp->server_scope)
+ if (!clp->server_scope) {
clp->server_scope = res.server_scope;
- else
- kfree(res.server_scope);
+ goto out;
+ }
}
-
+ kfree(res.server_scope);
+out:
dprintk("<-- %s status= %d\n", __func__, status);
return status;
}
@@ -5008,37 +5011,53 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
return status;
}
+static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags)
+{
+ return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags);
+}
+
+static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
+ struct nfs4_slot *new,
+ u32 max_slots,
+ u32 ivalue)
+{
+ struct nfs4_slot *old = NULL;
+ u32 i;
+
+ spin_lock(&tbl->slot_tbl_lock);
+ if (new) {
+ old = tbl->slots;
+ tbl->slots = new;
+ tbl->max_slots = max_slots;
+ }
+ tbl->highest_used_slotid = -1; /* no slot is currently used */
+ for (i = 0; i < tbl->max_slots; i++)
+ tbl->slots[i].seq_nr = ivalue;
+ spin_unlock(&tbl->slot_tbl_lock);
+ kfree(old);
+}
+
/*
- * Reset a slot table
+ * (re)Initialise a slot table
*/
-static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
- int ivalue)
+static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
+ u32 ivalue)
{
struct nfs4_slot *new = NULL;
- int i;
- int ret = 0;
+ int ret = -ENOMEM;
dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
max_reqs, tbl->max_slots);
/* Does the newly negotiated max_reqs match the existing slot table? */
if (max_reqs != tbl->max_slots) {
- ret = -ENOMEM;
- new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
- GFP_NOFS);
+ new = nfs4_alloc_slots(max_reqs, GFP_NOFS);
if (!new)
goto out;
- ret = 0;
- kfree(tbl->slots);
}
- spin_lock(&tbl->slot_tbl_lock);
- if (new) {
- tbl->slots = new;
- tbl->max_slots = max_reqs;
- }
- for (i = 0; i < tbl->max_slots; ++i)
- tbl->slots[i].seq_nr = ivalue;
- spin_unlock(&tbl->slot_tbl_lock);
+ ret = 0;
+
+ nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue);
dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
tbl, tbl->slots, tbl->max_slots);
out:
@@ -5061,36 +5080,6 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session)
}
/*
- * Initialize slot table
- */
-static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
- int max_slots, int ivalue)
-{
- struct nfs4_slot *slot;
- int ret = -ENOMEM;
-
- BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE);
-
- dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
-
- slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
- if (!slot)
- goto out;
- ret = 0;
-
- spin_lock(&tbl->slot_tbl_lock);
- tbl->max_slots = max_slots;
- tbl->slots = slot;
- tbl->highest_used_slotid = -1; /* no slot is currently used */
- spin_unlock(&tbl->slot_tbl_lock);
- dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
- tbl, tbl->slots, tbl->max_slots);
-out:
- dprintk("<-- %s: return %d\n", __func__, ret);
- return ret;
-}
-
-/*
* Initialize or reset the forechannel and backchannel tables
*/
static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
@@ -5101,25 +5090,16 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
dprintk("--> %s\n", __func__);
/* Fore channel */
tbl = &ses->fc_slot_table;
- if (tbl->slots == NULL) {
- status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
- if (status) /* -ENOMEM */
- return status;
- } else {
- status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
- if (status)
- return status;
- }
+ status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
+ if (status) /* -ENOMEM */
+ return status;
/* Back channel */
tbl = &ses->bc_slot_table;
- if (tbl->slots == NULL) {
- status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
- if (status)
- /* Fore and back channel share a connection so get
- * both slot tables or neither */
- nfs4_destroy_slot_tables(ses);
- } else
- status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
+ status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
+ if (status && tbl->slots == NULL)
+ /* Fore and back channel share a connection so get
+ * both slot tables or neither */
+ nfs4_destroy_slot_tables(ses);
return status;
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a53f33b4ac3..45392032e7b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1132,6 +1132,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
{
struct nfs_client *clp = server->nfs_client;
+ if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags))
+ nfs_async_inode_return_delegation(state->inode, &state->stateid);
nfs4_state_mark_reclaim_nograce(clp, state);
nfs4_schedule_state_manager(clp);
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 95e92e43840..33bd8d0f745 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
args->acl_pages, args->acl_pgbase, args->acl_len);
- xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
encode_nops(&hdr);
}
@@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
struct compound_hdr hdr;
int status;
+ if (res->acl_scratch != NULL) {
+ void *p = page_address(res->acl_scratch);
+ xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
+ }
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 886649627c3..2a70fce70c6 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -603,6 +603,8 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
nsegs = argv[4].v_nmembs;
if (argv[4].v_size != argsz[4])
goto out;
+ if (nsegs > UINT_MAX / sizeof(__u64))
+ goto out;
/*
* argv[4] points to segment numbers this ioctl cleans. We
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index f14fde2b03d..e0281992ddc 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1,7 +1,7 @@
/**
* attrib.c - NTFS attribute operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2007 Anton Altaparmakov
+ * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -345,10 +345,10 @@ LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
unsigned long flags;
bool is_retry = false;
+ BUG_ON(!ni);
ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, %s_locked.",
ni->mft_no, (unsigned long long)vcn,
write_locked ? "write" : "read");
- BUG_ON(!ni);
BUG_ON(!NInoNonResident(ni));
BUG_ON(vcn < 0);
if (!ni->runlist.rl) {
@@ -469,9 +469,9 @@ runlist_element *ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
int err = 0;
bool is_retry = false;
+ BUG_ON(!ni);
ntfs_debug("Entering for i_ino 0x%lx, vcn 0x%llx, with%s ctx.",
ni->mft_no, (unsigned long long)vcn, ctx ? "" : "out");
- BUG_ON(!ni);
BUG_ON(!NInoNonResident(ni));
BUG_ON(vcn < 0);
if (!ni->runlist.rl) {
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 382857f9c7d..3014a36a255 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -1,7 +1,7 @@
/**
* mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
* Copyright (c) 2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -1367,7 +1367,7 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
ntfs_error(vol->sb, "Failed to merge runlists for mft "
"bitmap.");
if (ntfs_cluster_free_from_rl(vol, rl2)) {
- ntfs_error(vol->sb, "Failed to dealocate "
+ ntfs_error(vol->sb, "Failed to deallocate "
"allocated cluster.%s", es);
NVolSetErrors(vol);
}
@@ -1805,7 +1805,7 @@ static int ntfs_mft_data_extend_allocation_nolock(ntfs_volume *vol)
ntfs_error(vol->sb, "Failed to merge runlists for mft data "
"attribute.");
if (ntfs_cluster_free_from_rl(vol, rl2)) {
- ntfs_error(vol->sb, "Failed to dealocate clusters "
+ ntfs_error(vol->sb, "Failed to deallocate clusters "
"from the mft data attribute.%s", es);
NVolSetErrors(vol);
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 5a4a8af5c40..f907611cca7 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1,7 +1,7 @@
/*
* super.c - NTFS kernel super block handling. Part of the Linux-NTFS project.
*
- * Copyright (c) 2001-2011 Anton Altaparmakov and Tuxera Inc.
+ * Copyright (c) 2001-2012 Anton Altaparmakov and Tuxera Inc.
* Copyright (c) 2001,2002 Richard Russon
*
* This program/include file is free software; you can redistribute it and/or
@@ -1239,7 +1239,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
{
MFT_REF mref;
struct inode *vi;
- ntfs_inode *ni;
struct page *page;
u32 *kaddr, *kend;
ntfs_name *name = NULL;
@@ -1290,7 +1289,6 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
"is not the system volume.", i_size_read(vi));
goto iput_out;
}
- ni = NTFS_I(vi);
page = ntfs_map_page(vi->i_mapping, 0);
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to read from hiberfil.sys.");
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index be244692550..a9856e3eaaf 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1053,7 +1053,7 @@ static int ocfs2_rename(struct inode *old_dir,
handle_t *handle = NULL;
struct buffer_head *old_dir_bh = NULL;
struct buffer_head *new_dir_bh = NULL;
- nlink_t old_dir_nlink = old_dir->i_nlink;
+ u32 old_dir_nlink = old_dir->i_nlink;
struct ocfs2_dinode *old_di;
struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, };
struct ocfs2_dir_lookup_result target_lookup_res = { NULL, };
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9cde9edf9c4..d4548dd49b0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -198,26 +198,6 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
-static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
-{
- struct mm_struct *mm;
- int err;
-
- err = mutex_lock_killable(&task->signal->cred_guard_mutex);
- if (err)
- return ERR_PTR(err);
-
- mm = get_task_mm(task);
- if (mm && mm != current->mm &&
- !ptrace_may_access(task, mode)) {
- mmput(mm);
- mm = ERR_PTR(-EACCES);
- }
- mutex_unlock(&task->signal->cred_guard_mutex);
-
- return mm;
-}
-
struct mm_struct *mm_for_maps(struct task_struct *task)
{
return mm_access(task, PTRACE_MODE_READ);
@@ -711,6 +691,13 @@ static int mem_open(struct inode* inode, struct file* file)
if (IS_ERR(mm))
return PTR_ERR(mm);
+ if (mm) {
+ /* ensure this mm_struct can't be freed */
+ atomic_inc(&mm->mm_count);
+ /* but do not pin its memory */
+ mmput(mm);
+ }
+
/* OK to pass negative loff_t, we can catch out-of-range */
file->f_mode |= FMODE_UNSIGNED_OFFSET;
file->private_data = mm;
@@ -718,57 +705,13 @@ static int mem_open(struct inode* inode, struct file* file)
return 0;
}
-static ssize_t mem_read(struct file * file, char __user * buf,
- size_t count, loff_t *ppos)
+static ssize_t mem_rw(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos, int write)
{
- int ret;
- char *page;
- unsigned long src = *ppos;
struct mm_struct *mm = file->private_data;
-
- if (!mm)
- return 0;
-
- page = (char *)__get_free_page(GFP_TEMPORARY);
- if (!page)
- return -ENOMEM;
-
- ret = 0;
-
- while (count > 0) {
- int this_len, retval;
-
- this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
- retval = access_remote_vm(mm, src, page, this_len, 0);
- if (!retval) {
- if (!ret)
- ret = -EIO;
- break;
- }
-
- if (copy_to_user(buf, page, retval)) {
- ret = -EFAULT;
- break;
- }
-
- ret += retval;
- src += retval;
- buf += retval;
- count -= retval;
- }
- *ppos = src;
-
- free_page((unsigned long) page);
- return ret;
-}
-
-static ssize_t mem_write(struct file * file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- int copied;
+ unsigned long addr = *ppos;
+ ssize_t copied;
char *page;
- unsigned long dst = *ppos;
- struct mm_struct *mm = file->private_data;
if (!mm)
return 0;
@@ -778,31 +721,54 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
return -ENOMEM;
copied = 0;
+ if (!atomic_inc_not_zero(&mm->mm_users))
+ goto free;
+
while (count > 0) {
- int this_len, retval;
+ int this_len = min_t(int, count, PAGE_SIZE);
- this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
- if (copy_from_user(page, buf, this_len)) {
+ if (write && copy_from_user(page, buf, this_len)) {
copied = -EFAULT;
break;
}
- retval = access_remote_vm(mm, dst, page, this_len, 1);
- if (!retval) {
+
+ this_len = access_remote_vm(mm, addr, page, this_len, write);
+ if (!this_len) {
if (!copied)
copied = -EIO;
break;
}
- copied += retval;
- buf += retval;
- dst += retval;
- count -= retval;
+
+ if (!write && copy_to_user(buf, page, this_len)) {
+ copied = -EFAULT;
+ break;
+ }
+
+ buf += this_len;
+ addr += this_len;
+ copied += this_len;
+ count -= this_len;
}
- *ppos = dst;
+ *ppos = addr;
+ mmput(mm);
+free:
free_page((unsigned long) page);
return copied;
}
+static ssize_t mem_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return mem_rw(file, buf, count, ppos, 0);
+}
+
+static ssize_t mem_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return mem_rw(file, (char __user*)buf, count, ppos, 1);
+}
+
loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
switch (orig) {
@@ -822,8 +788,8 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig)
static int mem_release(struct inode *inode, struct file *file)
{
struct mm_struct *mm = file->private_data;
-
- mmput(mm);
+ if (mm)
+ mmdrop(mm);
return 0;
}
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 7898cd688a0..fc2c4388d12 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -292,11 +292,26 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
}
}
+/* Return 1 if 'cmd' will block on frozen filesystem */
+static int quotactl_cmd_write(int cmd)
+{
+ switch (cmd) {
+ case Q_GETFMT:
+ case Q_GETINFO:
+ case Q_SYNC:
+ case Q_XGETQSTAT:
+ case Q_XGETQUOTA:
+ case Q_XQUOTASYNC:
+ return 0;
+ }
+ return 1;
+}
+
/*
* look up a superblock on which quota ops will be performed
* - use the name of a block device to find the superblock thereon
*/
-static struct super_block *quotactl_block(const char __user *special)
+static struct super_block *quotactl_block(const char __user *special, int cmd)
{
#ifdef CONFIG_BLOCK
struct block_device *bdev;
@@ -309,7 +324,10 @@ static struct super_block *quotactl_block(const char __user *special)
putname(tmp);
if (IS_ERR(bdev))
return ERR_CAST(bdev);
- sb = get_super(bdev);
+ if (quotactl_cmd_write(cmd))
+ sb = get_super_thawed(bdev);
+ else
+ sb = get_super(bdev);
bdput(bdev);
if (!sb)
return ERR_PTR(-ENODEV);
@@ -361,7 +379,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
pathp = &path;
}
- sb = quotactl_block(special);
+ sb = quotactl_block(special, cmds);
if (IS_ERR(sb)) {
ret = PTR_ERR(sb);
goto out;
diff --git a/fs/select.c b/fs/select.c
index d33418fdc85..e782258d0de 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block)
}
SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
- long, timeout_msecs)
+ int, timeout_msecs)
{
struct timespec end_time, *to = NULL;
int ret;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b451d..7ae2a574cb2 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,21 @@
#include <linux/signalfd.h>
#include <linux/syscalls.h>
+void signalfd_cleanup(struct sighand_struct *sighand)
+{
+ wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+ /*
+ * The lockless check can race with remove_wait_queue() in progress,
+ * but in this case its caller should run under rcu_read_lock() and
+ * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+ */
+ if (likely(!waitqueue_active(wqh)))
+ return;
+
+ /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+ wake_up_poll(wqh, POLLHUP | POLLFREE);
+}
+
struct signalfd_ctx {
sigset_t sigmask;
};
diff --git a/fs/super.c b/fs/super.c
index 6015c02296b..6277ec6cb60 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -634,6 +634,28 @@ rescan:
EXPORT_SYMBOL(get_super);
/**
+ * get_super_thawed - get thawed superblock of a device
+ * @bdev: device to get the superblock for
+ *
+ * Scans the superblock list and finds the superblock of the file system
+ * mounted on the device. The superblock is returned once it is thawed
+ * (or immediately if it was not frozen). %NULL is returned if no match
+ * is found.
+ */
+struct super_block *get_super_thawed(struct block_device *bdev)
+{
+ while (1) {
+ struct super_block *s = get_super(bdev);
+ if (!s || s->s_frozen == SB_UNFROZEN)
+ return s;
+ up_read(&s->s_umount);
+ vfs_check_frozen(s, SB_FREEZE_WRITE);
+ put_super(s);
+ }
+}
+EXPORT_SYMBOL(get_super_thawed);
+
+/**
* get_active_super - get an active reference to the superblock of a device
* @bdev: device to get the superblock for
*
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 62f4fb37789..00012e31829 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -493,6 +493,12 @@ int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr,
const void *ns = NULL;
int err;
+ if (!dir_sd) {
+ WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n",
+ kobject_name(kobj));
+ return -ENOENT;
+ }
+
err = 0;
if (!sysfs_ns_type(dir_sd))
goto out;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 4a802b4a905..85eb81683a2 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -318,8 +318,11 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
struct sysfs_addrm_cxt acxt;
struct sysfs_dirent *sd;
- if (!dir_sd)
+ if (!dir_sd) {
+ WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
+ name);
return -ENOENT;
+ }
sysfs_addrm_start(&acxt, dir_sd);
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 292eff19803..ab7c53fe346 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone)
extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
-static inline int
-kmem_shake_allow(gfp_t gfp_mask)
-{
- return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
-}
-
#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index b4ff40b5f91..53db20ee3e7 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -63,82 +63,6 @@ int xfs_dqerror_mod = 33;
static struct lock_class_key xfs_dquot_other_class;
/*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots are above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type)
-{
- xfs_dquot_t *dqp;
- boolean_t brandnewdquot;
-
- brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
- dqp->dq_flags = type;
- dqp->q_core.d_id = cpu_to_be32(id);
- dqp->q_mount = mp;
-
- /*
- * No need to re-initialize these if this is a reclaimed dquot.
- */
- if (brandnewdquot) {
- INIT_LIST_HEAD(&dqp->q_freelist);
- mutex_init(&dqp->q_qlock);
- init_waitqueue_head(&dqp->q_pinwait);
-
- /*
- * Because we want to use a counting completion, complete
- * the flush completion once to allow a single access to
- * the flush completion without blocking.
- */
- init_completion(&dqp->q_flush);
- complete(&dqp->q_flush);
-
- trace_xfs_dqinit(dqp);
- } else {
- /*
- * Only the q_core portion was zeroed in dqreclaim_one().
- * So, we need to reset others.
- */
- dqp->q_nrefs = 0;
- dqp->q_blkno = 0;
- INIT_LIST_HEAD(&dqp->q_mplist);
- INIT_LIST_HEAD(&dqp->q_hashlist);
- dqp->q_bufoffset = 0;
- dqp->q_fileoffset = 0;
- dqp->q_transp = NULL;
- dqp->q_gdquot = NULL;
- dqp->q_res_bcount = 0;
- dqp->q_res_icount = 0;
- dqp->q_res_rtbcount = 0;
- atomic_set(&dqp->q_pincount, 0);
- dqp->q_hash = NULL;
- ASSERT(list_empty(&dqp->q_freelist));
-
- trace_xfs_dqreuse(dqp);
- }
-
- /*
- * In either case we need to make sure group quotas have a different
- * lock class than user quotas, to make sure lockdep knows we can
- * locks of one of each at the same time.
- */
- if (!(type & XFS_DQ_USER))
- lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
-
- /*
- * log item gets initialized later
- */
- return (dqp);
-}
-
-/*
* This is called to free all the memory associated with a dquot
*/
void
@@ -215,10 +139,10 @@ xfs_qm_adjust_dqtimers(
if (!d->d_btimer) {
if ((d->d_blk_softlimit &&
- (be64_to_cpu(d->d_bcount) >=
+ (be64_to_cpu(d->d_bcount) >
be64_to_cpu(d->d_blk_softlimit))) ||
(d->d_blk_hardlimit &&
- (be64_to_cpu(d->d_bcount) >=
+ (be64_to_cpu(d->d_bcount) >
be64_to_cpu(d->d_blk_hardlimit)))) {
d->d_btimer = cpu_to_be32(get_seconds() +
mp->m_quotainfo->qi_btimelimit);
@@ -227,10 +151,10 @@ xfs_qm_adjust_dqtimers(
}
} else {
if ((!d->d_blk_softlimit ||
- (be64_to_cpu(d->d_bcount) <
+ (be64_to_cpu(d->d_bcount) <=
be64_to_cpu(d->d_blk_softlimit))) &&
(!d->d_blk_hardlimit ||
- (be64_to_cpu(d->d_bcount) <
+ (be64_to_cpu(d->d_bcount) <=
be64_to_cpu(d->d_blk_hardlimit)))) {
d->d_btimer = 0;
}
@@ -238,10 +162,10 @@ xfs_qm_adjust_dqtimers(
if (!d->d_itimer) {
if ((d->d_ino_softlimit &&
- (be64_to_cpu(d->d_icount) >=
+ (be64_to_cpu(d->d_icount) >
be64_to_cpu(d->d_ino_softlimit))) ||
(d->d_ino_hardlimit &&
- (be64_to_cpu(d->d_icount) >=
+ (be64_to_cpu(d->d_icount) >
be64_to_cpu(d->d_ino_hardlimit)))) {
d->d_itimer = cpu_to_be32(get_seconds() +
mp->m_quotainfo->qi_itimelimit);
@@ -250,10 +174,10 @@ xfs_qm_adjust_dqtimers(
}
} else {
if ((!d->d_ino_softlimit ||
- (be64_to_cpu(d->d_icount) <
+ (be64_to_cpu(d->d_icount) <=
be64_to_cpu(d->d_ino_softlimit))) &&
(!d->d_ino_hardlimit ||
- (be64_to_cpu(d->d_icount) <
+ (be64_to_cpu(d->d_icount) <=
be64_to_cpu(d->d_ino_hardlimit)))) {
d->d_itimer = 0;
}
@@ -261,10 +185,10 @@ xfs_qm_adjust_dqtimers(
if (!d->d_rtbtimer) {
if ((d->d_rtb_softlimit &&
- (be64_to_cpu(d->d_rtbcount) >=
+ (be64_to_cpu(d->d_rtbcount) >
be64_to_cpu(d->d_rtb_softlimit))) ||
(d->d_rtb_hardlimit &&
- (be64_to_cpu(d->d_rtbcount) >=
+ (be64_to_cpu(d->d_rtbcount) >
be64_to_cpu(d->d_rtb_hardlimit)))) {
d->d_rtbtimer = cpu_to_be32(get_seconds() +
mp->m_quotainfo->qi_rtbtimelimit);
@@ -273,10 +197,10 @@ xfs_qm_adjust_dqtimers(
}
} else {
if ((!d->d_rtb_softlimit ||
- (be64_to_cpu(d->d_rtbcount) <
+ (be64_to_cpu(d->d_rtbcount) <=
be64_to_cpu(d->d_rtb_softlimit))) &&
(!d->d_rtb_hardlimit ||
- (be64_to_cpu(d->d_rtbcount) <
+ (be64_to_cpu(d->d_rtbcount) <=
be64_to_cpu(d->d_rtb_hardlimit)))) {
d->d_rtbtimer = 0;
}
@@ -567,7 +491,32 @@ xfs_qm_dqread(
int error;
int cancelflags = 0;
- dqp = xfs_qm_dqinit(mp, id, type);
+
+ dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+
+ dqp->dq_flags = type;
+ dqp->q_core.d_id = cpu_to_be32(id);
+ dqp->q_mount = mp;
+ INIT_LIST_HEAD(&dqp->q_freelist);
+ mutex_init(&dqp->q_qlock);
+ init_waitqueue_head(&dqp->q_pinwait);
+
+ /*
+ * Because we want to use a counting completion, complete
+ * the flush completion once to allow a single access to
+ * the flush completion without blocking.
+ */
+ init_completion(&dqp->q_flush);
+ complete(&dqp->q_flush);
+
+ /*
+ * Make sure group quotas have a different lock class than user
+ * quotas.
+ */
+ if (!(type & XFS_DQ_USER))
+ lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
+
+ atomic_inc(&xfs_Gqm->qm_totaldquots);
trace_xfs_dqread(dqp);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 541a508adea..0ed9ee77937 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1489,7 +1489,7 @@ xlog_recover_add_to_cont_trans(
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len;
- ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
+ ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
memcpy(&ptr[old_len], dp, len); /* d, s, l */
item->ri_buf[item->ri_cnt-1].i_len += len;
item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -1981,7 +1981,7 @@ xfs_qm_dqcheck(
if (!errs && ddq->d_id) {
if (ddq->d_blk_softlimit &&
- be64_to_cpu(ddq->d_bcount) >=
+ be64_to_cpu(ddq->d_bcount) >
be64_to_cpu(ddq->d_blk_softlimit)) {
if (!ddq->d_btimer) {
if (flags & XFS_QMOPT_DOWARN)
@@ -1992,7 +1992,7 @@ xfs_qm_dqcheck(
}
}
if (ddq->d_ino_softlimit &&
- be64_to_cpu(ddq->d_icount) >=
+ be64_to_cpu(ddq->d_icount) >
be64_to_cpu(ddq->d_ino_softlimit)) {
if (!ddq->d_itimer) {
if (flags & XFS_QMOPT_DOWARN)
@@ -2003,7 +2003,7 @@ xfs_qm_dqcheck(
}
}
if (ddq->d_rtb_softlimit &&
- be64_to_cpu(ddq->d_rtbcount) >=
+ be64_to_cpu(ddq->d_rtbcount) >
be64_to_cpu(ddq->d_rtb_softlimit)) {
if (!ddq->d_rtbtimer) {
if (flags & XFS_QMOPT_DOWARN)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 671f37eae1c..c436def733b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -50,7 +50,6 @@
*/
struct mutex xfs_Gqm_lock;
struct xfs_qm *xfs_Gqm;
-uint ndquot;
kmem_zone_t *qm_dqzone;
kmem_zone_t *qm_dqtrxzone;
@@ -93,7 +92,6 @@ xfs_Gqm_init(void)
goto out_free_udqhash;
hsize /= sizeof(xfs_dqhash_t);
- ndquot = hsize << 8;
xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
xqm->qm_dqhashmask = hsize - 1;
@@ -137,7 +135,6 @@ xfs_Gqm_init(void)
xqm->qm_dqtrxzone = qm_dqtrxzone;
atomic_set(&xqm->qm_totaldquots, 0);
- xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
xqm->qm_nrefs = 0;
return xqm;
@@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos(
return 0;
}
+STATIC void
+xfs_qm_dqfree_one(
+ struct xfs_dquot *dqp)
+{
+ struct xfs_mount *mp = dqp->q_mount;
+ struct xfs_quotainfo *qi = mp->m_quotainfo;
+ mutex_lock(&dqp->q_hash->qh_lock);
+ list_del_init(&dqp->q_hashlist);
+ dqp->q_hash->qh_version++;
+ mutex_unlock(&dqp->q_hash->qh_lock);
-/*
- * Pop the least recently used dquot off the freelist and recycle it.
- */
-STATIC struct xfs_dquot *
-xfs_qm_dqreclaim_one(void)
+ mutex_lock(&qi->qi_dqlist_lock);
+ list_del_init(&dqp->q_mplist);
+ qi->qi_dquots--;
+ qi->qi_dqreclaims++;
+ mutex_unlock(&qi->qi_dqlist_lock);
+
+ xfs_qm_dqdestroy(dqp);
+}
+
+STATIC void
+xfs_qm_dqreclaim_one(
+ struct xfs_dquot *dqp,
+ struct list_head *dispose_list)
{
- struct xfs_dquot *dqp;
- int restarts = 0;
+ struct xfs_mount *mp = dqp->q_mount;
+ int error;
- mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-restart:
- list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
- struct xfs_mount *mp = dqp->q_mount;
+ if (!xfs_dqlock_nowait(dqp))
+ goto out_busy;
- if (!xfs_dqlock_nowait(dqp))
- continue;
+ /*
+ * This dquot has acquired a reference in the meantime remove it from
+ * the freelist and try again.
+ */
+ if (dqp->q_nrefs) {
+ xfs_dqunlock(dqp);
- /*
- * This dquot has already been grabbed by dqlookup.
- * Remove it from the freelist and try again.
- */
- if (dqp->q_nrefs) {
- trace_xfs_dqreclaim_want(dqp);
- XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
- restarts++;
- goto dqunlock;
- }
+ trace_xfs_dqreclaim_want(dqp);
+ XQM_STATS_INC(xqmstats.xs_qm_dqwants);
- ASSERT(dqp->q_hash);
- ASSERT(!list_empty(&dqp->q_mplist));
+ list_del_init(&dqp->q_freelist);
+ xfs_Gqm->qm_dqfrlist_cnt--;
+ return;
+ }
- /*
- * Try to grab the flush lock. If this dquot is in the process
- * of getting flushed to disk, we don't want to reclaim it.
- */
- if (!xfs_dqflock_nowait(dqp))
- goto dqunlock;
+ ASSERT(dqp->q_hash);
+ ASSERT(!list_empty(&dqp->q_mplist));
- /*
- * We have the flush lock so we know that this is not in the
- * process of being flushed. So, if this is dirty, flush it
- * DELWRI so that we don't get a freelist infested with
- * dirty dquots.
- */
- if (XFS_DQ_IS_DIRTY(dqp)) {
- int error;
+ /*
+ * Try to grab the flush lock. If this dquot is in the process of
+ * getting flushed to disk, we don't want to reclaim it.
+ */
+ if (!xfs_dqflock_nowait(dqp))
+ goto out_busy;
- trace_xfs_dqreclaim_dirty(dqp);
+ /*
+ * We have the flush lock so we know that this is not in the
+ * process of being flushed. So, if this is dirty, flush it
+ * DELWRI so that we don't get a freelist infested with
+ * dirty dquots.
+ */
+ if (XFS_DQ_IS_DIRTY(dqp)) {
+ trace_xfs_dqreclaim_dirty(dqp);
- /*
- * We flush it delayed write, so don't bother
- * releasing the freelist lock.
- */
- error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
- if (error) {
- xfs_warn(mp, "%s: dquot %p flush failed",
- __func__, dqp);
- }
- goto dqunlock;
+ /*
+ * We flush it delayed write, so don't bother releasing the
+ * freelist lock.
+ */
+ error = xfs_qm_dqflush(dqp, 0);
+ if (error) {
+ xfs_warn(mp, "%s: dquot %p flush failed",
+ __func__, dqp);
}
- xfs_dqfunlock(dqp);
/*
- * Prevent lookup now that we are going to reclaim the dquot.
- * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
- * thus we can drop the lock now.
+ * Give the dquot another try on the freelist, as the
+ * flushing will take some time.
*/
- dqp->dq_flags |= XFS_DQ_FREEING;
- xfs_dqunlock(dqp);
-
- mutex_lock(&dqp->q_hash->qh_lock);
- list_del_init(&dqp->q_hashlist);
- dqp->q_hash->qh_version++;
- mutex_unlock(&dqp->q_hash->qh_lock);
-
- mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
- list_del_init(&dqp->q_mplist);
- mp->m_quotainfo->qi_dquots--;
- mp->m_quotainfo->qi_dqreclaims++;
- mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+ goto out_busy;
+ }
+ xfs_dqfunlock(dqp);
- ASSERT(dqp->q_nrefs == 0);
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
+ /*
+ * Prevent lookups now that we are past the point of no return.
+ */
+ dqp->dq_flags |= XFS_DQ_FREEING;
+ xfs_dqunlock(dqp);
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- return dqp;
-dqunlock:
- xfs_dqunlock(dqp);
- if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
- break;
- goto restart;
- }
+ ASSERT(dqp->q_nrefs == 0);
+ list_move_tail(&dqp->q_freelist, dispose_list);
+ xfs_Gqm->qm_dqfrlist_cnt--;
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- return NULL;
-}
+ trace_xfs_dqreclaim_done(dqp);
+ XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
+ return;
-/*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- */
-STATIC int
-xfs_qm_shake_freelist(
- int howmany)
-{
- int nreclaimed = 0;
- xfs_dquot_t *dqp;
+out_busy:
+ xfs_dqunlock(dqp);
- if (howmany <= 0)
- return 0;
+ /*
+ * Move the dquot to the tail of the list so that we don't spin on it.
+ */
+ list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
- while (nreclaimed < howmany) {
- dqp = xfs_qm_dqreclaim_one();
- if (!dqp)
- return nreclaimed;
- xfs_qm_dqdestroy(dqp);
- nreclaimed++;
- }
- return nreclaimed;
+ trace_xfs_dqreclaim_busy(dqp);
+ XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
}
-/*
- * The kmem_shake interface is invoked when memory is running low.
- */
-/* ARGSUSED */
STATIC int
xfs_qm_shake(
- struct shrinker *shrink,
- struct shrink_control *sc)
+ struct shrinker *shrink,
+ struct shrink_control *sc)
{
- int ndqused, nfree, n;
- gfp_t gfp_mask = sc->gfp_mask;
-
- if (!kmem_shake_allow(gfp_mask))
- return 0;
- if (!xfs_Gqm)
- return 0;
-
- nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
- /* incore dquots in all f/s's */
- ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-
- ASSERT(ndqused >= 0);
+ int nr_to_scan = sc->nr_to_scan;
+ LIST_HEAD (dispose_list);
+ struct xfs_dquot *dqp;
- if (nfree <= ndqused && nfree < ndquot)
+ if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
return 0;
+ if (!nr_to_scan)
+ goto out;
- ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */
- n = nfree - ndqused - ndquot; /* # over target */
-
- return xfs_qm_shake_freelist(MAX(nfree, n));
-}
-
-
-/*------------------------------------------------------------------*/
-
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
- xfs_dquot_t **O_dqpp)
-{
- xfs_dquot_t *dqp;
-
- /*
- * Check against high water mark to see if we want to pop
- * a nincompoop dquot off the freelist.
- */
- if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
- /*
- * Try to recycle a dquot from the freelist.
- */
- if ((dqp = xfs_qm_dqreclaim_one())) {
- XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
- /*
- * Just zero the core here. The rest will get
- * reinitialized by caller. XXX we shouldn't even
- * do this zero ...
- */
- memset(&dqp->q_core, 0, sizeof(dqp->q_core));
- *O_dqpp = dqp;
- return B_FALSE;
- }
- XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
+ mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+ while (!list_empty(&xfs_Gqm->qm_dqfrlist)) {
+ if (nr_to_scan-- <= 0)
+ break;
+ dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot,
+ q_freelist);
+ xfs_qm_dqreclaim_one(dqp, &dispose_list);
}
+ mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- /*
- * Allocate a brand new dquot on the kernel heap and return it
- * to the caller to initialize.
- */
- ASSERT(xfs_Gqm->qm_dqzone != NULL);
- *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
- atomic_inc(&xfs_Gqm->qm_totaldquots);
-
- return B_TRUE;
+ while (!list_empty(&dispose_list)) {
+ dqp = list_first_entry(&dispose_list, struct xfs_dquot,
+ q_freelist);
+ list_del_init(&dqp->q_freelist);
+ xfs_qm_dqfree_one(dqp);
+ }
+out:
+ return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
}
-
/*
* Start a transaction and write the incore superblock changes to
* disk. flags parameter indicates which fields have changed.
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 9b4f3adefbc..9a9b997e1a0 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -26,24 +26,12 @@
struct xfs_qm;
struct xfs_inode;
-extern uint ndquot;
extern struct mutex xfs_Gqm_lock;
extern struct xfs_qm *xfs_Gqm;
extern kmem_zone_t *qm_dqzone;
extern kmem_zone_t *qm_dqtrxzone;
/*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS 4
-
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO 2
-
-/*
* Dquot hashtable constants/threshold values.
*/
#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t))
@@ -74,7 +62,6 @@ typedef struct xfs_qm {
int qm_dqfrlist_cnt;
atomic_t qm_totaldquots; /* total incore dquots */
uint qm_nrefs; /* file systems with quota on */
- int qm_dqfree_ratio;/* ratio of free to inuse dquots */
kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
} xfs_qm_t;
@@ -143,7 +130,6 @@ extern int xfs_qm_quotacheck(xfs_mount_t *);
extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
/* dquot stuff */
-extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
index 8671a0b3264..5729ba57087 100644
--- a/fs/xfs/xfs_qm_stats.c
+++ b/fs/xfs/xfs_qm_stats.c
@@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v)
{
/* maximum; incore; ratio free to inuse; freelist */
seq_printf(m, "%d\t%d\t%d\t%u\n",
- ndquot,
+ 0,
xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
- xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
+ 0,
xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
return 0;
}
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index eafbcff81f3..711a86e39ff 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -813,11 +813,11 @@ xfs_qm_export_dquot(
(XFS_IS_OQUOTA_ENFORCED(mp) &&
(dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
dst->d_id != 0) {
- if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
+ if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) &&
(dst->d_blk_softlimit > 0)) {
ASSERT(dst->d_btimer != 0);
}
- if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
+ if (((int) dst->d_icount > (int) dst->d_ino_softlimit) &&
(dst->d_ino_softlimit > 0)) {
ASSERT(dst->d_itimer != 0);
}
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6b6df5802e9..bb134a81993 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \
DEFINE_DQUOT_EVENT(xfs_dqadjust);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
DEFINE_DQUOT_EVENT(xfs_dqattach_found);
DEFINE_DQUOT_EVENT(xfs_dqattach_get);
-DEFINE_DQUOT_EVENT(xfs_dqinit);
-DEFINE_DQUOT_EVENT(xfs_dqreuse);
DEFINE_DQUOT_EVENT(xfs_dqalloc);
DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
DEFINE_DQUOT_EVENT(xfs_dqread);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 329b06aba1c..7adcdf15ae0 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1151,8 +1151,8 @@ xfs_trans_add_item(
{
struct xfs_log_item_desc *lidp;
- ASSERT(lip->li_mountp = tp->t_mountp);
- ASSERT(lip->li_ailp = tp->t_mountp->m_ail);
+ ASSERT(lip->li_mountp == tp->t_mountp);
+ ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 4d00ee67792..c4ba366d24e 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -649,12 +649,12 @@ xfs_trans_dqresv(
* nblks.
*/
if (hardlimit > 0ULL &&
- hardlimit <= nblks + *resbcountp) {
+ hardlimit < nblks + *resbcountp) {
xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
goto error_return;
}
if (softlimit > 0ULL &&
- softlimit <= nblks + *resbcountp) {
+ softlimit < nblks + *resbcountp) {
if ((timer != 0 && get_seconds() > timer) ||
(warns != 0 && warns >= warnlimit)) {
xfs_quota_warn(mp, dqp,
@@ -677,11 +677,13 @@ xfs_trans_dqresv(
if (!softlimit)
softlimit = q->qi_isoftlimit;
- if (hardlimit > 0ULL && count >= hardlimit) {
+ if (hardlimit > 0ULL &&
+ hardlimit < ninos + count) {
xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
goto error_return;
}
- if (softlimit > 0ULL && count >= softlimit) {
+ if (softlimit > 0ULL &&
+ softlimit < ninos + count) {
if ((timer != 0 && get_seconds() > timer) ||
(warns != 0 && warns >= warnlimit)) {
xfs_quota_warn(mp, dqp,