aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/Makefile2
-rw-r--r--fs/aio.c4
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/extent-tree.c35
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c9
-rw-r--r--fs/ceph/dir.c30
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/compat.c69
-rw-r--r--fs/dcache.c121
-rw-r--r--fs/exec.c18
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext2/namei.c9
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c37
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/file_table.c55
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/inode.c32
-rw-r--r--fs/internal.h13
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jfs/namei.c5
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c1493
-rw-r--r--fs/namespace.c16
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c4
-rw-r--r--fs/nfs/nfs4proc.c131
-rw-r--r--fs/nfs/nfs4state.c29
-rw-r--r--fs/nfs/nfs4xdr.c4
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/nfs4callback.c2
-rw-r--r--fs/nfsd/nfs4state.c13
-rw-r--r--fs/nfsd/nfs4xdr.c4
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/segment.c3
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/quota.h3
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/ocfs2/super.c7
-rw-r--r--fs/open.c134
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/base.c30
-rw-r--r--fs/proc/inode.c8
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c7
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/namei.c6
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/udf/namei.c18
-rw-r--r--fs/ufs/namei.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/xfs_mru_cache.c2
80 files changed, 1818 insertions, 1485 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57ed..7cb53aafac1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
def_bool n
config EXPORTFS
- tristate
+ bool
config FILE_LOCKING
bool "Enable POSIX file locking API" if EXPERT
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c..ba01202844c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
+obj-$(CONFIG_FHANDLE) += fhandle.o
+
obj-y += quota/
obj-$(CONFIG_PROC_FS) += proc/
diff --git a/fs/aio.c b/fs/aio.c
index 26869cde395..7f54f43b8f7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -85,7 +85,7 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
- aio_wq = create_workqueue("aio");
+ aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
BUG_ON(!aio_wq || !abe_pool);
@@ -577,7 +577,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
spin_unlock(&fput_lock);
- queue_work(aio_wq, &fput_work);
+ schedule_work(&fput_work);
} else {
req->ki_filp = NULL;
really_put_req(ctx, req);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f820fa23df..7f78cc78fdd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
u64 disk_total; /* total bytes on disk, takes mirrors into
account */
+ /*
+ * we bump reservation progress every time we decrement
+ * bytes_reserved. This way people waiting for reservations
+ * know something good has happened and they can check
+ * for progress. The number here isn't to be trusted, it
+ * just shows reclaim activity
+ */
+ unsigned long reservation_progress;
+
int full; /* indicates that we cannot allocate any more
chunks for this space */
int force_alloc; /* set if we need to force a chunk alloc for
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b..b4ffad859ad 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
int len = *max_len;
int type;
- if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) ||
- (connectable && len < BTRFS_FID_SIZE_CONNECTABLE))
+ if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
+ *max_len = BTRFS_FID_SIZE_CONNECTABLE;
return 255;
+ } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
+ *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
+ return 255;
+ }
len = BTRFS_FID_SIZE_NON_CONNECTABLE;
type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 588ff984987..7b3089b5c2d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
- int pause = 1;
int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
+ unsigned long progress;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
reserved = space_info->bytes_reserved;
+ progress = space_info->reservation_progress;
if (reserved == 0)
return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_reserved) {
- loops = 0;
+ if (reserved > space_info->bytes_reserved)
reclaimed += reserved - space_info->bytes_reserved;
- } else {
- loops++;
- }
reserved = space_info->bytes_reserved;
spin_unlock(&space_info->lock);
+ loops++;
+
if (reserved == 0 || reclaimed >= max_reclaim)
break;
if (trans && trans->transaction->blocked)
return -EAGAIN;
- __set_current_state(TASK_INTERRUPTIBLE);
- time_left = schedule_timeout(pause);
+ time_left = schedule_timeout_interruptible(1);
/* We were interrupted, exit */
if (time_left)
break;
- pause <<= 1;
- if (pause > HZ / 10)
- pause = HZ / 10;
+ /* we've kicked the IO a few times, if anything has been freed,
+ * exit. There is no sense in looping here for a long time
+ * when we really need to commit the transaction, or there are
+ * just too many writers without enough free space
+ */
+
+ if (loops > 3) {
+ smp_mb();
+ if (progress != space_info->reservation_progress)
+ break;
+ }
}
return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
if (num_bytes) {
spin_lock(&space_info->lock);
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved >= block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_reserved -= num_bytes;
+ sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
to_reserve = 0;
}
spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
btrfs_set_block_group_used(&cache->item, old_val);
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
cache->space_info->bytes_used += num_bytes;
cache->space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
+ cache->space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
+ space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
if (ret) {
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_reserved -= buf->len;
+ cache->space_info->reservation_progress++;
spin_unlock(&cache->space_info->lock);
}
goto out;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd3f172e94e..714adc4ac4c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
while (!end) {
- off = extent_map_end(em);
- if (off >= max)
- end = 1;
+ u64 offset_in_extent;
+
+ /* break if the extent we found is outside the range */
+ if (em->start >= max || extent_map_end(em) < off)
+ break;
+
+ /*
+ * get_extent may return an extent that starts before our
+ * requested range. We have to make sure the ranges
+ * we return to fiemap always move forward and don't
+ * overlap, so adjust the offsets here
+ */
+ em_start = max(em->start, off);
- em_start = em->start;
- em_len = em->len;
+ /*
+ * record the offset from the start of the extent
+ * for adjusting the disk offset below
+ */
+ offset_in_extent = em_start - em->start;
em_end = extent_map_end(em);
+ em_len = em_end - em_start;
emflags = em->flags;
disko = 0;
flags = 0;
+ /*
+ * bump off for our next call to get_extent
+ */
+ off = extent_map_end(em);
+ if (off >= max)
+ end = 1;
+
if (em->block_start == EXTENT_MAP_LAST_BYTE) {
end = 1;
flags |= FIEMAP_EXTENT_LAST;
@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
- disko = em->block_start;
+ disko = em->block_start + offset_in_extent;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d594..f447b783bb8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/* Flush processor's dcache for this page */
flush_dcache_page(page);
+
+ /*
+ * if we get a partial write, we can end up with
+ * partially up to date pages. These add
+ * a lot of complexity, so make sure they don't
+ * happen by forcing this copy to be retried.
+ *
+ * The rest of the btrfs_file_write code will fall
+ * back to page at a time copies after we return 0.
+ */
+ if (!PageUptodate(page) && copied < count)
+ copied = 0;
+
iov_iter_advance(i, copied);
write_bytes -= copied;
total_copied += copied;
@@ -763,6 +776,27 @@ out:
}
/*
+ * on error we return an unlocked page and the error value
+ * on success we return a locked page and 0
+ */
+static int prepare_uptodate_page(struct page *page, u64 pos)
+{
+ int ret = 0;
+
+ if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+ ret = btrfs_readpage(NULL, page);
+ if (ret)
+ return ret;
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ return -EIO;
+ }
+ }
+ return 0;
+}
+
+/*
* this gets pages into the page cache and locks them down, it also properly
* waits for data=ordered extents to finish before allowing the pages to be
* modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode;
int err = 0;
+ int faili = 0;
u64 start_pos;
u64 last_pos;
@@ -794,15 +829,24 @@ again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) {
- int c;
- for (c = i - 1; c >= 0; c--) {
- unlock_page(pages[c]);
- page_cache_release(pages[c]);
- }
- return -ENOMEM;
+ faili = i - 1;
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ if (i == 0)
+ err = prepare_uptodate_page(pages[i], pos);
+ if (i == num_pages - 1)
+ err = prepare_uptodate_page(pages[i],
+ pos + write_bytes);
+ if (err) {
+ page_cache_release(pages[i]);
+ faili = i - 1;
+ goto fail;
}
wait_on_page_writeback(pages[i]);
}
+ err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
WARN_ON(!PageLocked(pages[i]));
}
return 0;
+fail:
+ while (faili >= 0) {
+ unlock_page(pages[faili]);
+ page_cache_release(pages[faili]);
+ faili--;
+ }
+ return err;
+
}
static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct page *pinned[2];
struct page **pages = NULL;
struct iov_iter i;
loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
- pinned[0] = NULL;
- pinned[1] = NULL;
-
start_pos = pos;
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
- /*
- * there are lots of better ways to do this, but this code
- * makes sure the first and last page in the file range are
- * up to date and ready for cow
- */
- if ((pos & (PAGE_CACHE_SIZE - 1))) {
- pinned[0] = grab_cache_page(inode->i_mapping, first_index);
- if (!PageUptodate(pinned[0])) {
- ret = btrfs_readpage(NULL, pinned[0]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[0]);
- } else {
- unlock_page(pinned[0]);
- }
- }
- if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
- pinned[1] = grab_cache_page(inode->i_mapping, last_index);
- if (!PageUptodate(pinned[1])) {
- ret = btrfs_readpage(NULL, pinned[1]);
- BUG_ON(ret);
- wait_on_page_locked(pinned[1]);
- } else {
- unlock_page(pinned[1]);
- }
- }
-
while (iov_iter_count(&i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
copied = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, &i);
- dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+
+ /*
+ * if we have trouble faulting in the pages, fall
+ * back to one page at a time
+ */
+ if (copied < write_bytes)
+ nrptrs = 1;
+
+ if (copied == 0)
+ dirty_pages = 0;
+ else
+ dirty_pages = (copied + offset +
+ PAGE_CACHE_SIZE - 1) >>
+ PAGE_CACHE_SHIFT;
if (num_pages > dirty_pages) {
if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
err = ret;
kfree(pages);
- if (pinned[0])
- page_cache_release(pinned[0]);
- if (pinned[1])
- page_cache_release(pinned[1]);
*ppos = pos;
/*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0efdb65953c..4a0107e1874 100644
--- a/