diff options
author | Nick Piggin <npiggin@suse.de> | 2009-01-04 12:00:53 -0800 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2009-01-18 10:35:43 -0800 |
commit | 8f0346e65d22136372207b6507bc646f9efb2804 (patch) | |
tree | 27a31a4376fb14c72fa702a27d6f5075662fd440 | |
parent | c9bb99e4921c92adfbfee5c17b2bfdd512da33c7 (diff) |
fs: symlink write_begin allocation context fix
commit 54566b2c1594c2326a645a3551f9d989f7ba3c5e upstream.
With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened. They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim. This bug could
cause filesystem deadlocks.
The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can be
called. It couldn't ever be GFP_ATOMIC, for example, because it needs to
take the page lock. The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.
Add a new flag for write_begin, AOP_FLAG_NOFS. Filesystems can now act on
this flag in their write_begin function. Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).
This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in ints write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a
random example).
[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <stable@kernel.org>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Cleaned up the calling convention: just pass in the AOP flags
untouched to the grab_cache_page_write_begin() function. That
just simplifies everybody, and may even allow future expansion of the
logic. - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r-- | fs/affs/file.c | 2 | ||||
-rw-r--r-- | fs/buffer.c | 4 | ||||
-rw-r--r-- | fs/ext3/inode.c | 2 | ||||
-rw-r--r-- | fs/ext3/namei.c | 3 | ||||
-rw-r--r-- | fs/ext4/inode.c | 4 | ||||
-rw-r--r-- | fs/ext4/namei.c | 3 | ||||
-rw-r--r-- | fs/fuse/file.c | 4 | ||||
-rw-r--r-- | fs/gfs2/ops_address.c | 2 | ||||
-rw-r--r-- | fs/hostfs/hostfs_kern.c | 2 | ||||
-rw-r--r-- | fs/jffs2/file.c | 2 | ||||
-rw-r--r-- | fs/libfs.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 13 | ||||
-rw-r--r-- | fs/nfs/file.c | 2 | ||||
-rw-r--r-- | fs/reiserfs/inode.c | 2 | ||||
-rw-r--r-- | fs/smbfs/file.c | 2 | ||||
-rw-r--r-- | fs/ubifs/file.c | 9 | ||||
-rw-r--r-- | include/linux/fs.h | 5 | ||||
-rw-r--r-- | include/linux/pagemap.h | 3 | ||||
-rw-r--r-- | mm/filemap.c | 17 |
19 files changed, 48 insertions, 35 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c index 1377b1240b6..9246cb4aa01 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -628,7 +628,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping } index = pos >> PAGE_CACHE_SHIFT; - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; *pagep = page; diff --git a/fs/buffer.c b/fs/buffer.c index 95a165d3cfe..a542f970737 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1988,7 +1988,7 @@ int block_write_begin(struct file *file, struct address_space *mapping, page = *pagep; if (page == NULL) { ownpage = 1; - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { status = -ENOMEM; goto out; @@ -2494,7 +2494,7 @@ int nobh_write_begin(struct file *file, struct address_space *mapping, from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; *pagep = page; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 507d8689b11..c5f40a05bca 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1152,7 +1152,7 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping, to = from + len; retry: - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; *pagep = page; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index de13e919cd8..1f5538263b6 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2187,8 +2187,7 @@ retry: * We have a transaction open. All is sweetness. It also sets * i_size in generic_commit_write(). */ - err = __page_symlink(inode, symname, l, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + err = __page_symlink(inode, symname, l, 1); if (err) { drop_nlink(inode); ext3_mark_inode_dirty(handle, inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 846a79096c2..d77f674d393 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1370,7 +1370,7 @@ retry: goto out; } - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { ext4_journal_stop(handle); ret = -ENOMEM; @@ -2421,7 +2421,7 @@ retry: goto out; } - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { ext4_journal_stop(handle); ret = -ENOMEM; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 387ad98350c..d626533f3c1 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2216,8 +2216,7 @@ retry: * We have a transaction open. All is sweetness. It also sets * i_size in generic_commit_write(). */ - err = __page_symlink(inode, symname, l, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + err = __page_symlink(inode, symname, l, 1); if (err) { clear_nlink(inode); ext4_mark_inode_dirty(handle, inode); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2bada6bbc31..c8206dbc1d8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -644,7 +644,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping, { pgoff_t index = pos >> PAGE_CACHE_SHIFT; - *pagep = __grab_cache_page(mapping, index); + *pagep = grab_cache_page_write_begin(mapping, index, flags); if (!*pagep) return -ENOMEM; return 0; @@ -777,7 +777,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, break; err = -ENOMEM; - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, 0); if (!page) break; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index e64a1b04117..c75df0624b5 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -675,7 +675,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, goto out_trans_fail; error = -ENOMEM; - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); *pagep = page; if (unlikely(!page)) goto out_endtrans; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index d6ecabf4d23..5e17cae9fe2 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -500,7 +500,7 @@ int hostfs_write_begin(struct file *file, struct address_space *mapping, { pgoff_t index = pos >> PAGE_CACHE_SHIFT; - *pagep = __grab_cache_page(mapping, index); + *pagep = grab_cache_page_write_begin(mapping, index, flags); if (!*pagep) return -ENOMEM; return 0; diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 5a98aa87c85..5edc2bf2058 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -132,7 +132,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, uint32_t pageofs = index << PAGE_CACHE_SHIFT; int ret = 0; - pg = __grab_cache_page(mapping, index); + pg = grab_cache_page_write_begin(mapping, index, flags); if (!pg) return -ENOMEM; *pagep = pg; diff --git a/fs/libfs.c b/fs/libfs.c index 1add676a19d..8fc2407d2fe 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -360,7 +360,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping, index = pos >> PAGE_CACHE_SHIFT; from = pos & (PAGE_CACHE_SIZE - 1); - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; diff --git a/fs/namei.c b/fs/namei.c index 50c21930c35..2259d21ae28 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2796,18 +2796,23 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) } } -int __page_symlink(struct inode *inode, const char *symname, int len, - gfp_t gfp_mask) +/* + * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS + */ +int __page_symlink(struct inode *inode, const char *symname, int len, int nofs) { struct address_space *mapping = inode->i_mapping; struct page *page; void *fsdata; int err; char *kaddr; + unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE; + if (nofs) + flags |= AOP_FLAG_NOFS; retry: err = pagecache_write_begin(NULL, mapping, 0, len-1, - AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); + flags, &page, &fsdata); if (err) goto fail; @@ -2831,7 +2836,7 @@ fail: int page_symlink(struct inode *inode, const char *symname, int len) { return __page_symlink(inode, symname, len, - mapping_gfp_mask(inode->i_mapping)); + !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); } const struct inode_operations page_symlink_inode_operations = { diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 78460657f5c..30541f0ea39 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -351,7 +351,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; *pagep = page; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 5699171212a..8c2615e802c 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2565,7 +2565,7 @@ static int reiserfs_write_begin(struct file *file, } index = pos >> PAGE_CACHE_SHIFT; - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; *pagep = page; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index e4f8d51a555..92d5e8ffb63 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -297,7 +297,7 @@ static int smb_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { pgoff_t index = pos >> PAGE_CACHE_SHIFT; - *pagep = __grab_cache_page(mapping, index); + *pagep = grab_cache_page_write_begin(mapping, index, flags); if (!*pagep) return -ENOMEM; return 0; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 3d698e2022b..40033dccbd9 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -213,7 +213,8 @@ static void release_existing_page_budget(struct ubifs_info *c) } static int write_begin_slow(struct address_space *mapping, - loff_t pos, unsigned len, struct page **pagep) + loff_t pos, unsigned len, struct page **pagep, + unsigned flags) { struct inode *inode = mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -241,7 +242,7 @@ static int write_begin_slow(struct address_space *mapping, if (unlikely(err)) return err; - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (unlikely(!page)) { ubifs_release_budget(c, &req); return -ENOMEM; @@ -432,7 +433,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, return -EROFS; /* Try out the fast-path part first */ - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); if (unlikely(!page)) return -ENOMEM; @@ -477,7 +478,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, unlock_page(page); page_cache_release(page); - return write_begin_slow(mapping, pos, len, pagep); + return write_begin_slow(mapping, pos, len, pagep, flags); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513668f..d621217149f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -403,6 +403,9 @@ enum positive_aop_returns { #define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ #define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ +#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct + * helper code (eg buffer layer) + * to clear GFP_FS from alloc */ /* * oh the beauties of C type declarations. @@ -1959,7 +1962,7 @@ extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); extern int __page_symlink(struct inode *inode, const char *symname, int len, - gfp_t gfp_mask); + int nofs); extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern int generic_readlink(struct dentry *, char __user *, int); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 5da31c12101..62efcebf876 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -213,7 +213,8 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, int tag, unsigned int nr_pages, struct page **pages); -struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index); +struct page *grab_cache_page_write_begin(struct address_space *mapping, + pgoff_t index, unsigned flags); /* * Returns locked page at given index in given cache, creating it if needed. diff --git a/mm/filemap.c b/mm/filemap.c index 1cd0b87861b..f3033d0f5e5 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2033,7 +2033,7 @@ int pagecache_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; struct page *page; again: - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, flags); *pagep = page; if (!page) return -ENOMEM; @@ -2197,19 +2197,24 @@ EXPORT_SYMBOL(generic_file_direct_write); * Find or create a page at the given pagecache position. Return the locked * page. This function is specifically for buffered writes. */ -struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index) +struct page *grab_cache_page_write_begin(struct address_space *mapping, + pgoff_t index, unsigned flags) { int status; struct page *page; + gfp_t gfp_notmask = 0; + if (flags & AOP_FLAG_NOFS) + gfp_notmask = __GFP_FS; repeat: page = find_lock_page(mapping, index); if (likely(page)) return page; - page = page_cache_alloc(mapping); + page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask); if (!page) return NULL; - status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); + status = add_to_page_cache_lru(page, mapping, index, + GFP_KERNEL & ~gfp_notmask); if (unlikely(status)) { page_cache_release(page); if (status == -EEXIST) @@ -2218,7 +2223,7 @@ repeat: } return page; } -EXPORT_SYMBOL(__grab_cache_page); +EXPORT_SYMBOL(grab_cache_page_write_begin); static ssize_t generic_perform_write_2copy(struct file *file, struct iov_iter *i, loff_t pos) @@ -2263,7 +2268,7 @@ static ssize_t generic_perform_write_2copy(struct file *file, break; } - page = __grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, 0); if (!page) { status = -ENOMEM; break; |