diff options
Diffstat (limited to 'fs/gfs2')
36 files changed, 2113 insertions, 1628 deletions
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index f69ac0af549..3088e2a38e3 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -49,10 +49,6 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) if (!ip->i_eattr) return NULL; - acl = get_cached_acl(&ip->i_inode, type); - if (acl != ACL_NOT_CACHED) - return acl; - name = gfs2_acl_name(type); if (name == NULL) return ERR_PTR(-EINVAL); @@ -68,19 +64,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) return acl; } -static int gfs2_set_mode(struct inode *inode, umode_t mode) -{ - int error = 0; - - if (mode != inode->i_mode) { - inode->i_mode = mode; - mark_inode_dirty(inode); - } - - return error; -} - -static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) +int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int error; int len; @@ -88,219 +72,50 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) const char *name = gfs2_acl_name(type); BUG_ON(name == NULL); - len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); - if (len == 0) - return 0; - data = kmalloc(len, GFP_NOFS); - if (data == NULL) - return -ENOMEM; - error = posix_acl_to_xattr(&init_user_ns, acl, data, len); - if (error < 0) - goto out; - error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); - if (!error) - set_cached_acl(inode, type, acl); -out: - kfree(data); - return error; -} - -int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) -{ - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct posix_acl *acl; - umode_t mode = inode->i_mode; - int error = 0; - - if (!sdp->sd_args.ar_posix_acl) - return 0; - if (S_ISLNK(inode->i_mode)) - return 0; - - acl = gfs2_get_acl(&dip->i_inode, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (!acl) { - mode &= ~current_umask(); - return gfs2_set_mode(inode, mode); - } - - if (S_ISDIR(inode->i_mode)) { - error = gfs2_acl_set(inode, ACL_TYPE_DEFAULT, acl); - if (error) - goto out; - } - - error = posix_acl_create(&acl, GFP_NOFS, &mode); - if (error < 0) - return error; - - if (error == 0) - goto munge; - - error = gfs2_acl_set(inode, ACL_TYPE_ACCESS, acl); - if (error) - goto out; -munge: - error = gfs2_set_mode(inode, mode); -out: - posix_acl_release(acl); - return error; -} - -int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) -{ - struct inode *inode = &ip->i_inode; - struct posix_acl *acl; - char *data; - unsigned int len; - int error; - - acl = gfs2_get_acl(&ip->i_inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (!acl) - return gfs2_setattr_simple(inode, attr); - - error = posix_acl_chmod(&acl, GFP_NOFS, attr->ia_mode); - if (error) - return error; - - len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); - data = kmalloc(len, GFP_NOFS); - error = -ENOMEM; - if (data == NULL) - goto out; - posix_acl_to_xattr(&init_user_ns, acl, data, len); - error = gfs2_xattr_acl_chmod(ip, attr, data); - kfree(data); - set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); - -out: - posix_acl_release(acl); - return error; -} - -static int gfs2_acl_type(const char *name) -{ - if (strcmp(name, GFS2_POSIX_ACL_ACCESS) == 0) - return ACL_TYPE_ACCESS; - if (strcmp(name, GFS2_POSIX_ACL_DEFAULT) == 0) - return ACL_TYPE_DEFAULT; - return -EINVAL; -} - -static int gfs2_xattr_system_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int xtype) -{ - struct inode *inode = dentry->d_inode; - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct posix_acl *acl; - int type; - int error; - - if (!sdp->sd_args.ar_posix_acl) - return -EOPNOTSUPP; - - type = gfs2_acl_type(name); - if (type < 0) - return type; - - acl = gfs2_get_acl(inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - - error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); - posix_acl_release(acl); - - return error; -} - -static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, - int xtype) -{ - struct inode *inode = dentry->d_inode; - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct posix_acl *acl = NULL; - int error = 0, type; - - if (!sdp->sd_args.ar_posix_acl) - return -EOPNOTSUPP; - type = gfs2_acl_type(name); - if (type < 0) - return type; - if (flags & XATTR_CREATE) - return -EINVAL; - if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; - if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - if (!value) - goto set_acl; - - acl = posix_acl_from_xattr(&init_user_ns, value, size); - if (!acl) { - /* - * acl_set_file(3) may request that we set default ACLs with - * zero length -- defend (gracefully) against that here. - */ - goto out; - } - if (IS_ERR(acl)) { - error = PTR_ERR(acl); - goto out; - } - - error = posix_acl_valid(acl); - if (error) - goto out_release; - - error = -EINVAL; - if (acl->a_count > GFS2_ACL_MAX_ENTRIES) - goto out_release; + if (acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) + return -E2BIG; if (type == ACL_TYPE_ACCESS) { umode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; - if (error <= 0) { - posix_acl_release(acl); + if (error == 0) acl = NULL; - if (error < 0) - return error; + if (mode != inode->i_mode) { + inode->i_mode = mode; + mark_inode_dirty(inode); } - - error = gfs2_set_mode(inode, mode); - if (error) - goto out_release; } -set_acl: - error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS); - if (!error) { - if (acl) - set_cached_acl(inode, type, acl); - else - forget_cached_acl(inode, type); + if (acl) { + len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); + if (len == 0) + return 0; + data = kmalloc(len, GFP_NOFS); + if (data == NULL) + return -ENOMEM; + error = posix_acl_to_xattr(&init_user_ns, acl, data, len); + if (error < 0) + goto out; + } else { + data = NULL; + len = 0; } -out_release: - posix_acl_release(acl); + + error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); + if (error) + goto out; + + if (acl) + set_cached_acl(inode, type, acl); + else + forget_cached_acl(inode, type); out: + kfree(data); return error; } - -const struct xattr_handler gfs2_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .flags = GFS2_EATYPE_SYS, - .get = gfs2_xattr_system_get, - .set = gfs2_xattr_system_set, -}; - diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 0da38dc7efe..2d65ec4cd4b 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -14,11 +14,9 @@ #define GFS2_POSIX_ACL_ACCESS "posix_acl_access" #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" -#define GFS2_ACL_MAX_ENTRIES 25 +#define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12) extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type); -extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); -extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); -extern const struct xattr_handler gfs2_xattr_system_handler; +extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ee48ad37d9c..805b37fed63 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -21,6 +21,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/backing-dev.h> #include <linux/aio.h> +#include <trace/events/writeback.h> #include "gfs2.h" #include "incore.h" @@ -122,14 +123,13 @@ out: } /** - * gfs2_writeback_writepage - Write page for writeback mappings + * gfs2_writepage - Write page for writeback mappings * @page: The page * @wbc: The writeback control * */ -static int gfs2_writeback_writepage(struct page *page, - struct writeback_control *wbc) +static int gfs2_writepage(struct page *page, struct writeback_control *wbc) { int ret; @@ -141,32 +141,6 @@ static int gfs2_writeback_writepage(struct page *page, } /** - * gfs2_ordered_writepage - Write page for ordered data files - * @page: The page to write - * @wbc: The writeback control - * - */ - -static int gfs2_ordered_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - int ret; - - ret = gfs2_writepage_common(page, wbc); - if (ret <= 0) - return ret; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); - return block_write_full_page(page, gfs2_get_block_noalloc, wbc); -} - -/** * __gfs2_jdata_writepage - The core of jdata writepage * @page: The page to write * @wbc: The writeback control @@ -257,13 +231,11 @@ static int gfs2_writepages(struct address_space *mapping, static int gfs2_write_jdata_pagevec(struct address_space *mapping, struct writeback_control *wbc, struct pagevec *pvec, - int nr_pages, pgoff_t end) + int nr_pages, pgoff_t end, + pgoff_t *done_index) { struct inode *inode = mapping->host; struct gfs2_sbd *sdp = GFS2_SB(inode); - loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; - unsigned offset = i_size & (PAGE_CACHE_SIZE-1); unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); int i; int ret; @@ -275,40 +247,83 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, for(i = 0; i < nr_pages; i++) { struct page *page = pvec->pages[i]; + /* + * At this point, the page may be truncated or + * invalidated (changing page->mapping to NULL), or + * even swizzled back from swapper_space to tmpfs file + * mapping. However, page->index will not change + * because we have a reference on the page. + */ + if (page->index > end) { + /* + * can't be range_cyclic (1st pass) because + * end == -1 in that case. + */ + ret = 1; + break; + } + + *done_index = page->index; + lock_page(page); if (unlikely(page->mapping != mapping)) { +continue_unlock: unlock_page(page); continue; } - if (!wbc->range_cyclic && page->index > end) { - ret = 1; - unlock_page(page); - continue; + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; } - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; + if (PageWriteback(page)) { + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + else + goto continue_unlock; } - /* Is the page fully outside i_size? (truncate in progress) */ - if (page->index > end_index || (page->index == end_index && !offset)) { - page->mapping->a_ops->invalidatepage(page, 0, - PAGE_CACHE_SIZE); - unlock_page(page); - continue; - } + BUG_ON(PageWriteback(page)); + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; + + trace_wbc_writepage(wbc, mapping->backing_dev_info); ret = __gfs2_jdata_writepage(page, wbc); + if (unlikely(ret)) { + if (ret == AOP_WRITEPAGE_ACTIVATE) { + unlock_page(page); + ret = 0; + } else { + + /* + * done_index is set past this page, + * so media errors will not choke + * background writeout for the entire + * file. This has consequences for + * range_cyclic semantics (ie. it may + * not be suitable for data integrity + * writeout). + */ + *done_index = page->index + 1; + ret = 1; + break; + } + } - if (ret || (--(wbc->nr_to_write) <= 0)) + /* + * We stop writing back only if we are not doing + * integrity sync. In case of integrity sync we have to + * keep going until we have written all the pages + * we tagged for writeback prior to entering this loop. + */ + if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { ret = 1; + break; + } + } gfs2_trans_end(sdp); return ret; @@ -333,51 +348,69 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, int done = 0; struct pagevec pvec; int nr_pages; + pgoff_t uninitialized_var(writeback_index); pgoff_t index; pgoff_t end; - int scanned = 0; + pgoff_t done_index; + int cycled; int range_whole = 0; + int tag; pagevec_init(&pvec, 0); if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ + writeback_index = mapping->writeback_index; /* prev offset */ + index = writeback_index; + if (index == 0) + cycled = 1; + else + cycled = 0; end = -1; } else { index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - scanned = 1; + cycled = 1; /* ignore range_cyclic tests */ } + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) + tag = PAGECACHE_TAG_TOWRITE; + else + tag = PAGECACHE_TAG_DIRTY; retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - scanned = 1; - ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) + tag_pages_for_writeback(mapping, index, end); + done_index = index; + while (!done && (index <= end)) { + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end, &done_index); if (ret) done = 1; if (ret > 0) ret = 0; - pagevec_release(&pvec); cond_resched(); } - if (!scanned && !done) { + if (!cycled && !done) { /* + * range_cyclic: * We hit the last page and there is more work to be done: wrap * back to the start of the file */ - scanned = 1; + cycled = 1; index = 0; + end = writeback_index - 1; goto retry; } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; + mapping->writeback_index = done_index; + return ret; } @@ -398,7 +431,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping, ret = gfs2_write_cache_jdata(mapping, wbc); if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { - gfs2_log_flush(sdp, ip->i_gl); + gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); ret = gfs2_write_cache_jdata(mapping, wbc); } return ret; @@ -544,7 +577,6 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, p = kmap_atomic(page); memcpy(buf + copied, p + offset, amt); kunmap_atomic(p); - mark_page_accessed(page); page_cache_release(page); copied += amt; index++; @@ -638,12 +670,14 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); if (alloc_required) { + struct gfs2_alloc_parms ap = { .aflags = 0, }; error = gfs2_quota_lock_check(ip); if (error) goto out_unlock; requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip, requested, 0); + ap.target = requested; + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_qunlock; } @@ -842,6 +876,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; int ret; + struct gfs2_trans *tr = current->journal_info; + BUG_ON(!tr); BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); @@ -852,8 +888,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, goto failed; } - gfs2_trans_add_meta(ip->i_gl, dibh); - if (gfs2_is_stuffed(ip)) return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); @@ -861,6 +895,11 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, gfs2_page_add_databufs(ip, page, from, to); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + else + gfs2_trans_add_meta(ip->i_gl, dibh); + if (inode == sdp->sd_rindex) { adjust_fs_space(inode); @@ -1001,11 +1040,11 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + struct address_space *mapping = inode->i_mapping; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int rv; @@ -1026,9 +1065,39 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, if (rv != 1) goto out; /* dio not valid, fall back to buffered i/o */ - rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, gfs2_get_block_direct, - NULL, NULL, 0); + /* + * Now since we are holding a deferred (CW) lock at this point, you + * might be wondering why this is ever needed. There is a case however + * where we've granted a deferred local lock against a cached exclusive + * glock. That is ok provided all granted local locks are deferred, but + * it also means that it is possible to encounter pages which are + * cached and possibly also mapped. So here we check for that and sort + * them out ahead of the dio. The glock state machine will take care of + * everything else. + * + * If in fact the cached glock state (gl->gl_state) is deferred (CW) in + * the first place, mapping->nr_pages will always be zero. + */ + if (mapping->nrpages) { + loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); + loff_t len = iov_iter_count(iter); + loff_t end = PAGE_ALIGN(offset + len) - 1; + + rv = 0; + if (len == 0) + goto out; + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) + unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); + rv = filemap_write_and_wait_range(mapping, lstart, end); + if (rv) + goto out; + if (rw == WRITE) + truncate_inode_pages_range(mapping, lstart, end); + } + + rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, + iter, offset, + gfs2_get_block_direct, NULL, NULL, 0); out: gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); @@ -1070,30 +1139,22 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) bh = bh->b_this_page; } while(bh != head); spin_unlock(&sdp->sd_ail_lock); - gfs2_log_unlock(sdp); head = bh = page_buffers(page); do { - gfs2_log_lock(sdp); bd = bh->b_private; if (bd) { gfs2_assert_warn(sdp, bd->bd_bh == bh); - if (!list_empty(&bd->bd_list)) { - if (!buffer_pinned(bh)) - list_del_init(&bd->bd_list); - else - bd = NULL; - } - if (bd) - bd->bd_bh = NULL; + if (!list_empty(&bd->bd_list)) + list_del_init(&bd->bd_list); + bd->bd_bh = NULL; bh->b_private = NULL; - } - gfs2_log_unlock(sdp); - if (bd) kmem_cache_free(gfs2_bufdata_cachep, bd); + } bh = bh->b_this_page; } while (bh != head); + gfs2_log_unlock(sdp); return try_to_free_buffers(page); @@ -1107,7 +1168,7 @@ cannot_release: } static const struct address_space_operations gfs2_writeback_aops = { - .writepage = gfs2_writeback_writepage, + .writepage = gfs2_writepage, .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, @@ -1123,7 +1184,7 @@ static const struct address_space_operations gfs2_writeback_aops = { }; static const struct address_space_operations gfs2_ordered_aops = { - .writepage = gfs2_ordered_writepage, + .writepage = gfs2_writepage, .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 5e2f56fccf6..e6ee5b6e8d9 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -707,7 +707,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi * @top: The first pointer in the buffer * @bottom: One more than the last pointer * @height: the height this buffer is at - * @data: a pointer to a struct strip_mine + * @sm: a pointer to a struct strip_mine * * Returns: errno */ @@ -992,6 +992,8 @@ unlock: return err; } +#define GFS2_JTRUNC_REVOKES 8192 + /** * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files * @inode: The inode being truncated @@ -1003,8 +1005,6 @@ unlock: * if the number of pages being truncated gets too large. */ -#define GFS2_JTRUNC_REVOKES 8192 - static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) { struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -1016,7 +1016,7 @@ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize chunk = oldsize - newsize; if (chunk > max_chunk) chunk = max_chunk; - truncate_pagecache(inode, oldsize, oldsize - chunk); + truncate_pagecache(inode, oldsize - chunk); oldsize -= chunk; gfs2_trans_end(sdp); error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); @@ -1067,7 +1067,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) if (journaled) error = gfs2_journaled_truncate(inode, oldsize, newsize); else - truncate_pagecache(inode, oldsize, newsize); + truncate_pagecache(inode, newsize); if (error) { brelse(dibh); @@ -1216,6 +1216,7 @@ static int do_grow(struct inode *inode, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_alloc_parms ap = { .target = 1, }; struct buffer_head *dibh; int error; int unstuff = 0; @@ -1226,7 +1227,7 @@ static int do_grow(struct inode *inode, u64 size) if (error) return error; - error = gfs2_inplace_reserve(ip, 1, 0); + error = gfs2_inplace_reserve(ip, &ap); if (error) goto do_grow_qunlock; unstuff = 1; @@ -1279,6 +1280,7 @@ do_grow_qunlock: int gfs2_setattr_size(struct inode *inode, u64 newsize) { + struct gfs2_inode *ip = GFS2_I(inode); int ret; u64 oldsize; @@ -1294,7 +1296,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) inode_dio_wait(inode); - ret = gfs2_rs_alloc(GFS2_I(inode)); + ret = gfs2_rs_alloc(ip); if (ret) goto out; @@ -1304,6 +1306,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) goto out; } + gfs2_rs_deltree(ip->i_res); ret = do_shrink(inode, oldsize, newsize); out: put_write_access(inode); @@ -1325,6 +1328,121 @@ int gfs2_file_dealloc(struct gfs2_inode *ip) } /** + * gfs2_free_journal_extents - Free cached journal bmap info + * @jd: The journal + * + */ + +void gfs2_free_journal_extents(struct gfs2_jdesc *jd) +{ + struct gfs2_journal_extent *jext; + + while(!list_empty(&jd->extent_list)) { + jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list); + list_del(&jext->list); + kfree(jext); + } +} + +/** + * gfs2_add_jextent - Add or merge a new extent to extent cache + * @jd: The journal descriptor + * @lblock: The logical block at start of new extent + * @dblock: The physical block at start of new extent + * @blocks: Size of extent in fs blocks + * + * Returns: 0 on success or -ENOMEM + */ + +static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks) +{ + struct gfs2_journal_extent *jext; + + if (!list_empty(&jd->extent_list)) { + jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list); + if ((jext->dblock + jext->blocks) == dblock) { + jext->blocks += blocks; + return 0; + } + } + + jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS); + if (jext == NULL) + return -ENOMEM; + jext->dblock = dblock; + jext->lblock = lblock; + jext->blocks = blocks; + list_add_tail(&jext->list, &jd->extent_list); + jd->nr_extents++; + return 0; +} + +/** + * gfs2_map_journal_extents - Cache journal bmap info + * @sdp: The super block + * @jd: The journal to map + * + * Create a reusable "extent" mapping from all logical + * blocks to all physical blocks for the given journal. This will save + * us time when writing journal blocks. Most journals will have only one + * extent that maps all their logical blocks. That's because gfs2.mkfs + * arranges the journal blocks sequentially to maximize performance. + * So the extent would map the first block for the entire file length. + * However, gfs2_jadd can happen while file activity is happening, so + * those journals may not be sequential. Less likely is the case where + * the users created their own journals by mounting the metafs and + * laying it out. But it's still possible. These journals might have + * several extents. + * + * Returns: 0 on success, or error on failure + */ + +int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) +{ + u64 lblock = 0; + u64 lblock_stop; + struct gfs2_inode *ip = GFS2_I(jd->jd_inode); + struct buffer_head bh; + unsigned int shift = sdp->sd_sb.sb_bsize_shift; + u64 size; + int rc; + + lblock_stop = i_size_read(jd->jd_inode) >> shift; + size = (lblock_stop - lblock) << shift; + jd->nr_extents = 0; + WARN_ON(!list_empty(&jd->extent_list)); + + do { + bh.b_state = 0; + bh.b_blocknr = 0; + bh.b_size = size; + rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0); + if (rc || !buffer_mapped(&bh)) + goto fail; + rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift); + if (rc) + goto fail; + size -= bh.b_size; + lblock += (bh.b_size >> ip->i_inode.i_blkbits); + } while(size > 0); + + fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid, + jd->nr_extents); + return 0; + +fail: + fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n", + rc, jd->jd_jid, + (unsigned long long)(i_size_read(jd->jd_inode) - size), + jd->nr_extents); + fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n", + rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr, + bh.b_state, (unsigned long long)bh.b_size); + gfs2_free_journal_extents(jd); + return rc; +} + +/** * gfs2_write_alloc_required - figure out if a write will require an allocation * @ip: the file being written to * @offset: the offset to write to diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index 42fea03e2bd..81ded5e2aaa 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -55,5 +55,7 @@ extern int gfs2_truncatei_resume(struct gfs2_inode *ip); extern int gfs2_file_dealloc(struct gfs2_inode *ip); extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, unsigned int len); +extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); +extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd); #endif /* __BMAP_DOT_H__ */ diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index f2448ab2aac..d3a5d4e29ba 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -93,12 +93,9 @@ invalid_gunlock: if (!had_lock) gfs2_glock_dq_uninit(&d_gh); invalid: - if (inode && S_ISDIR(inode->i_mode)) { - if (have_submounts(dentry)) - goto valid; - shrink_dcache_parent(dentry); - } - d_drop(dentry); + if (check_submounts_and_drop(dentry) != 0) + goto valid; + dput(parent); return 0; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 0cb4c1557f2..1a349f9a968 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -53,6 +53,8 @@ * but never before the maximum hash table size has been reached. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/buffer_head.h> @@ -507,8 +509,8 @@ static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset, goto error; return 0; error: - printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg, - first ? "first in block" : "not first in block"); + pr_warn("%s: %s (%s)\n", + __func__, msg, first ? "first in block" : "not first in block"); return -EIO; } @@ -531,8 +533,7 @@ static int gfs2_dirent_offset(const void *buf) } return offset; wrong_type: - printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n", - be32_to_cpu(h->mh_type)); + pr_warn("%s: wrong block type %u\n", __func__, be32_to_cpu(h->mh_type)); return -1; } @@ -728,7 +729,7 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { - /* printk(KERN_INFO "block num=%llu\n", leaf_no); */ + /* pr_info("block num=%llu\n", leaf_no); */ error = -EIO; } @@ -834,6 +835,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, struct gfs2_leaf *leaf; struct gfs2_dirent *dent; struct qstr name = { .name = "" }; + struct timespec tv = CURRENT_TIME; error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); if (error) @@ -850,7 +852,11 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, leaf->lf_entries = 0; leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); leaf->lf_next = 0; - memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved)); + leaf->lf_inode = cpu_to_be64(ip->i_no_addr); + leaf->lf_dist = cpu_to_be32(1); + leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); + leaf->lf_sec = cpu_to_be64(tv.tv_sec); + memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2)); dent = (struct gfs2_dirent *)(leaf+1); gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); *pbh = bh; @@ -1001,7 +1007,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); half_len = len >> 1; if (!half_len) { - printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); + pr_warn("i_depth %u lf_depth %u index %u\n", + dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); gfs2_consist_inode(dip); error = -EIO; goto fail_brelse; @@ -1612,11 +1619,31 @@ out: return ret; } +/** + * dir_new_leaf - Add a new leaf onto hash chain + * @inode: The directory + * @name: The name we are adding + * + * This adds a new dir leaf onto an existing leaf when there is not + * enough space to add a new dir entry. This is a last resort after + * we've expanded the hash table to max size and also split existing + * leaf blocks, so it will only occur for very large directories. + * + * The dist parameter is set to 1 for leaf blocks directly attached + * to the hash table, 2 for one layer of indirection, 3 for two layers + * etc. We are thus able to tell the difference between an old leaf + * with dist set to zero (i.e. "don't know") and a new one where we + * set this information for debug/fsck purposes. + * + * Returns: 0 on success, or -ve on error + */ + static int dir_new_leaf(struct inode *inode, const struct qstr *name) { struct buffer_head *bh, *obh; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_leaf *leaf, *oleaf; + u32 dist = 1; int error; u32 index; u64 bn; @@ -1626,6 +1653,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) if (error) return error; do { + dist++; oleaf = (struct gfs2_leaf *)obh->b_data; bn = be64_to_cpu(oleaf->lf_next); if (!bn) @@ -1643,6 +1671,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) brelse(obh); return -ENOSPC; } + leaf->lf_dist = cpu_to_be32(dist); oleaf->lf_next = cpu_to_be64(bh->b_blocknr); brelse(bh); brelse(obh); @@ -1657,41 +1686,64 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) return 0; } +static u16 gfs2_inode_ra_len(const struct gfs2_inode *ip) +{ + u64 where = ip->i_no_addr + 1; + if (ip->i_eattr == where) + return 1; + return 0; +} + /** * gfs2_dir_add - Add new filename into directory - * @dip: The GFS2 inode - * @filename: The new name - * @inode: The inode number of the entry - * @type: The type of the entry + * @inode: The directory inode + * @name: The new name + * @nip: The GFS2 inode to be linked in to the directory + * @da: The directory addition info + * + * If the call to gfs2_diradd_alloc_required resulted in there being + * no need to allocate any new directory blocks, then it will contain + * a pointer to the directory entry and the bh in which it resides. We + * can use that without having to repeat the search. If there was no + * free space, then we must now create more space. * * Returns: 0 on success, error code on failure */ int gfs2_dir_add(struct inode *inode, const struct qstr *name, - const struct gfs2_inode *nip) + const struct gfs2_inode *nip, struct gfs2_diradd *da) { struct gfs2_inode *ip = GFS2_I(inode); - struct buffer_head *bh; - struct gfs2_dirent *dent; + struct buffer_head *bh = da->bh; + struct gfs2_dirent *dent = da->dent; + struct timespec tv; struct gfs2_leaf *leaf; int error; while(1) { - dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, - &bh); + if (da->bh == NULL) { + dent = gfs2_dirent_search(inode, name, + gfs2_dirent_find_space, &bh); + } if (dent) { if (IS_ERR(dent)) return PTR_ERR(dent); dent = gfs2_init_dirent(inode, dent, name, bh); gfs2_inum_out(nip, dent); dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); + dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip)); + tv = CURRENT_TIME; if (ip->i_diskflags & GFS2_DIF_EXHASH) { leaf = (struct gfs2_leaf *)bh->b_data; be16_add_cpu(&leaf->lf_entries, 1); + leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); + leaf->lf_sec = cpu_to_be64(tv.tv_sec); } + da->dent = NULL; + da->bh = NULL; brelse(bh); ip->i_entries++; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv; if (S_ISDIR(nip->i_inode.i_mode)) inc_nlink(&ip->i_inode); mark_inode_dirty(inode); @@ -1742,6 +1794,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) const struct qstr *name = &dentry->d_name; struct gfs2_dirent *dent, *prev = NULL; struct buffer_head *bh; + struct timespec tv = CURRENT_TIME; /* Returns _either_ the entry (if its first in block) or the previous entry otherwise */ @@ -1767,13 +1820,15 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) if (!entries) gfs2_consist_inode(dip); leaf->lf_entries = cpu_to_be16(--entries); + leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); + leaf->lf_sec = cpu_to_be64(tv.tv_sec); } brelse(bh); if (!dip->i_entries) gfs2_consist_inode(dip); dip->i_entries--; - dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; + dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv; if (S_ISDIR(dentry->d_inode->i_mode)) drop_nlink(&dip->i_inode); mark_inode_dirty(&dip->i_inode); @@ -1859,7 +1914,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); - ht = kzalloc(size, GFP_NOFS); + ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN); if (ht == NULL) ht = vzalloc(size); if (!ht) @@ -2017,22 +2072,36 @@ out: * gfs2_diradd_alloc_required - find if adding entry will require an allocation * @ip: the file being written to * @filname: the filename that's going to be added + * @da: The structure to return dir alloc info * - * Returns: 1 if alloc required, 0 if not, -ve on error + * Returns: 0 if ok, -ve on error */ -int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name) +int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name, + struct gfs2_diradd *da) { + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + const unsigned int extra = sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf); struct gfs2_dirent *dent; struct buffer_head *bh; + da->nr_blocks = 0; + da->bh = NULL; + da->dent = NULL; + dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); if (!dent) { - return 1; + da->nr_blocks = sdp->sd_max_dirres; + if (!(ip->i_diskflags & GFS2_DIF_EXHASH) && + (GFS2_DIRENT_SIZE(name->len) < extra)) + da->nr_blocks = 1; + return 0; } if (IS_ERR(dent)) return PTR_ERR(dent); - brelse(bh); + da->bh = bh; + da->dent = dent; return 0; } diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 4f03bbd1873..126c65dda02 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -16,6 +16,14 @@ struct inode; struct gfs2_inode; struct gfs2_inum; +struct buffer_head; +struct gfs2_dirent; + +struct gfs2_diradd { + unsigned nr_blocks; + struct gfs2_dirent *dent; + struct buffer_head *bh; +}; extern struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename, @@ -23,7 +31,13 @@ extern struct inode *gfs2_dir_search(struct inode *dir, extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, const struct gfs2_inode *ip); extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inode *ip); + const struct gfs2_inode *ip, struct gfs2_diradd *da); +static inline void gfs2_dir_no_add(struct gfs2_diradd *da) +{ + if (da->bh) + brelse(da->bh); + da->bh = NULL; +} extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, struct file_ra_state *f_ra); @@ -33,7 +47,8 @@ extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); extern int gfs2_diradd_alloc_required(struct inode *dir, - const struct qstr *filename); + const struct qstr *filename, + struct gfs2_diradd *da); extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp); extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 72c3866a732..26b3f952e6b 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -203,9 +203,9 @@ void gfs2_set_inode_flags(struct inode *inode) GFS2_DIF_INHERIT_JDATA) /** - * gfs2_set_flags - set flags on an inode - * @inode: The inode - * @flags: The flags to set + * do_gfs2_set_flags - set flags on an inode + * @filp: file pointer + * @reqflags: The flags to set * @mask: Indicates which flags are valid * */ @@ -256,7 +256,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) } if ((flags ^ new_flags) & GFS2_DIF_JDATA) { if (flags & GFS2_DIF_JDATA) - gfs2_log_flush(sdp, ip->i_gl); + gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); error = filemap_fdatawrite(inode->i_mapping); if (error) goto out; @@ -318,7 +318,7 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /** * gfs2_size_hint - Give a hint to the size of a write request - * @file: The struct file + * @filep: The struct file * @offset: The file offset of the write * @size: The length of the write * @@ -371,7 +371,7 @@ static int gfs2_allocate_page_backing(struct page *page) /** * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable * @vma: The virtual memory area - * @page: The page which is about to become writable + * @vmf: The virtual memory fault containing the page to become writable * * When the page becomes writable, we need to ensure that we have * blocks allocated on disk to back that page. @@ -383,6 +383,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct inode *inode = file_inode(vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; @@ -430,7 +431,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); + ap.target = data_blocks + ind_blocks; + ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; @@ -492,6 +494,7 @@ out: static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = gfs2_page_mkwrite, .remap_pages = generic_file_remap_pages, }; @@ -620,7 +623,7 @@ static int gfs2_release(struct inode *inode, struct file *file) if (!(file->f_mode & FMODE_WRITE)) return 0; - gfs2_rs_delete(ip); + gfs2_rs_delete(ip, &inode->i_writecount); return 0; } @@ -650,7 +653,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); + int sync_state = inode->i_state & I_DIRTY; struct gfs2_inode *ip = GFS2_I(inode); int ret = 0, ret1 = 0; @@ -660,6 +663,8 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret1; } + if (!gfs2_is_jdata(ip)) + sync_state &= ~I_DIRTY_PAGES; if (datasync) sync_state &= ~I_DIRTY_SYNC; @@ -679,7 +684,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, } /** - * gfs2_file_aio_write - Perform a write to a file + * gfs2_file_write_iter - Perform a write to a file * @iocb: The io context * @iov: The data to write * @nr_segs: Number of @iov segments @@ -692,11 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, * */ -static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - size_t writesize = iov_length(iov, nr_segs); struct gfs2_inode *ip = GFS2_I(file_inode(file)); int ret; @@ -704,7 +707,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret) return ret; - gfs2_size_hint(file, pos, writesize); + gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); if (file->f_flags & O_APPEND) { struct gfs2_holder gh; @@ -715,7 +718,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, gfs2_glock_dq_uninit(&gh); } - return generic_file_aio_write(iocb, iov, nr_segs, pos); + return generic_file_write_iter(iocb, from); } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, @@ -798,6 +801,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes; int error; @@ -806,6 +810,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; loff_t max_chunk_size = UINT_MAX & bsize_mask; + struct gfs2_holder gh; + next = (next + 1) << sdp->sd_sb.sb_bsize_shift; /* We only support the FALLOC_FL_KEEP_SIZE mode */ @@ -826,8 +832,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (error) return error; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); + mutex_lock(&inode->i_mutex); + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + error = gfs2_glock_nq(&gh); if (unlikely(error)) goto out_uninit; @@ -848,7 +856,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); + ap.target = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip, &ap); if (error) { if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { bytes >>= 1; @@ -894,9 +903,10 @@ out_trans_fail: out_qunlock: gfs2_quota_unlock(ip); out_unlock: - gfs2_glock_dq(&ip->i_gh); + gfs2_glock_dq(&gh); out_uninit: - gfs2_holder_uninit(&ip->i_gh); + gfs2_holder_uninit(&gh); + mutex_unlock(&inode->i_mutex); return error; } @@ -971,7 +981,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) int error = 0; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; + flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT; mutex_lock(&fp->f_fl_mutex); @@ -981,7 +991,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) goto out; flock_lock_file_wait(file, &(struct file_lock){.fl_type = F_UNLCK}); - gfs2_glock_dq_wait(fl_gh); + gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, @@ -1046,10 +1056,10 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = gfs2_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, @@ -1058,7 +1068,7 @@ const struct file_operations gfs2_file_fops = { .lock = gfs2_lock, .flock = gfs2_flock, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .setlease = gfs2_setlease, .fallocate = gfs2_fallocate, }; @@ -1078,17 +1088,17 @@ const struct file_operations gfs2_dir_fops = { const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = gfs2_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9435384562a..ee4e04fe60f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -31,6 +33,7 @@ #include <linux/bit_spinlock.h> #include <linux/percpu.h> #include <linux/list_sort.h> +#include <linux/lockref.h> #include "gfs2.h" #include "incore.h" @@ -129,10 +132,10 @@ void gfs2_glock_free(struct gfs2_glock *gl) * */ -void gfs2_glock_hold(struct gfs2_glock *gl) +static void gfs2_glock_hold(struct gfs2_glock *gl) { - GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0); - atomic_inc(&gl->gl_ref); + GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); + lockref_get(&gl->gl_lockref); } /** @@ -187,20 +190,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) } /** - * gfs2_glock_put_nolock() - Decrement reference count on glock - * @gl: The glock to put - * - * This function should only be used if the caller has its own reference - * to the glock, in addition to the one it is dropping. - */ - -void gfs2_glock_put_nolock(struct gfs2_glock *gl) -{ - if (atomic_dec_and_test(&gl->gl_ref)) - GLOCK_BUG_ON(gl, 1); -} - -/** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put * @@ -211,17 +200,22 @@ void gfs2_glock_put(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); - if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { - __gfs2_glock_remove_from_lru(gl); - spin_unlock(&lru_lock); - spin_lock_bucket(gl->gl_hash); - hlist_bl_del_rcu(&gl->gl_list); - spin_unlock_bucket(gl->gl_hash); - GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); - GLOCK_BUG_ON(gl, mapping && mapping->nrpages); - trace_gfs2_glock_put(gl); - sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); - } + if (lockref_put_or_lock(&gl->gl_lockref)) + return; + + lockref_mark_dead(&gl->gl_lockref); + + spin_lock(&lru_lock); + __gfs2_glock_remove_from_lru(gl); + spin_unlock(&lru_lock); + spin_unlock(&gl->gl_lockref.lock); + spin_lock_bucket(gl->gl_hash); + hlist_bl_del_rcu(&gl->gl_list); + spin_unlock_bucket(gl->gl_hash); + GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); + GLOCK_BUG_ON(gl, mapping && mapping->nrpages); + trace_gfs2_glock_put(gl); + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } /** @@ -244,7 +238,7 @@ static struct gfs2_glock *search_bucket(unsigned int hash, continue; if (gl->gl_sbd != sdp) continue; - if (atomic_inc_not_zero(&gl->gl_ref)) + if (lockref_get_not_dead(&gl->gl_lockref)) return gl; } @@ -283,7 +277,7 @@ static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holde static void gfs2_holder_wake(struct gfs2_holder *gh) { clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&gh->gh_iflags, HIF_WAIT); } @@ -396,10 +390,11 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) held2 = (new_state != LM_ST_UNLOCKED); if (held1 != held2) { + GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); if (held2) - gfs2_glock_hold(gl); + gl->gl_lockref.count++; else - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; } if (held1 && held2 && list_empty(&gl->gl_holders)) clear_bit(GLF_QUEUED, &gl->gl_flags); @@ -416,7 +411,7 @@ static void gfs2_demote_wake(struct gfs2_glock *gl) { gl->gl_demote_state = LM_ST_EXCLUSIVE; clear_bit(GLF_DEMOTE, &gl->gl_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&gl->gl_flags, GLF_DEMOTE); } @@ -475,7 +470,7 @@ retry: do_xmote(gl, gh, LM_ST_UNLOCKED); break; default: /* Everything else */ - printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state); + pr_err("wanted %u got %u\n", gl->gl_target, state); GLOCK_BUG_ON(gl, 1); } spin_unlock(&gl->gl_spin); @@ -549,7 +544,7 @@ __acquires(&gl->gl_spin) /* lock_dlm */ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); if (ret) { - printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret); + pr_err("lm_lock ret %d\n", ret); GLOCK_BUG_ON(gl, 1); } } else { /* lock_nolock */ @@ -625,15 +620,15 @@ out: out_sched: clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_clear_bit(); - gfs2_glock_hold(gl); + smp_mb__after_atomic(); + gl->gl_lockref.count++; if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; return; out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); return; } @@ -736,14 +731,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, cachep = gfs2_glock_aspace_cachep; else cachep = gfs2_glock_cachep; - gl = kmem_cache_alloc(cachep, GFP_KERNEL); + gl = kmem_cache_alloc(cachep, GFP_NOFS); if (!gl) return -ENOMEM; memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); if (glops->go_flags & GLOF_LVB) { - gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); + gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS); if (!gl->gl_lksb.sb_lvbptr) { kmem_cache_free(cachep, gl); return -ENOMEM; @@ -754,7 +749,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_sbd = sdp; gl->gl_flags = 0; gl->gl_name = name; - atomic_set(&gl->gl_ref, 1); + gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; @@ -942,7 +937,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - printk(KERN_ERR " %pV", &vaf); + pr_err("%pV", &vaf); } va_end(args); @@ -1017,13 +1012,13 @@ do_cancel: return; trap_recursive: - printk(KERN_ERR "original: %pSR\n", (void *)gh2->gh_ip); - printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid)); - printk(KERN_ERR "lock type: %d req lock state : %d\n", + pr_err("original: %pSR\n", (void *)gh2->gh_ip); + pr_err("pid: %d\n", pid_nr(gh2->gh_owner_pid)); + pr_err("lock type: %d req lock state : %d\n", gh2->gh_gl->gl_name.ln_type, gh2->gh_state); - printk(KERN_ERR "new: %pSR\n", (void *)gh->gh_ip); - printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); - printk(KERN_ERR "lock type: %d req lock state : %d\n", + pr_err("new: %pSR\n", (void *)gh->gh_ip); + pr_err("pid: %d\n", pid_nr(gh->gh_owner_pid)); + pr_err("lock type: %d req lock state : %d\n", gh->gh_gl->gl_name.ln_type, gh->gh_state); gfs2_dump_glock(NULL, gl); BUG(); @@ -1052,9 +1047,13 @@ int gfs2_glock_nq(struct gfs2_holder *gh) spin_lock(&gl->gl_spin); add_to_queue(gh); - if ((LM_FLAG_NOEXP & gh->gh_flags) && - test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && + test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + gl->gl_lockref.count++; + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gl->gl_lockref.count--; + } run_queue(gl, 1); spin_unlock(&gl->gl_spin); @@ -1356,10 +1355,10 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } } - spin_unlock(&gl->gl_spin); + gl->gl_lockref.count++; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - smp_wmb(); - gfs2_glock_hold(gl); + spin_unlock(&gl->gl_spin); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } @@ -1404,18 +1403,25 @@ __acquires(&lru_lock) while(!list_empty(list)) { gl = list_entry(list->next, struct gfs2_glock, gl_lru); list_del_init(&gl->gl_lru); + if (!spin_trylock(&gl->gl_spin)) { +add_back_to_lru: + list_add(&gl->gl_lru, &lru_list); + atomic_inc(&lru_count); + continue; + } + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + spin_unlock(&gl->gl_spin); + goto add_back_to_lru; + } clear_bit(GLF_LRU, &gl->gl_flags); - gfs2_glock_hold(gl); - spin_unlock(&lru_lock); - spin_lock(&gl->gl_spin); + gl->gl_lockref.count++; if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); - smp_mb__after_clear_bit(); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; spin_unlock(&gl->gl_spin); - spin_lock(&lru_lock); + cond_resched_lock(&lru_lock); } } @@ -1428,21 +1434,22 @@ __acquires(&lru_lock) * gfs2_dispose_glock_lru() above. */ -static void gfs2_scan_glock_lru(int nr) +static long gfs2_scan_glock_lru(int nr) { struct gfs2_glock *gl; LIST_HEAD(skipped); LIST_HEAD(dispose); + long freed = 0; spin_lock(&lru_lock); - while(nr && !list_empty(&lru_list)) { + while ((nr-- >= 0) && !list_empty(&lru_list)) { gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); /* Test for being demotable */ - if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + if (!test_bit(GLF_LOCK, &gl->gl_flags)) { list_move(&gl->gl_lru, &dispose); atomic_dec(&lru_count); - nr--; + freed++; continue; } @@ -1452,23 +1459,28 @@ static void gfs2_scan_glock_lru(int nr) if (!list_empty(&dispose)) gfs2_dispose_glock_lru(&dispose); spin_unlock(&lru_lock); + + return freed; } -static int gfs2_shrink_glock_memory(struct shrinker *shrink, - struct shrink_control *sc) +static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) { - if (sc->nr_to_scan) { - if (!(sc->gfp_mask & __GFP_FS)) - return -1; - gfs2_scan_glock_lru(sc->nr_to_scan); - } + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + return gfs2_scan_glock_lru(sc->nr_to_scan); +} - return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; +static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_read(&lru_count)); } static struct shrinker glock_shrinker = { - .shrink = gfs2_shrink_glock_memory, .seeks = DEFAULT_SEEKS, + .count_objects = gfs2_glock_shrink_count, + .scan_objects = gfs2_glock_shrink_scan, }; /** @@ -1488,7 +1500,7 @@ static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, rcu_read_lock(); hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { - if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref)) + if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); } rcu_read_unlock(); @@ -1508,18 +1520,17 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) * thaw_glock - thaw out a glock which has an unprocessed reply waiting * @gl: The glock to thaw * - * N.B. When we freeze a glock, we leave a ref to the glock outstanding, - * so this has to result in the ref count being dropped by one. */ static void thaw_glock(struct gfs2_glock *gl) { if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) - return; + goto out; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - gfs2_glock_hold(gl); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) { +out: gfs2_glock_put(gl); + } } /** @@ -1536,7 +1547,6 @@ static void clear_glock(struct gfs2_glock *gl) if (gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED, 0, false); spin_unlock(&gl->gl_spin); - gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } @@ -1552,13 +1562,11 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) glock_hash_walk(thaw_glock, sdp); } -static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) { - int ret; spin_lock(&gl->gl_spin); - ret = gfs2_dump_glock(seq, gl); + gfs2_dump_glock(seq, gl); spin_unlock(&gl->gl_spin); - return ret; } static void dump_glock_func(struct gfs2_glock *gl) @@ -1647,14 +1655,14 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) * @seq: the seq_file struct * @gh: the glock holder * - * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) +static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) { struct task_struct *gh_owner = NULL; char flags_buf[32]; + rcu_read_lock(); if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", @@ -1664,7 +1672,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, gh_owner ? gh_owner->comm : "(ended)", (void *)gh->gh_ip); - return 0; + rcu_read_unlock(); } static const char *gflags2str(char *buf, const struct gfs2_glock *gl) @@ -1719,16 +1727,14 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) * example. The field's are n = number (id of the object), f = flags, * t = type, s = state, r = refcount, e = error, p = pid. * - * Returns: 0 on success, -ENOBUFS when we run out of space */ -int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) +void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) { const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned long long dtime; const struct gfs2_holder *gh; char gflags_buf[32]; - int error = 0; dtime = jiffies - gl->gl_demote_time; dtime *= 1000000/HZ; /* demote time in uSec */ @@ -1743,17 +1749,13 @@ int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), atomic_read(&gl->gl_revokes), - atomic_read(&gl->gl_ref), gl->gl_hold_time); + (int)gl->gl_lockref.count, gl->gl_hold_time); + + list_for_each_entry(gh, &gl->gl_holders, gh_list) + dump_holder(seq, gh); - list_for_each_entry(gh, &gl->gl_holders, gh_list) { - error = dump_holder(seq, gh); - if (error) - goto out; - } if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) - error = glops->go_dump(seq, gl); -out: - return error; + glops->go_dump(seq, gl); } static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) @@ -1838,14 +1840,14 @@ int __init gfs2_glock_init(void) glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0); - if (IS_ERR(glock_workqueue)) - return PTR_ERR(glock_workqueue); + if (!glock_workqueue) + return -ENOMEM; gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); - if (IS_ERR(gfs2_delete_workqueue)) { + if (!gfs2_delete_workqueue) { destroy_workqueue(glock_workqueue); - return PTR_ERR(gfs2_delete_workqueue); + return -ENOMEM; } register_shrinker(&glock_shrinker); @@ -1899,7 +1901,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gi->nhash = 0; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); + } while (gi->sdp != gi->gl->gl_sbd || + __lockref_is_dead(&gi->gl->gl_lockref)); return 0; } @@ -1950,7 +1953,8 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) { - return dump_glock(seq, iter_ptr); + dump_glock(seq, iter_ptr); + return 0; } static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 69f66e3d22b..32572f71f02 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -181,8 +181,6 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); -extern void gfs2_glock_hold(struct gfs2_glock *gl); -extern void gfs2_glock_put_nolock(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, struct gfs2_holder *gh); @@ -201,7 +199,7 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, struct gfs2_holder *gh); extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); +extern void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) extern __printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 5f2e5224c51..2ffc67dce87 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -47,7 +47,8 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) * None of the buffers should be dirty, locked, or pinned. */ -static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) +static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, + unsigned int nr_revokes) { struct gfs2_sbd *sdp = gl->gl_sbd; struct list_head *head = &gl->gl_ail_list; @@ -57,7 +58,9 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) gfs2_log_lock(sdp); spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) { + list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) { + if (nr_revokes == 0) + break; bh = bd->bd_bh; if (bh->b_state & b_state) { if (fsync) @@ -65,6 +68,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) gfs2_ail_error(gl, bh); } gfs2_trans_add_revoke(sdp, bd); + nr_revokes--; } GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); spin_unlock(&sdp->sd_ail_lock); @@ -78,40 +82,51 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) struct gfs2_trans tr; memset(&tr, 0, sizeof(tr)); + INIT_LIST_HEAD(&tr.tr_buf); + INIT_LIST_HEAD(&tr.tr_databuf); tr.tr_revokes = atomic_read(&gl->gl_ail_count); if (!tr.tr_revokes) return; - /* A shortened, inline version of gfs2_trans_begin() */ + /* A shortened, inline version of gfs2_trans_begin() + * tr->alloced is not set since the transaction structure is + * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = (unsigned long)__builtin_return_address(0); sb_start_intwrite(sdp->sd_vfs); - gfs2_log_reserve(sdp, tr.tr_reserved); + if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) { + sb_end_intwrite(sdp->sd_vfs); + return; + } WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; - __gfs2_ail_flush(gl, 0); + __gfs2_ail_flush(gl, 0, tr.tr_revokes); gfs2_trans_end(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) { struct gfs2_sbd *sdp = gl->gl_sbd; unsigned int revokes = atomic_read(&gl->gl_ail_count); + unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); int ret; if (!revokes) return; - ret = gfs2_trans_begin(sdp, 0, revokes); + while (revokes > max_revokes) + max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); + + ret = gfs2_trans_begin(sdp, 0, max_revokes); if (ret) return; - __gfs2_ail_flush(gl, fsync); + __gfs2_ail_flush(gl, fsync, max_revokes); gfs2_trans_end(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } /** @@ -125,7 +140,8 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) static void rgrp_go_sync(struct gfs2_glock *gl) { - struct address_space *metamapping = gfs2_glock2aspace(gl); + struct gfs2_sbd *sdp = gl->gl_sbd; + struct address_space *mapping = &sdp->sd_aspace; struct gfs2_rgrpd *rgd; int error; @@ -133,10 +149,10 @@ static void rgrp_go_sync(struct gfs2_glock *gl) return; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); - gfs2_log_flush(gl->gl_sbd, gl); - filemap_fdatawrite(metamapping); - error = filemap_fdatawait(metamapping); - mapping_set_error(metamapping, error); + gfs2_log_flush(sdp, gl, NORMAL_FLUSH); + filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); + error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); + mapping_set_error(mapping, error); gfs2_ail_empty_gl(gl); spin_lock(&gl->gl_spin); @@ -158,11 +174,12 @@ static void rgrp_go_sync(struct gfs2_glock *gl) static void rgrp_go_inval(struct gfs2_glock *gl, int flags) { - struct address_space *mapping = gfs2_glock2aspace(gl); + struct gfs2_sbd *sdp = gl->gl_sbd; + struct address_space *mapping = &sdp->sd_aspace; WARN_ON_ONCE(!(flags & DIO_METADATA)); - gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); - truncate_inode_pages(mapping, 0); + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); + truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end); if (gl->gl_object) { struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; @@ -184,14 +201,17 @@ static void inode_go_sync(struct gfs2_glock *gl) if (ip && !S_ISREG(ip->i_inode.i_mode)) ip = NULL; - if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) - unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); + if (ip) { + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) + unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); + inode_dio_wait(&ip->i_inode); + } if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); - gfs2_log_flush(gl->gl_sbd, gl); + gfs2_log_flush(gl->gl_sbd, gl, NORMAL_FLUSH); filemap_fdatawrite(metamapping); if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; @@ -206,7 +226,7 @@ static void inode_go_sync(struct gfs2_glock *gl) * Writeback of the data mapping may cause the dirty flag to be set * so we have to clear it again here. */ - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(GLF_DIRTY, &gl->gl_flags); } @@ -214,8 +234,8 @@ static void inode_go_sync(struct gfs2_glock *gl) * inode_go_inval - prepare a inode glock to be released * @gl: the glock * @flags: - * - * Normally we invlidate everything, but if we are moving into + * + * Normally we invalidate everything, but if we are moving into * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we * can keep hold of the metadata, since it won't have changed. * @@ -238,7 +258,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) } if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { - gfs2_log_flush(gl->gl_sbd, NULL); + gfs2_log_flush(gl->gl_sbd, NULL, NORMAL_FLUSH); gl->gl_sbd->sd_rindex_uptodate = 0; } if (ip && S_ISREG(ip->i_inode.i_mode)) @@ -402,6 +422,9 @@ static int inode_go_lock(struct gfs2_holder *gh) return error; } + if (gh->gh_state != LM_ST_DEFERRED) + inode_dio_wait(&ip->i_inode); + if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && (gl->gl_state == LM_ST_EXCLUSIVE) && (gh->gh_state == LM_ST_EXCLUSIVE)) { @@ -421,49 +444,55 @@ static int inode_go_lock(struct gfs2_holder *gh) * @seq: The iterator * @ip: the inode * - * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) +static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) { const struct gfs2_inode *ip = gl->gl_object; if (ip == NULL) - return 0; + return; gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, IF2DT(ip->i_inode.i_mode), ip->i_flags, (unsigned int)ip->i_diskflags, (unsigned long long)i_size_read(&ip->i_inode)); - return 0; } /** - * trans_go_sync - promote/demote the transaction glock + * freeze_go_sync - promote/demote the freeze glock * @gl: the glock * @state: the requested state * @flags: * */ -static void trans_go_sync(struct gfs2_glock *gl) +static void freeze_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; + DEFINE_WAIT(wait); - if (gl->gl_state != LM_ST_UNLOCKED && + if (gl->gl_state == LM_ST_SHARED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); + atomic_set(&sdp->sd_log_freeze, 1); + wake_up(&sdp->sd_logd_waitq); + do { + prepare_to_wait(&sdp->sd_log_frozen_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (atomic_read(&sdp->sd_log_freeze)) + io_schedule(); + } while(atomic_read(&sdp->sd_log_freeze)); + finish_wait(&sdp->sd_log_frozen_wait, &wait); } } /** - * trans_go_xmote_bh - After promoting/demoting the transaction glock + * freeze_go_xmote_bh - After promoting/demoting the freeze glock * @gl: the glock * */ -static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) +static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) { struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); @@ -496,7 +525,7 @@ static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) * Always returns 0 */ -static int trans_go_demote_ok(const struct gfs2_glock *gl) +static int freeze_go_demote_ok(const struct gfs2_glock *gl) { return 0; } @@ -517,9 +546,9 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) if (gl->gl_demote_state == LM_ST_UNLOCKED && gl->gl_state == LM_ST_SHARED && ip) { - gfs2_glock_hold(gl); + gl->gl_lockref.count++; if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; } } @@ -544,13 +573,13 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_unlock = gfs2_rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, - .go_flags = GLOF_ASPACE | GLOF_LVB, + .go_flags = GLOF_LVB, }; -const struct gfs2_glock_operations gfs2_trans_glops = { - .go_sync = trans_go_sync, - .go_xmote_bh = trans_go_xmote_bh, - .go_demote_ok = trans_go_demote_ok, +const struct gfs2_glock_operations gfs2_freeze_glops = { + .go_sync = freeze_go_sync, + .go_xmote_bh = freeze_go_xmote_bh, + .go_demote_ok = freeze_go_demote_ok, .go_type = LM_TYPE_NONDISK, }; diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index bf95a2dc166..7455d2629bc 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -15,7 +15,7 @@ extern const struct gfs2_glock_operations gfs2_meta_glops; extern const struct gfs2_glock_operations gfs2_inode_glops; extern const struct gfs2_glock_operations gfs2_rgrp_glops; -extern const struct gfs2_glock_operations gfs2_trans_glops; +extern const struct gfs2_glock_operations gfs2_freeze_glops; extern const struct gfs2_glock_operations gfs2_iopen_glops; extern const struct gfs2_glock_operations gfs2_flock_glops; extern const struct gfs2_glock_operations gfs2_nondisk_glops; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 26aabd7caba..67d310c9ada 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -21,6 +21,7 @@ #include <linux/rbtree.h> #include <linux/ktime.h> #include <linux/percpu.h> +#include <linux/lockref.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -51,7 +52,7 @@ struct gfs2_log_header_host { */ struct gfs2_log_operations { - void (*lo_before_commit) (struct gfs2_sbd *sdp); + void (*lo_before_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); void (*lo_before_scan) (struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, int pass); @@ -71,6 +72,7 @@ struct gfs2_bitmap { u32 bi_offset; u32 bi_start; u32 bi_len; + u32 bi_blocks; }; struct gfs2_rgrpd { @@ -91,6 +93,7 @@ struct gfs2_rgrpd { struct gfs2_rgrp_lvb *rd_rgl; u32 rd_last_alloc; u32 rd_flags; + u32 rd_extfail_pt; /* extent failure point */ #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ @@ -101,19 +104,25 @@ struct gfs2_rgrpd { struct gfs2_rbm { struct gfs2_rgrpd *rgd; - struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ u32 offset; /* The offset is bitmap relative */ + int bii; /* Bitmap index */ }; +static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_bits + rbm->bii; +} + static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) { - return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; + return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + + rbm->offset; } static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, const struct gfs2_rbm *rbm2) { - return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && + return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) && (rbm1->offset == rbm2->offset); } @@ -209,7 +218,7 @@ struct gfs2_glock_operations { int (*go_demote_ok) (const struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); void (*go_unlock) (struct gfs2_holder *gh); - int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); + void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); void (*go_callback)(struct gfs2_glock *gl, bool remote); const int go_type; const unsigned long go_flags; @@ -278,6 +287,20 @@ struct gfs2_blkreserv { unsigned int rs_qa_qd_num; }; +/* + * Allocation parameters + * @target: The number of blocks we'd ideally like to allocate + * @aflags: The flags (e.g. Orlov flag) + * + * The intent is to gradually expand this structure over time in + * order to give more information, e.g. alignment, min extent size + * to the allocation code. + */ +struct gfs2_alloc_parms { + u32 target; + u32 aflags; +}; + enum { GLF_LOCK = 1, GLF_DEMOTE = 3, @@ -300,9 +323,9 @@ struct gfs2_glock { struct gfs2_sbd *gl_sbd; unsigned long gl_flags; /* GLF_... */ struct lm_lockname gl_name; - atomic_t gl_ref; - spinlock_t gl_spin; + struct lockref gl_lockref; +#define gl_spin gl_lockref.lock /* State fields protected by gl_spin */ unsigned int gl_state:2, /* Current state */ @@ -328,7 +351,15 @@ struct gfs2_glock { atomic_t gl_ail_count; atomic_t gl_revokes; struct delayed_work gl_work; - struct work_struct gl_delete; + union { + /* For inode and iopen glocks only */ + struct work_struct gl_delete; + /* For rgrp glocks only */ + struct { + loff_t start; + loff_t end; + } gl_vm; + }; struct rcu_head gl_rcu; }; @@ -340,6 +371,7 @@ enum { GIF_ALLOC_FAILED = 2, GIF_SW_PAGED = 3, GIF_ORDERED = 4, + GIF_FREE_VFS_INODE = 5, }; struct gfs2_inode { @@ -397,12 +429,14 @@ enum { }; struct gfs2_quota_data { + struct hlist_bl_node qd_hlist; struct list_head qd_list; - struct list_head qd_reclaim; - - atomic_t qd_count; - struct kqid qd_id; + struct gfs2_sbd *qd_sbd; + struct lockref qd_lockref; + struct list_head qd_lru; + unsigned qd_hash; + unsigned long qd_flags; /* QDF_... */ s64 qd_change; @@ -420,6 +454,7 @@ struct gfs2_quota_data { u64 qd_sync_gen; unsigned long qd_last_warn; + struct rcu_head qd_rcu; }; struct gfs2_trans { @@ -428,11 +463,9 @@ struct gfs2_trans { unsigned int tr_blocks; unsigned int tr_revokes; unsigned int tr_reserved; - - struct gfs2_holder tr_t_gh; - - int tr_touched; - int tr_attached; + unsigned int tr_touched:1; + unsigned int tr_attached:1; + unsigned int tr_alloced:1; unsigned int tr_num_buf_new; unsigned int tr_num_databuf_new; @@ -442,6 +475,8 @@ struct gfs2_trans { unsigned int tr_num_revoke_rm; struct list_head tr_list; + struct list_head tr_databuf; + struct list_head tr_buf; unsigned int tr_first; struct list_head tr_ail1_list; @@ -449,7 +484,7 @@ struct gfs2_trans { }; struct gfs2_journal_extent { - struct list_head extent_list; + struct list_head list; unsigned int lblock; /* First logical block */ u64 dblock; /* First disk block */ @@ -459,6 +494,7 @@ struct gfs2_journal_extent { struct gfs2_jdesc { struct list_head jd_list; struct list_head extent_list; + unsigned int nr_extents; struct work_struct jd_work; struct inode *jd_inode; unsigned long jd_flags; @@ -466,6 +502,15 @@ struct gfs2_jdesc { unsigned int jd_jid; unsigned int jd_blocks; int jd_recover_error; + /* Replay stuff */ + + unsigned int jd_found_blocks; + unsigned int jd_found_revokes; + unsigned int jd_replayed_blocks; + + struct list_head jd_revoke_list; + unsigned int jd_replay_tail; + }; struct gfs2_statfs_change_host { @@ -516,7 +561,6 @@ struct gfs2_tune { unsigned int gt_logd_secs; - unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */ unsigned int gt_quota_scale_num; /* Numerator */ unsigned int gt_quota_scale_den; /* Denominator */ @@ -636,7 +680,7 @@ struct gfs2_sbd { struct lm_lockstruct sd_lockstruct; struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; - struct gfs2_glock *sd_trans_gl; + struct gfs2_glock *sd_freeze_gl; wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; @@ -684,6 +728,8 @@ struct gfs2_sbd { struct gfs2_holder sd_sc_gh; struct gfs2_holder sd_qc_gh; + struct completion sd_journal_ready; + /* Daemon stuff */ struct task_struct *sd_logd_process; @@ -694,35 +740,31 @@ struct gfs2_sbd { struct list_head sd_quota_list; atomic_t sd_quota_count; struct mutex sd_quota_mutex; + struct mutex sd_quota_sync_mutex; wait_queue_head_t sd_quota_wait; struct list_head sd_trunc_list; spinlock_t sd_trunc_lock; unsigned int sd_quota_slots; - unsigned int sd_quota_chunks; - unsigned char **sd_quota_bitmap; + unsigned long *sd_quota_bitmap; + spinlock_t sd_bitmap_lock; u64 sd_quota_sync_gen; /* Log stuff */ + struct address_space sd_aspace; + spinlock_t sd_log_lock; struct gfs2_trans *sd_log_tr; unsigned int sd_log_blks_reserved; - unsigned int sd_log_commited_buf; - unsigned int sd_log_commited_databuf; int sd_log_commited_revoke; atomic_t sd_log_pinned; - unsigned int sd_log_num_buf; unsigned int sd_log_num_revoke; - unsigned int sd_log_num_rg; - unsigned int sd_log_num_databuf; - struct list_head sd_log_le_buf; struct list_head sd_log_le_revoke; - struct list_head sd_log_le_databuf; struct list_head sd_log_le_ordered; spinlock_t sd_ordered_lock; @@ -750,17 +792,14 @@ struct gfs2_sbd { struct list_head sd_ail1_list; struct list_head sd_ail2_list; - /* Replay stuff */ - - struct list_head sd_revoke_list; - unsigned int sd_replay_tail; - - unsigned int sd_found_blocks; - unsigned int sd_found_revokes; - unsigned int sd_replayed_blocks; - /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; + struct gfs2_holder sd_freeze_root_gh; + struct gfs2_holder sd_thaw_gh; + atomic_t sd_log_freeze; + atomic_t sd_frozen_root; + wait_queue_head_t sd_frozen_root_wait; + wait_queue_head_t sd_log_frozen_wait; char sd_fsname[GFS2_FSNAME_LEN]; char sd_table_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index bbb2715171c..e62e5947788 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -149,7 +149,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, ip = GFS2_I(inode); if (!inode) - return ERR_PTR(-ENOBUFS); + return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -376,25 +376,25 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip, inode->i_gid = current_fsgid(); } -static int alloc_dinode(struct gfs2_inode *ip, u32 flags) +static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; int error; - int dblocks = 1; error = gfs2_quota_lock_check(ip); if (error) goto out; - error = gfs2_inplace_reserve(ip, RES_DINODE, flags); + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_quota; - error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 0); + error = gfs2_trans_begin(sdp, (*dblocks * RES_RG_BIT) + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipreserv; - error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation); + error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); ip->i_no_formal_ino = ip->i_generation; ip->i_inode.i_ino = ip->i_no_addr; ip->i_goal = ip->i_no_addr; @@ -427,6 +427,33 @@ static void gfs2_init_dir(struct buffer_head *dibh, } /** + * gfs2_init_xattr - Initialise an xattr block for a new inode + * @ip: The inode in question + * + * This sets up an empty xattr block for a new inode, ready to + * take any ACLs, LSM xattrs, etc. + */ + +static void gfs2_init_xattr(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct buffer_head *bh; + struct gfs2_ea_header *ea; + + bh = gfs2_meta_new(ip->i_gl, ip->i_eattr); + gfs2_trans_add_meta(ip->i_gl, bh); + gfs2_metatype_set(bh, GFS2_METATYPE_EA, GFS2_FORMAT_EA); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); + + ea = GFS2_EA_BH2FIRST(bh); + ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize); + ea->ea_type = GFS2_EATYPE_UNUSED; + ea->ea_flags = GFS2_EAFLAG_LAST; + + brelse(bh); +} + +/** * init_dinode - Fill in a new dinode structure * @dip: The directory this inode is being created in * @ip: The inode @@ -468,25 +495,45 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, brelse(dibh); } +/** + * gfs2_trans_da_blocks - Calculate number of blocks to link inode + * @dip: The directory we are linking into + * @da: The dir add information + * @nr_inodes: The number of inodes involved + * + * This calculate the number of blocks we need to reserve in a + * transaction to link @nr_inodes into a directory. In most cases + * @nr_inodes will be 2 (the directory plus the inode being linked in) + * but in case of rename, 4 may be required. + * + * Returns: Number of blocks + */ + +static unsigned gfs2_trans_da_blks(const struct gfs2_inode *dip, + const struct gfs2_diradd *da, + unsigned nr_inodes) +{ + return da->nr_blocks + gfs2_rg_blocks(dip, da->nr_blocks) + + (nr_inodes * RES_DINODE) + RES_QUOTA + RES_STATFS; +} + static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip, int arq) + struct gfs2_inode *ip, struct gfs2_diradd *da) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct gfs2_alloc_parms ap = { .target = da->nr_blocks, }; int error; - if (arq) { + if (da->nr_blocks) { error = gfs2_quota_lock_check(dip); if (error) goto fail_quota_locks; - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); + error = gfs2_inplace_reserve(dip, &ap); if (error) goto fail_quota_locks; - error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - dip->i_rgd->rd_length + - 2 * RES_DINODE + - RES_STATFS + RES_QUOTA, 0); + error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(dip, da, 2), 0); if (error) goto fail_ipreserv; } else { @@ -495,7 +542,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, goto fail_quota_locks; } - error = gfs2_dir_add(&dip->i_inode, name, ip); + error = gfs2_dir_add(&dip->i_inode, name, ip, da); if (error) goto fail_end_trans; @@ -524,13 +571,6 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, return err; } -static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, - const struct qstr *qstr) -{ - return security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, - &gfs2_initxattrs, NULL); -} - /** * gfs2_create_inode - Create a new inode * @dir: The parent directory @@ -550,15 +590,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, unsigned int size, int excl, int *opened) { const struct qstr *name = &dentry->d_name; + struct posix_acl *default_acl, *acl; struct gfs2_holder ghs[2]; struct inode *inode = NULL; struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; struct dentry *d; - int error; + int error, free_vfs_inode = 0; u32 aflags = 0; - int arq; + unsigned blocks = 1; + struct gfs2_diradd da = { .bh = NULL, }; if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; @@ -583,24 +625,27 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = PTR_ERR(inode); if (!IS_ERR(inode)) { d = d_splice_alias(inode, dentry); + error = PTR_ERR(d); + if (IS_ERR(d)) + goto fail_gunlock; error = 0; - if (file && !IS_ERR(d)) { - if (d == NULL) - d = dentry; - if (S_ISREG(inode->i_mode)) - error = finish_open(file, d, gfs2_open_common, opened); - else + if (file) { + if (S_ISREG(inode->i_mode)) { + WARN_ON(d != NULL); + error = finish_open(file, dentry, gfs2_open_common, opened); + } else { error = finish_no_open(file, d); + } + } else { + dput(d); } gfs2_glock_dq_uninit(ghs); - if (IS_ERR(d)) - return PTR_RET(d); return error; } else if (error != -ENOENT) { goto fail_gunlock; } - arq = error = gfs2_diradd_alloc_required(dir, name); + error = gfs2_diradd_alloc_required(dir, name, &da); if (error < 0) goto fail_gunlock; @@ -609,10 +654,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!inode) goto fail_gunlock; + error = posix_acl_create(dir, &mode, &default_acl, &acl); + if (error) + goto fail_free_vfs_inode; + ip = GFS2_I(inode); error = gfs2_rs_alloc(ip); if (error) - goto fail_free_inode; + goto fail_free_acls; inode->i_mode = mode; set_nlink(inode, S_ISDIR(mode) ? 2 : 1); @@ -647,10 +696,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, (dip->i_diskflags & GFS2_DIF_TOPDIR)) aflags |= GFS2_AF_ORLOV; - error = alloc_dinode(ip, aflags); + if (default_acl || acl) + blocks++; + + error = alloc_dinode(ip, aflags, &blocks); if (error) goto fail_free_inode; + gfs2_set_inode_blocks(inode, blocks); + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (error) goto fail_free_inode; @@ -660,10 +714,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_free_inode; - error = gfs2_trans_begin(sdp, RES_DINODE, 0); + error = gfs2_trans_begin(sdp, blocks, 0); if (error) goto fail_gunlock2; + if (blocks > 1) { + ip->i_eattr = ip->i_no_addr + 1; + gfs2_init_xattr(ip); + } init_dinode(dip, ip, symname); gfs2_trans_end(sdp); @@ -680,22 +738,34 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_set_iop(inode); insert_inode_hash(inode); - error = gfs2_acl_create(dip, inode); + if (default_acl) { + error = gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + posix_acl_release(default_acl); + } + if (acl) { + if (!error) + error = gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); + posix_acl_release(acl); + } + if (error) goto fail_gunlock3; - error = gfs2_security_init(dip, ip, name); + error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, + &gfs2_initxattrs, NULL); if (error) goto fail_gunlock3; - error = link_dinode(dip, name, ip, arq); + error = link_dinode(dip, name, ip, &da); if (error) goto fail_gunlock3; mark_inode_dirty(inode); d_instantiate(dentry, inode); - if (file) + if (file) { + *opened |= FILE_CREATED; error = finish_open(file, dentry, gfs2_open_common, opened); + } gfs2_glock_dq_uninit(ghs); gfs2_glock_dq_uninit(ghs + 1); return error; @@ -711,15 +781,23 @@ fail_gunlock2: fail_free_inode: if (ip->i_gl) gfs2_glock_put(ip->i_gl); - gfs2_rs_delete(ip); - free_inode_nonrcu(inode); - inode = NULL; + gfs2_rs_delete(ip, NULL); +fail_free_acls: + if (default_acl) + posix_acl_release(default_acl); + if (acl) + posix_acl_release(acl); +fail_free_vfs_inode: + free_vfs_inode = 1; fail_gunlock: + gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(ghs); if (inode && !IS_ERR(inode)) { clear_nlink(inode); - mark_inode_dirty(inode); - set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags); + if (!free_vfs_inode) + mark_inode_dirty(inode); + set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED, + &GFS2_I(inode)->i_flags); iput(inode); } fail: @@ -775,12 +853,19 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, } d = d_splice_alias(inode, dentry); + if (IS_ERR(d)) { + iput(inode); + gfs2_glock_dq_uninit(&gh); + return d; + } if (file && S_ISREG(inode->i_mode)) error = finish_open(file, dentry, gfs2_open_common, opened); gfs2_glock_dq_uninit(&gh); - if (error) + if (error) { + dput(d); return ERR_PTR(error); + } return d; } @@ -811,7 +896,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder ghs[2]; struct buffer_head *dibh; - int alloc_required; + struct gfs2_diradd da = { .bh = NULL, }; int error; if (S_ISDIR(inode->i_mode)) @@ -866,24 +951,21 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (ip->i_inode.i_nlink == (u32)-1) goto out_gunlock; - alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name); + error = gfs2_diradd_alloc_required(dir, &dentry->d_name, &da); if (error < 0) goto out_gunlock; - error = 0; - if (alloc_required) { + if (da.nr_blocks) { + struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; error = gfs2_quota_lock_check(dip); if (error) goto out_gunlock; - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); + error = gfs2_inplace_reserve(dip, &ap); if (error) goto out_gunlock_q; - error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(dip, sdp->sd_max_dirres) + - 2 * RES_DINODE + RES_STATFS + - RES_QUOTA, 0); + error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(dip, &da, 2), 0); if (error) goto out_ipres; } else { @@ -896,7 +978,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_end_trans; - error = gfs2_dir_add(dir, &dentry->d_name, ip); + error = gfs2_dir_add(dir, &dentry->d_name, ip, &da); if (error) goto out_brelse; @@ -912,12 +994,13 @@ out_brelse: out_end_trans: gfs2_trans_end(sdp); out_ipres: - if (alloc_required) + if (da.nr_blocks) gfs2_inplace_release(dip); out_gunlock_q: - if (alloc_required) + if (da.nr_blocks) gfs2_quota_unlock(dip); out_gunlock: + gfs2_dir_no_add(&da); gfs2_glock_dq(ghs + 1); out_child: gfs2_glock_dq(ghs); @@ -1161,14 +1244,19 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, d = __gfs2_lookup(dir, dentry, file, opened); if (IS_ERR(d)) return PTR_ERR(d); - if (d == NULL) - d = dentry; - if (d->d_inode) { - if (!(*opened & FILE_OPENED)) - return finish_no_open(file, d); + if (d != NULL) + dentry = d; + if (dentry->d_inode) { + if (!(*opened & FILE_OPENED)) { + if (d == NULL) + dget(dentry); + return finish_no_open(file, dentry); + } + dput(d); return 0; } + BUG_ON(d != NULL); if (!(flags & O_CREAT)) return -ENOENT; @@ -1206,6 +1294,10 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) } tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); + if (!tmp) { + error = -ENOENT; + break; + } if (IS_ERR(tmp)) { error = PTR_ERR(tmp); break; @@ -1242,7 +1334,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, struct gfs2_rgrpd *nrgd; unsigned int num_gh; int dir_rename = 0; - int alloc_required = 0; + struct gfs2_diradd da = { .nr_blocks = 0, }; unsigned int x; int error; @@ -1376,25 +1468,24 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock; } - if (nip == NULL) - alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); - error = alloc_required; - if (error < 0) - goto out_gunlock; + if (nip == NULL) { + error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name, &da); + if (error) + goto out_gunlock; + } - if (alloc_required) { + if (da.nr_blocks) { + struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; error = gfs2_quota_lock_check(ndip); if (error) goto out_gunlock; - error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); + error = gfs2_inplace_reserve(ndip, &ap); if (error) goto out_gunlock_q; - error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + - 4 * RES_DINODE + 4 * RES_LEAF + - RES_STATFS + RES_QUOTA + 4, 0); + error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(ndip, &da, 4) + + 4 * RES_LEAF + 4, 0); if (error) goto out_ipreserv; } else { @@ -1428,19 +1519,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_end_trans; - error = gfs2_dir_add(ndir, &ndentry->d_name, ip); + error = gfs2_dir_add(ndir, &ndentry->d_name, ip, &da); if (error) goto out_end_trans; out_end_trans: gfs2_trans_end(sdp); out_ipreserv: - if (alloc_required) + if (da.nr_blocks) gfs2_inplace_release(ndip); out_gunlock_q: - if (alloc_required) + if (da.nr_blocks) gfs2_quota_unlock(ndip); out_gunlock: + gfs2_dir_no_add(&da); while (x--) { gfs2_glock_dq(ghs + x); gfs2_holder_uninit(ghs + x); @@ -1504,13 +1596,6 @@ out: return NULL; } -static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - /** * gfs2_permission - * @inode: The inode @@ -1528,18 +1613,26 @@ int gfs2_permission(struct inode *inode, int mask) { struct gfs2_inode *ip; struct gfs2_holder i_gh; + struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; + int frozen_root = 0; ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (error) - return error; - unlock = 1; + if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && + inode == sdp->sd_root_dir->d_inode && + atomic_inc_not_zero(&sdp->sd_frozen_root))) + frozen_root = 1; + else { + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + unlock = 1; + } } if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) @@ -1548,6 +1641,8 @@ int gfs2_permission(struct inode *inode, int mask) error = generic_permission(inode, mask); if (unlock) gfs2_glock_dq_uninit(&i_gh); + else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) + wake_up(&sdp->sd_frozen_root_wait); return error; } @@ -1601,10 +1696,22 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) ogid = ngid = NO_GID_QUOTA_CHANGE; - error = gfs2_quota_lock(ip, nuid, ngid); + error = get_write_access(inode); if (error) return error; + error = gfs2_rs_alloc(ip); + if (error) + goto out; + + error = gfs2_rindex_update(sdp); + if (error) + goto out; + + error = gfs2_quota_lock(ip, nuid, ngid); + if (error) + goto out; + if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { error = gfs2_quota_check(ip, nuid, ngid); @@ -1631,6 +1738,8 @@ out_end_trans: gfs2_trans_end(sdp); out_gunlock_q: gfs2_quota_unlock(ip); +out: + put_write_access(inode); return error; } @@ -1672,10 +1781,11 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) error = gfs2_setattr_size(inode, attr->ia_size); else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) error = setattr_chown(inode, attr); - else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) - error = gfs2_acl_chmod(ip, attr); - else + else { error = gfs2_setattr_simple(inode, attr); + if (!error && attr->ia_valid & ATTR_MODE) + error = posix_acl_chmod(inode, inode->i_mode); + } out: if (!error) @@ -1705,19 +1815,29 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct inode *inode = dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; + struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; + int frozen_root = 0; if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - if (error) - return error; - unlock = 1; + if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && + inode == sdp->sd_root_dir->d_inode && + atomic_inc_not_zero(&sdp->sd_frozen_root))) + frozen_root = 1; + else { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) + return error; + unlock = 1; + } } generic_fillattr(inode, stat); if (unlock) gfs2_glock_dq_uninit(&gh); + else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) + wake_up(&sdp->sd_frozen_root_wait); return 0; } @@ -1750,6 +1870,10 @@ static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, struct gfs2_holder gh; int ret; + /* For selinux during lookup */ + if (gfs2_glock_is_locked_by_me(ip->i_gl)) + return generic_getxattr(dentry, name, data, size); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { @@ -1831,6 +1955,7 @@ const struct inode_operations gfs2_file_iops = { .removexattr = gfs2_removexattr, .fiemap = gfs2_fiemap, .get_acl = gfs2_get_acl, + .set_acl = gfs2_set_acl, }; const struct inode_operations gfs2_dir_iops = { @@ -1852,13 +1977,14 @@ const struct inode_operations gfs2_dir_iops = { .removexattr = gfs2_removexattr, .fiemap = gfs2_fiemap, .get_acl = gfs2_get_acl, + .set_acl = gfs2_set_acl, .atomic_open = gfs2_atomic_open, }; const struct inode_operations gfs2_symlink_iops = { .readlink = generic_readlink, .follow_link = gfs2_follow_link, - .put_link = gfs2_put_link, + .put_link = kfree_put_link, .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, @@ -1867,6 +1993,5 @@ const struct inode_operations gfs2_symlink_iops = { .listxattr = gfs2_listxattr, .removexattr = gfs2_removexattr, .fiemap = gfs2_fiemap, - .get_acl = gfs2_get_acl, }; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index c8423d6de6c..4fafea1c9ec 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/fs.h> #include <linux/dlm.h> #include <linux/slab.h> @@ -176,7 +178,7 @@ static void gdlm_bast(void *arg, int mode) gfs2_glock_cb(gl, LM_ST_SHARED); break; default: - printk(KERN_ERR "unknown bast mode %d", mode); + pr_err("unknown bast mode %d\n", mode); BUG(); } } @@ -195,7 +197,7 @@ static int make_mode(const unsigned int lmstate) case LM_ST_SHARED: return DLM_LOCK_PR; } - printk(KERN_ERR "unknown LM state %d", lmstate); + pr_err("unknown LM state %d\n", lmstate); BUG(); return -1; } @@ -308,7 +310,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, NULL, gl); if (error) { - printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n", + pr_err("gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, error); return; @@ -466,19 +468,19 @@ static void gdlm_cancel(struct gfs2_glock *gl) static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); - memcpy(&gen, lvb_bits, sizeof(uint32_t)); + memcpy(&gen, lvb_bits, sizeof(__le32)); *lvb_gen = le32_to_cpu(gen); } static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); gen = cpu_to_le32(lvb_gen); - memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); + memcpy(ls->ls_control_lvb, &gen, sizeof(__le32)); } static int all_jid_bits_clear(char *lvb) @@ -1034,8 +1036,8 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, new_size = old_size + RECOVER_SIZE_INC; - submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); - result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); + result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); if (!submit || !result) { kfree(submit); kfree(result); @@ -1102,7 +1104,7 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) } if (ls->ls_recover_submit[jid]) { - fs_info(sdp, "recover_slot jid %d gen %u prev %u", + fs_info(sdp, "recover_slot jid %d gen %u prev %u\n", jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); } ls->ls_recover_submit[jid] = ls->ls_recover_block; @@ -1132,7 +1134,7 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY); spin_unlock(&ls->ls_recover_spin); } @@ -1269,7 +1271,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); return 0; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 610613fb65b..3966fadbceb 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -18,6 +18,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/bio.h> +#include <linux/blkdev.h> #include <linux/writeback.h> #include <linux/list_sort.h> @@ -145,8 +146,10 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) { struct list_head *head = &sdp->sd_ail1_list; struct gfs2_trans *tr; + struct blk_plug plug; trace_gfs2_ail_flush(sdp, wbc, 1); + blk_start_plug(&plug); spin_lock(&sdp->sd_ail_lock); restart: list_for_each_entry_reverse(tr, head, tr_list) { @@ -156,6 +159,7 @@ restart: goto restart; } spin_unlock(&sdp->sd_ail_lock); + blk_finish_plug(&plug); trace_gfs2_ail_flush(sdp, wbc, 0); } @@ -297,6 +301,23 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) } /** + * gfs2_log_release - Release a given number of log blocks + * @sdp: The GFS2 superblock + * @blks: The number of blocks + * + */ + +void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) +{ + + atomic_add(blks, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, blks); + gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= + sdp->sd_jdesc->jd_blocks); + up_read(&sdp->sd_log_flush_lock); +} + +/** * gfs2_log_reserve - Make a log reservation * @sdp: The GFS2 superblock * @blks: The number of blocks to reserve @@ -354,7 +375,10 @@ retry: wake_up(&sdp->sd_log_waitq); down_read(&sdp->sd_log_flush_lock); - + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + gfs2_log_release(sdp, blks); + return -EROFS; + } return 0; } @@ -410,24 +434,22 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer static unsigned int calc_reserved(struct gfs2_sbd *sdp) { unsigned int reserved = 0; - unsigned int mbuf_limit, metabufhdrs_needed; - unsigned int dbuf_limit, databufhdrs_needed; - unsigned int revokes = 0; + unsigned int mbuf; + unsigned int dbuf; + struct gfs2_trans *tr = sdp->sd_log_tr; - mbuf_limit = buf_limit(sdp); - metabufhdrs_needed = (sdp->sd_log_commited_buf + - (mbuf_limit - 1)) / mbuf_limit; - dbuf_limit = databuf_limit(sdp); - databufhdrs_needed = (sdp->sd_log_commited_databuf + - (dbuf_limit - 1)) / dbuf_limit; + if (tr) { + mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; + dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; + reserved = mbuf + dbuf; + /* Account for header blocks */ + reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp)); + reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp)); + } if (sdp->sd_log_commited_revoke > 0) - revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, + reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, sizeof(u64)); - - reserved = sdp->sd_log_commited_buf + metabufhdrs_needed + - sdp->sd_log_commited_databuf + databufhdrs_needed + - revokes; /* One for the overall header */ if (reserved) reserved++; @@ -551,10 +573,10 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) struct buffer_head *bh = bd->bd_bh; struct gfs2_glock *gl = bd->bd_gl; - gfs2_remove_from_ail(bd); - bd->bd_bh = NULL; bh->b_private = NULL; bd->bd_blkno = bh->b_blocknr; + gfs2_remove_from_ail(bd); /* drops ref on bh */ + bd->bd_bh = NULL; bd->bd_ops = &gfs2_revoke_lops; sdp->sd_log_num_revoke++; atomic_inc(&gl->gl_revokes); @@ -669,7 +691,8 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) * */ -void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) +void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + enum gfs2_flush_type type) { struct gfs2_trans *tr; @@ -682,36 +705,25 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) } trace_gfs2_log_flush(sdp, 1); + sdp->sd_log_flush_head = sdp->sd_log_head; + sdp->sd_log_flush_wrapped = 0; tr = sdp->sd_log_tr; if (tr) { sdp->sd_log_tr = NULL; INIT_LIST_HEAD(&tr->tr_ail1_list); INIT_LIST_HEAD(&tr->tr_ail2_list); + tr->tr_first = sdp->sd_log_flush_head; } - if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) { - printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf, - sdp->sd_log_commited_buf); - gfs2_assert_withdraw(sdp, 0); - } - if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) { - printk(KERN_INFO "GFS2: log databuf %u %u\n", - sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf); - gfs2_assert_withdraw(sdp, 0); - } gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); - sdp->sd_log_flush_head = sdp->sd_log_head; - sdp->sd_log_flush_wrapped = 0; - if (tr) - tr->tr_first = sdp->sd_log_flush_head; - gfs2_ordered_write(sdp); - lops_before_commit(sdp); + lops_before_commit(sdp, tr); gfs2_log_flush_bio(sdp, WRITE); if (sdp->sd_log_head != sdp->sd_log_flush_head) { + log_flush_wait(sdp); log_write_header(sdp, 0); } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ @@ -723,8 +735,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) gfs2_log_lock(sdp); sdp->sd_log_head = sdp->sd_log_flush_head; sdp->sd_log_blks_reserved = 0; - sdp->sd_log_commited_buf = 0; - sdp->sd_log_commited_databuf = 0; sdp->sd_log_commited_revoke = 0; spin_lock(&sdp->sd_ail_lock); @@ -734,40 +744,96 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) } spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); + + if (atomic_read(&sdp->sd_log_freeze)) + type = FREEZE_FLUSH; + if (type != NORMAL_FLUSH) { + if (!sdp->sd_log_idle) { + for (;;) { + gfs2_ail1_start(sdp); + gfs2_ail1_wait(sdp); + if (gfs2_ail1_empty(sdp)) + break; + } + atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ + trace_gfs2_log_blocks(sdp, -1); + sdp->sd_log_flush_wrapped = 0; + log_write_header(sdp, 0); + sdp->sd_log_head = sdp->sd_log_flush_head; + } + if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH) + gfs2_log_shutdown(sdp); + if (type == FREEZE_FLUSH) { + int error; + + atomic_set(&sdp->sd_log_freeze, 0); + wake_up(&sdp->sd_log_frozen_wait); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, + LM_ST_SHARED, 0, + &sdp->sd_thaw_gh); + if (error) { + printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error); + gfs2_assert_withdraw(sdp, 0); + } + else + gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); + } + } + trace_gfs2_log_flush(sdp, 0); up_write(&sdp->sd_log_flush_lock); kfree(tr); } +/** + * gfs2_merge_trans - Merge a new transaction into a cached transaction + * @old: Original transaction to be expanded + * @new: New transaction to be merged + */ + +static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) +{ + WARN_ON_ONCE(old->tr_attached != 1); + + old->tr_num_buf_new += new->tr_num_buf_new; + old->tr_num_databuf_new += new->tr_num_databuf_new; + old->tr_num_buf_rm += new->tr_num_buf_rm; + old->tr_num_databuf_rm += new->tr_num_databuf_rm; + old->tr_num_revoke += new->tr_num_revoke; + old->tr_num_revoke_rm += new->tr_num_revoke_rm; + + list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); + list_splice_tail_init(&new->tr_buf, &old->tr_buf); +} + static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { unsigned int reserved; unsigned int unused; + unsigned int maxres; gfs2_log_lock(sdp); - sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm; - sdp->sd_log_commited_databuf += tr->tr_num_databuf_new - - tr->tr_num_databuf_rm; - gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) || - (((int)sdp->sd_log_commited_databuf) >= 0)); + if (sdp->sd_log_tr) { + gfs2_merge_trans(sdp->sd_log_tr, tr); + } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { + gfs2_assert_withdraw(sdp, tr->tr_alloced); + sdp->sd_log_tr = tr; + tr->tr_attached = 1; + } + sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; reserved = calc_reserved(sdp); - gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); - unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; + maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; + gfs2_assert_withdraw(sdp, maxres >= reserved); + unused = maxres - reserved; atomic_add(unused, &sdp->sd_log_blks_free); trace_gfs2_log_blocks(sdp, unused); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks); sdp->sd_log_blks_reserved = reserved; - if (sdp->sd_log_tr == NULL && - (tr->tr_num_buf_new || tr->tr_num_databuf_new)) { - gfs2_assert_withdraw(sdp, tr->tr_t_gh.gh_gl); - sdp->sd_log_tr = tr; - tr->tr_attached = 1; - } gfs2_log_unlock(sdp); } @@ -804,13 +870,8 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) void gfs2_log_shutdown(struct gfs2_sbd *sdp) { - down_write(&sdp->sd_log_flush_lock); - gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); sdp->sd_log_flush_head = sdp->sd_log_head; @@ -818,38 +879,16 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT); - gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); sdp->sd_log_head = sdp->sd_log_flush_head; sdp->sd_log_tail = sdp->sd_log_head; - - up_write(&sdp->sd_log_flush_lock); -} - - -/** - * gfs2_meta_syncfs - sync all the buffers in a filesystem - * @sdp: the filesystem - * - */ - -void gfs2_meta_syncfs(struct gfs2_sbd *sdp) -{ - gfs2_log_flush(sdp, NULL); - for (;;) { - gfs2_ail1_start(sdp); - gfs2_ail1_wait(sdp); - if (gfs2_ail1_empty(sdp)) - break; - } - gfs2_log_flush(sdp, NULL); } static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) { - return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1)); + return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze)); } static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) @@ -876,14 +915,14 @@ int gfs2_logd(void *data) if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } if (gfs2_ail_flush_reqd(sdp)) { gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); gfs2_ail1_empty(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } if (!gfs2_ail_flush_reqd(sdp)) diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 37216634f0a..9499a604921 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -63,14 +63,21 @@ extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, unsigned int ssize); +extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); -extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); +enum gfs2_flush_type { + NORMAL_FLUSH = 0, + SYNC_FLUSH, + SHUTDOWN_FLUSH, + FREEZE_FLUSH +}; +extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + enum gfs2_flush_type type); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); -extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); extern void gfs2_write_revokes(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 17c5b5d7dc8..2c1ae861dc9 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -75,7 +75,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; struct gfs2_bitmap *bi = rgd->rd_bits + index; - if (bi->bi_clone == 0) + if (bi->bi_clone == NULL) return; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL); @@ -83,6 +83,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); clear_bit(GBF_FULL, &bi->bi_flags); rgd->rd_free_clone = rgd->rd_free; + rgd->rd_extfail_pt = rgd->rd_free; } /** @@ -145,8 +146,8 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp) struct gfs2_journal_extent *je; u64 block; - list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { - if (lbn >= je->lblock && lbn < je->lblock + je->blocks) { + list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) { + if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) { block = je->dblock + lbn - je->lblock; gfs2_log_incr_head(sdp); return block; @@ -272,7 +273,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno) nrvecs = max(nrvecs/2, 1U); } - bio->bi_sector = blkno * (sb->s_blocksize >> 9); + bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9); bio->bi_bdev = sb->s_bdev; bio->bi_end_io = gfs2_end_log_write; bio->bi_private = sdp; @@ -490,44 +491,40 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, gfs2_log_unlock(sdp); } -static void buf_lo_before_commit(struct gfs2_sbd *sdp) +static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */ - - gfs2_before_commit(sdp, limit, sdp->sd_log_num_buf, - &sdp->sd_log_le_buf, 0); + unsigned int nbuf; + if (tr == NULL) + return; + nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; + gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0); } static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { - struct list_head *head = &sdp->sd_log_le_buf; + struct list_head *head; struct gfs2_bufdata *bd; - if (tr == NULL) { - gfs2_assert(sdp, list_empty(head)); + if (tr == NULL) return; - } + head = &tr->tr_buf; while (!list_empty(head)) { bd = list_entry(head->next, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); - sdp->sd_log_num_buf--; - gfs2_unpin(sdp, bd->bd_bh, tr); } - gfs2_assert_warn(sdp, !sdp->sd_log_num_buf); } static void buf_lo_before_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, int pass) { - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - if (pass != 0) return; - sdp->sd_found_blocks = 0; - sdp->sd_replayed_blocks = 0; + jd->jd_found_blocks = 0; + jd->jd_replayed_blocks = 0; } static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, @@ -550,9 +547,9 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { blkno = be64_to_cpu(*ptr++); - sdp->sd_found_blocks++; + jd->jd_found_blocks++; - if (gfs2_revoke_check(sdp, blkno, start)) + if (gfs2_revoke_check(jd, blkno, start)) continue; error = gfs2_replay_read_block(jd, start, &bh_log); @@ -573,12 +570,34 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, if (error) break; - sdp->sd_replayed_blocks++; + jd->jd_replayed_blocks++; } return error; } +/** + * gfs2_meta_sync - Sync all buffers associated with a glock + * @gl: The glock + * + */ + +static void gfs2_meta_sync(struct gfs2_glock *gl) +{ + struct address_space *mapping = gfs2_glock2aspace(gl); + struct gfs2_sbd *sdp = gl->gl_sbd; + int error; + + if (mapping == NULL) + mapping = &sdp->sd_aspace; + + filemap_fdatawrite(mapping); + error = filemap_fdatawait(mapping); + + if (error) + gfs2_io_error(gl->gl_sbd); +} + static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); @@ -594,10 +613,10 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) gfs2_meta_sync(ip->i_gl); fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", - jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); + jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); } -static void revoke_lo_before_commit(struct gfs2_sbd *sdp) +static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { struct gfs2_meta_header *mh; unsigned int offset; @@ -656,13 +675,11 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) static void revoke_lo_before_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, int pass) { - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - if (pass != 0) return; - sdp->sd_found_revokes = 0; - sdp->sd_replay_tail = head->lh_tail; + jd->jd_found_revokes = 0; + jd->jd_replay_tail = head->lh_tail; } static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, @@ -694,13 +711,13 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); - error = gfs2_revoke_add(sdp, blkno, start); + error = gfs2_revoke_add(jd, blkno, start); if (error < 0) { brelse(bh); return error; } else if (error) - sdp->sd_found_revokes++; + jd->jd_found_revokes++; if (!--revokes) break; @@ -720,16 +737,16 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); if (error) { - gfs2_revoke_clean(sdp); + gfs2_revoke_clean(jd); return; } if (pass != 1) return; fs_info(sdp, "jid=%u: Found %u revoke tags\n", - jd->jd_jid, sdp->sd_found_revokes); + jd->jd_jid, jd->jd_found_revokes); - gfs2_revoke_clean(sdp); + gfs2_revoke_clean(jd); } /** @@ -737,12 +754,14 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) * */ -static void databuf_lo_before_commit(struct gfs2_sbd *sdp) +static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { - unsigned int limit = buf_limit(sdp) / 2; - - gfs2_before_commit(sdp, limit, sdp->sd_log_num_databuf, - &sdp->sd_log_le_databuf, 1); + unsigned int limit = databuf_limit(sdp); + unsigned int nbuf; + if (tr == NULL) + return; + nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; + gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1); } static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, @@ -766,9 +785,9 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, blkno = be64_to_cpu(*ptr++); esc = be64_to_cpu(*ptr++); - sdp->sd_found_blocks++; + jd->jd_found_blocks++; - if (gfs2_revoke_check(sdp, blkno, start)) + if (gfs2_revoke_check(jd, blkno, start)) continue; error = gfs2_replay_read_block(jd, start, &bh_log); @@ -788,7 +807,7 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, brelse(bh_log); brelse(bh_ip); - sdp->sd_replayed_blocks++; + jd->jd_replayed_blocks++; } return error; @@ -812,26 +831,23 @@ static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) gfs2_meta_sync(ip->i_gl); fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", - jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); + jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); } static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { - struct list_head *head = &sdp->sd_log_le_databuf; + struct list_head *head; struct gfs2_bufdata *bd; - if (tr == NULL) { - gfs2_assert(sdp, list_empty(head)); + if (tr == NULL) return; - } + head = &tr->tr_databuf; while (!list_empty(head)) { bd = list_entry(head->next, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); - sdp->sd_log_num_databuf--; gfs2_unpin(sdp, bd->bd_bh, tr); } - gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); } diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 9ca2e643841..a65a7ba32ff 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -46,12 +46,13 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp) return limit; } -static inline void lops_before_commit(struct gfs2_sbd *sdp) +static inline void lops_before_commit(struct gfs2_sbd *sdp, + struct gfs2_trans *tr) { int x; for (x = 0; gfs2_log_ops[x]; x++) if (gfs2_log_ops[x]->lo_before_commit) - gfs2_log_ops[x]->lo_before_commit(sdp); + gfs2_log_ops[x]->lo_before_commit(sdp, tr); } static inline void lops_after_commit(struct gfs2_sbd *sdp, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index e04d0e09ee7..82b6ac82965 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> @@ -31,11 +33,6 @@ struct workqueue_struct *gfs2_control_wq; -static struct shrinker qd_shrinker = { - .shrink = gfs2_shrink_qd_memory, - .seeks = DEFAULT_SEEKS, -}; - static void gfs2_init_inode_once(void *foo) { struct gfs2_inode *ip = foo; @@ -81,11 +78,16 @@ static int __init init_gfs2_fs(void) gfs2_str2qstr(&gfs2_qdot, "."); gfs2_str2qstr(&gfs2_qdotdot, ".."); + gfs2_quota_hash_init(); error = gfs2_sys_init(); if (error) return error; + error = list_lru_init(&gfs2_qd_lru); + if (error) + goto fail_lru; + error = gfs2_glock_init(); if (error) goto fail; @@ -138,7 +140,7 @@ static int __init init_gfs2_fs(void) if (!gfs2_rsrv_cachep) goto fail; - register_shrinker(&qd_shrinker); + register_shrinker(&gfs2_qd_shrinker); error = register_filesystem(&gfs2_fs_type); if (error) @@ -155,7 +157,7 @@ static int __init init_gfs2_fs(void) goto fail_wq; gfs2_control_wq = alloc_workqueue("gfs2_control", - WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); + WQ_UNBOUND | WQ_FREEZABLE, 0); if (!gfs2_control_wq) goto fail_recovery; @@ -165,7 +167,7 @@ static int __init init_gfs2_fs(void) gfs2_register_debugfs(); - printk("GFS2 installed\n"); + pr_info("GFS2 installed\n"); return 0; @@ -178,7 +180,9 @@ fail_wq: fail_unregister: unregister_filesystem(&gfs2_fs_type); fail: - unregister_shrinker(&qd_shrinker); + list_lru_destroy(&gfs2_qd_lru); +fail_lru: + unregister_shrinker(&gfs2_qd_shrinker); gfs2_glock_exit(); if (gfs2_rsrv_cachep) @@ -213,13 +217,14 @@ fail: static void __exit exit_gfs2_fs(void) { - unregister_shrinker(&qd_shrinker); + unregister_shrinker(&gfs2_qd_shrinker); gfs2_glock_exit(); gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); destroy_workqueue(gfs2_control_wq); + list_lru_destroy(&gfs2_qd_lru); rcu_barrier(); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 0da390686c0..b984a6e190b 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -97,23 +97,10 @@ const struct address_space_operations gfs2_meta_aops = { .releasepage = gfs2_releasepage, }; -/** - * gfs2_meta_sync - Sync all buffers associated with a glock - * @gl: The glock - * - */ - -void gfs2_meta_sync(struct gfs2_glock *gl) -{ - struct address_space *mapping = gfs2_glock2aspace(gl); - int error; - - filemap_fdatawrite(mapping); - error = filemap_fdatawait(mapping); - - if (error) - gfs2_io_error(gl->gl_sbd); -} +const struct address_space_operations gfs2_rgrp_aops = { + .writepage = gfs2_aspace_writepage, + .releasepage = gfs2_releasepage, +}; /** * gfs2_getbuf - Get a buffer with a given address space @@ -134,6 +121,9 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) unsigned long index; unsigned int bufnum; + if (mapping == NULL) + mapping = &sdp->sd_aspace; + shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; index = blkno >> shift; /* convert block to page */ bufnum = blkno - (index << shift); /* block buf index within page */ @@ -146,7 +136,8 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) yield(); } } else { - page = find_lock_page(mapping, index); + page = find_get_page_flags(mapping, index, + FGP_LOCK|FGP_ACCESSED); if (!page) return NULL; } @@ -163,7 +154,6 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) map_bh(bh, sdp->sd_vfs, blkno); unlock_page(page); - mark_page_accessed(page); page_cache_release(page); return bh; @@ -276,27 +266,27 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int struct address_space *mapping = bh->b_page->mapping; struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); struct gfs2_bufdata *bd = bh->b_private; + int was_pinned = 0; if (test_clear_buffer_pinned(bh)) { trace_gfs2_pin(bd, 0); atomic_dec(&sdp->sd_log_pinned); list_del_init(&bd->bd_list); - if (meta) { - gfs2_assert_warn(sdp, sdp->sd_log_num_buf); - sdp->sd_log_num_buf--; + if (meta) tr->tr_num_buf_rm++; - } else { - gfs2_assert_warn(sdp, sdp->sd_log_num_databuf); - sdp->sd_log_num_databuf--; + else tr->tr_num_databuf_rm++; - } tr->tr_touched = 1; + was_pinned = 1; brelse(bh); } if (bd) { spin_lock(&sdp->sd_ail_lock); if (bd->bd_tr) { gfs2_trans_add_revoke(sdp, bd); + } else if (was_pinned) { + bh->b_private = NULL; + kmem_cache_free(gfs2_bufdata_cachep, bd); } spin_unlock(&sdp->sd_ail_lock); } diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 0d4c843b6f8..ac5d8027d33 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -38,31 +38,30 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh, } extern const struct address_space_operations gfs2_meta_aops; +extern const struct address_space_operations gfs2_rgrp_aops; static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) { struct inode *inode = mapping->host; if (mapping->a_ops == &gfs2_meta_aops) return (((struct gfs2_glock *)mapping) - 1)->gl_sbd; + else if (mapping->a_ops == &gfs2_rgrp_aops) + return container_of(mapping, struct gfs2_sbd, sd_aspace); else return inode->i_sb->s_fs_info; } -void gfs2_meta_sync(struct gfs2_glock *gl); - -struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); -int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, - int flags, struct buffer_head **bhp); -int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); -struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); - -void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, - int meta); - -void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); - -int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, - struct buffer_head **bhp); +extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); +extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, + struct buffer_head **bhp); +extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); +extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, + int create); +extern void gfs2_remove_from_journal(struct buffer_head *bh, + struct gfs2_trans *tr, int meta); +extern void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, + struct buffer_head **bhp); static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, struct buffer_head **bhp) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 0262c190b6f..bc564c0d6d1 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -36,6 +38,7 @@ #include "log.h" #include "quota.h" #include "dir.h" +#include "meta_io.h" #include "trace_gfs2.h" #define DO 0 @@ -51,7 +54,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) { spin_lock_init(>->gt_spin); - gt->gt_quota_simul_sync = 64; gt->gt_quota_warn_period = 10; gt->gt_quota_scale_num = 1; gt->gt_quota_scale_den = 1; @@ -63,6 +65,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt) static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; + struct address_space *mapping; sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL); if (!sdp) @@ -91,18 +94,30 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) INIT_LIST_HEAD(&sdp->sd_jindex_list); spin_lock_init(&sdp->sd_jindex_spin); mutex_init(&sdp->sd_jindex_mutex); + init_completion(&sdp->sd_journal_ready); INIT_LIST_HEAD(&sdp->sd_quota_list); mutex_init(&sdp->sd_quota_mutex); + mutex_init(&sdp->sd_quota_sync_mutex); init_waitqueue_head(&sdp->sd_quota_wait); INIT_LIST_HEAD(&sdp->sd_trunc_list); spin_lock_init(&sdp->sd_trunc_lock); + spin_lock_init(&sdp->sd_bitmap_lock); + + mapping = &sdp->sd_aspace; + + address_space_init_once(mapping); + mapping->a_ops = &gfs2_rgrp_aops; + mapping->host = sb->s_bdev->bd_inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_NOFS); + mapping->private_data = NULL; + mapping->backing_dev_info = sb->s_bdi; + mapping->writeback_index = 0; spin_lock_init(&sdp->sd_log_lock); atomic_set(&sdp->sd_log_pinned, 0); - INIT_LIST_HEAD(&sdp->sd_log_le_buf); INIT_LIST_HEAD(&sdp->sd_log_le_revoke); - INIT_LIST_HEAD(&sdp->sd_log_le_databuf); INIT_LIST_HEAD(&sdp->sd_log_le_ordered); spin_lock_init(&sdp->sd_ordered_lock); @@ -115,8 +130,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_rwsem(&sdp->sd_log_flush_lock); atomic_set(&sdp->sd_log_in_flight, 0); init_waitqueue_head(&sdp->sd_log_flush_wait); - - INIT_LIST_HEAD(&sdp->sd_revoke_list); + init_waitqueue_head(&sdp->sd_log_frozen_wait); + atomic_set(&sdp->sd_log_freeze, 0); + atomic_set(&sdp->sd_frozen_root, 0); + init_waitqueue_head(&sdp->sd_frozen_root_wait); return sdp; } @@ -140,7 +157,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) if (sb->sb_magic != GFS2_MAGIC || sb->sb_type != GFS2_METATYPE_SB) { if (!silent) - printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); + pr_warn("not a GFS2 filesystem\n"); return -EINVAL; } @@ -162,7 +179,7 @@ static void end_bio_io_page(struct bio *bio, int error) if (!error) SetPageUptodate(page); else - printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); + pr_warn("error %d reading superblock\n", error); unlock_page(page); } @@ -217,14 +234,14 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) page = alloc_page(GFP_NOFS); if (unlikely(!page)) - return -ENOBUFS; + return -ENOMEM; ClearPageUptodate(page); ClearPageDirty(page); lock_page(page); bio = bio_alloc(GFP_NOFS, 1); - bio->bi_sector = sector * (sb->s_blocksize >> 9); + bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9); bio->bi_bdev = sb->s_bdev; bio_add_page(bio, page, PAGE_SIZE, 0); @@ -407,8 +424,8 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, goto fail_live; } - error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops, - CREATE, &sdp->sd_trans_gl); + error = gfs2_glock_get(sdp, GFS2_FREEZE_LOCK, &gfs2_freeze_glops, + CREATE, &sdp->sd_freeze_gl); if (error) { fs_err(sdp, "can't create transaction glock: %d\n", error); goto fail_rename; @@ -417,7 +434,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, return 0; fail_trans: - gfs2_glock_put(sdp->sd_trans_gl); + gfs2_glock_put(sdp->sd_freeze_gl); fail_rename: gfs2_glock_put(sdp->sd_rename_gl); fail_live: @@ -505,67 +522,6 @@ out: return ret; } -/** - * map_journal_extents - create a reusable "extent" mapping from all logical - * blocks to all physical blocks for the given journal. This will save - * us time when writing journal blocks. Most journals will have only one - * extent that maps all their logical blocks. That's because gfs2.mkfs - * arranges the journal blocks sequentially to maximize performance. - * So the extent would map the first block for the entire file length. - * However, gfs2_jadd can happen while file activity is happening, so - * those journals may not be sequential. Less likely is the case where - * the users created their own journals by mounting the metafs and - * laying it out. But it's still possible. These journals might have - * several extents. - * - * TODO: This should be done in bigger chunks rather than one block at a time, - * but since it's only done at mount time, I'm not worried about the - * time it takes. - */ -static int map_journal_extents(struct gfs2_sbd *sdp) -{ - struct gfs2_jdesc *jd = sdp->sd_jdesc; - unsigned int lb; - u64 db, prev_db; /* logical block, disk block, prev disk block */ - struct gfs2_inode *ip = GFS2_I(jd->jd_inode); - struct gfs2_journal_extent *jext = NULL; - struct buffer_head bh; - int rc = 0; - - prev_db = 0; - - for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) { - bh.b_state = 0; - bh.b_blocknr = 0; - bh.b_size = 1 << ip->i_inode.i_blkbits; - rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0); - db = bh.b_blocknr; - if (rc || !db) { - printk(KERN_INFO "GFS2 journal mapping error %d: lb=" - "%u db=%llu\n", rc, lb, (unsigned long long)db); - break; - } - if (!prev_db || db != prev_db + 1) { - jext = kzalloc(sizeof(struct gfs2_journal_extent), - GFP_KERNEL); - if (!jext) { - printk(KERN_INFO "GFS2 error: out of memory " - "mapping journal extents.\n"); - rc = -ENOMEM; - break; - } - jext->dblock = db; - jext->lblock = lb; - jext->blocks = 1; - list_add_tail(&jext->extent_list, &jd->extent_list); - } else { - jext->blocks++; - } - prev_db = db; - } - return rc; -} - static void gfs2_others_may_mount(struct gfs2_sbd *sdp) { char *message = "FIRSTMOUNT=Done"; @@ -624,6 +580,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) break; INIT_LIST_HEAD(&jd->extent_list); + INIT_LIST_HEAD(&jd->jd_revoke_list); + INIT_WORK(&jd->jd_work, gfs2_recover_func); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { @@ -646,6 +604,48 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) return error; } +/** + * check_journal_clean - Make sure a journal is clean for a spectator mount + * @sdp: The GFS2 superblock + * @jd: The journal descriptor + * + * Returns: 0 if the journal is clean or locked, else an error + */ +static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) +{ + int error; + struct gfs2_holder j_gh; + struct gfs2_log_header_host head; + struct gfs2_inode *ip; + + ip = GFS2_I(jd->jd_inode); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | + GL_EXACT | GL_NOCACHE, &j_gh); + if (error) { + fs_err(sdp, "Error locking journal for spectator mount.\n"); + return -EPERM; + } + error = gfs2_jdesc_check(jd); + if (error) { + fs_err(sdp, "Error checking journal for spectator mount.\n"); + goto out_unlock; + } + error = gfs2_find_jhead(jd, &head); + if (error) { + fs_err(sdp, "Error parsing journal for spectator mount.\n"); + goto out_unlock; + } + if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { + error = -EPERM; + fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter " + "must not be a spectator.\n", jd->jd_jid); + } + +out_unlock: + gfs2_glock_dq_uninit(&j_gh); + return error; +} + static int init_journal(struct gfs2_sbd *sdp, int undo) { struct inode *master = sdp->sd_master_dir->d_inode; @@ -725,15 +725,22 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5); /* Map the extents for this journal's blocks */ - map_journal_extents(sdp); + gfs2_map_journal_extents(sdp, sdp->sd_jdesc); } trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); if (sdp->sd_lockstruct.ls_first) { unsigned int x; for (x = 0; x < sdp->sd_journals; x++) { - error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x), - true); + struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x); + + if (sdp->sd_args.ar_spectator) { + error = check_journal_clean(sdp, jd); + if (error) + goto fail_jinode_gh; + continue; + } + error = gfs2_recover_journal(jd, true); if (error) { fs_err(sdp, "error recovering journal %u: %d\n", x, error); @@ -753,7 +760,15 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags); gfs2_glock_dq_uninit(&ji_gh); jindex = 0; - + if (!sdp->sd_args.ar_spectator) { + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, + &sdp->sd_thaw_gh); + if (error) { + fs_err(sdp, "can't acquire freeze glock: %d\n", error); + goto fail_jinode_gh; + } + } + gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); return 0; fail_jinode_gh: @@ -782,6 +797,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) goto fail_qinode; error = init_journal(sdp, undo); + complete_all(&sdp->sd_journal_ready); if (error) goto fail; @@ -907,40 +923,6 @@ fail: return error; } -static int init_threads(struct gfs2_sbd *sdp, int undo) -{ - struct task_struct *p; - int error = 0; - - if (undo) - goto fail_quotad; - - p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); - if (IS_ERR(p)) { - error = PTR_ERR(p); - fs_err(sdp, "can't start logd thread: %d\n", error); - return error; - } - sdp->sd_logd_process = p; - - p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); - if (IS_ERR(p)) { - error = PTR_ERR(p); - fs_err(sdp, "can't start quotad thread: %d\n", error); - goto fail; - } - sdp->sd_quotad_process = p; - - return 0; - - -fail_quotad: - kthread_stop(sdp->sd_quotad_process); -fail: - kthread_stop(sdp->sd_logd_process); - return error; -} - static const match_table_t nolock_tokens = { { Opt_jid, "jid=%d\n", }, { Opt_err, NULL }, @@ -979,7 +961,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) lm = &gfs2_dlm_ops; #endif } else { - printk(KERN_INFO "GFS2: can't find protocol %s\n", proto); + pr_info("can't find protocol %s\n", proto); return -ENOENT; } @@ -1086,7 +1068,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sdp = init_sbd(sb); if (!sdp) { - printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n"); + pr_warn("can't alloc struct gfs2_sbd\n"); return -ENOMEM; } sdp->sd_args = *args; @@ -1205,15 +1187,11 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent goto fail_per_node; } - error = init_threads(sdp, DO); - if (error) - goto fail_per_node; - if (!(sb->s_flags & MS_RDONLY)) { error = gfs2_make_fs_rw(sdp); if (error) { fs_err(sdp, "can't make FS RW: %d\n", error); - goto fail_threads; + goto fail_per_node; } } @@ -1221,8 +1199,6 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent gfs2_online_uevent(sdp); return 0; -fail_threads: - init_threads(sdp, UNDO); fail_per_node: init_per_node(sdp, UNDO); fail_inodes: @@ -1238,6 +1214,7 @@ fail_sb: fail_locking: init_locking(sdp, &mount_gh, UNDO); fail_lm: + complete_all(&sdp->sd_journal_ready); gfs2_gl_hash_clear(sdp); gfs2_lm_unmount(sdp); fail_debug: @@ -1317,8 +1294,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, if (IS_ERR(s)) goto error_bdev; - if (s->s_root) + if (s->s_root) { + /* + * s_umount nests inside bd_mutex during + * __invalidate_device(). blkdev_put() acquires + * bd_mutex and can't be called under s_umount. Drop + * s_umount temporarily. This is safe as we're + * holding an active reference. + */ + up_write(&s->s_umount); blkdev_put(bdev, mode); + down_write(&s->s_umount); + } memset(&args, 0, sizeof(args)); args.ar_quota = GFS2_QUOTA_DEFAULT; @@ -1330,7 +1317,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, error = gfs2_mount_args(&args, data); if (error) { - printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); + pr_warn("can't parse mount arguments\n"); goto error_super; } @@ -1380,15 +1367,15 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, error = kern_path(dev_name, LOOKUP_FOLLOW, &path); if (error) { - printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", - dev_name, error); + pr_warn("path_lookup on %s returned error %d\n", + dev_name, error); return ERR_PTR(error); } s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); if (IS_ERR(s)) { - printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); + pr_warn("gfs2 mount does not exist\n"); return ERR_CAST(s); } if ((flags ^ s->s_flags) & MS_RDONLY) { @@ -1408,7 +1395,7 @@ static void gfs2_kill_sb(struct super_block *sb) return; } - gfs2_meta_syncfs(sdp); + gfs2_log_flush(sdp, NULL, SYNC_FLUSH); dput(sdp->sd_root_dir); dput(sdp->sd_master_dir); sdp->sd_root_dir = NULL; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 3768c2f40e4..64b29f7f6b4 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -36,6 +36,8 @@ * the quota file, so it is not being constantly read. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/mm.h> @@ -50,6 +52,13 @@ #include <linux/freezer.h> #include <linux/quota.h> #include <linux/dqblk_xfs.h> +#include <linux/lockref.h> +#include <linux/list_lru.h> +#include <linux/rcupdate.h> +#include <linux/rculist_bl.h> +#include <linux/bit_spinlock.h> +#include <linux/jhash.h> +#include <linux/vmalloc.h> #include "gfs2.h" #include "incore.h" @@ -65,36 +74,63 @@ #include "inode.h" #include "util.h" -struct gfs2_quota_change_host { - u64 qc_change; - u32 qc_flags; /* GFS2_QCF_... */ - struct kqid qc_id; -}; +#define GFS2_QD_HASH_SHIFT 12 +#define GFS2_QD_HASH_SIZE (1 << GFS2_QD_HASH_SHIFT) +#define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1) -static LIST_HEAD(qd_lru_list); -static atomic_t qd_lru_count = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(qd_lru_lock); +/* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */ +/* -> sd_bitmap_lock */ +static DEFINE_SPINLOCK(qd_lock); +struct list_lru gfs2_qd_lru; -int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) +static struct hlist_bl_head qd_hash_table[GFS2_QD_HASH_SIZE]; + +static unsigned int gfs2_qd_hash(const struct gfs2_sbd *sdp, + const struct kqid qid) { - struct gfs2_quota_data *qd; - struct gfs2_sbd *sdp; - int nr_to_scan = sc->nr_to_scan; + unsigned int h; - if (nr_to_scan == 0) - goto out; + h = jhash(&sdp, sizeof(struct gfs2_sbd *), 0); + h = jhash(&qid, sizeof(struct kqid), h); - if (!(sc->gfp_mask & __GFP_FS)) - return -1; + return h & GFS2_QD_HASH_MASK; +} + +static inline void spin_lock_bucket(unsigned int hash) +{ + hlist_bl_lock(&qd_hash_table[hash]); +} - spin_lock(&qd_lru_lock); - while (nr_to_scan && !list_empty(&qd_lru_list)) { - qd = list_entry(qd_lru_list.next, - struct gfs2_quota_data, qd_reclaim); +static inline void spin_unlock_bucket(unsigned int hash) +{ + hlist_bl_unlock(&qd_hash_table[hash]); +} + +static void gfs2_qd_dealloc(struct rcu_head *rcu) +{ + struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu); + kmem_cache_free(gfs2_quotad_cachep, qd); +} + +static void gfs2_qd_dispose(struct list_head *list) +{ + struct gfs2_quota_data *qd; + struct gfs2_sbd *sdp; + + while (!list_empty(list)) { + qd = list_entry(list->next, struct gfs2_quota_data, qd_lru); sdp = qd->qd_gl->gl_sbd; + list_del(&qd->qd_lru); + /* Free from the filesystem-specific list */ + spin_lock(&qd_lock); list_del(&qd->qd_list); + spin_unlock(&qd_lock); + + spin_lock_bucket(qd->qd_hash); + hlist_bl_del_rcu(&qd->qd_hlist); + spin_unlock_bucket(qd->qd_hash); gfs2_assert_warn(sdp, !qd->qd_change); gfs2_assert_warn(sdp, !qd->qd_slot_count); @@ -104,19 +140,59 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) atomic_dec(&sdp->sd_quota_count); /* Delete it from the common reclaim list */ - list_del_init(&qd->qd_reclaim); - atomic_dec(&qd_lru_count); - spin_unlock(&qd_lru_lock); - kmem_cache_free(gfs2_quotad_cachep, qd); - spin_lock(&qd_lru_lock); - nr_to_scan--; + call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); } - spin_unlock(&qd_lru_lock); +} -out: - return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; + +static enum lru_status gfs2_qd_isolate(struct list_head *item, spinlock_t *lock, void *arg) +{ + struct list_head *dispose = arg; + struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru); + + if (!spin_trylock(&qd->qd_lockref.lock)) + return LRU_SKIP; + + if (qd->qd_lockref.count == 0) { + lockref_mark_dead(&qd->qd_lockref); + list_move(&qd->qd_lru, dispose); + } + + spin_unlock(&qd->qd_lockref.lock); + return LRU_REMOVED; } +static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + LIST_HEAD(dispose); + unsigned long freed; + + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + + freed = list_lru_walk_node(&gfs2_qd_lru, sc->nid, gfs2_qd_isolate, + &dispose, &sc->nr_to_scan); + + gfs2_qd_dispose(&dispose); + + return freed; +} + +static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return vfs_pressure_ratio(list_lru_count_node(&gfs2_qd_lru, sc->nid)); +} + +struct shrinker gfs2_qd_shrinker = { + .count_objects = gfs2_qd_shrink_count, + .scan_objects = gfs2_qd_shrink_scan, + .seeks = DEFAULT_SEEKS, + .flags = SHRINKER_NUMA_AWARE, +}; + + static u64 qd2index(struct gfs2_quota_data *qd) { struct kqid qid = qd->qd_id; @@ -134,168 +210,158 @@ static u64 qd2offset(struct gfs2_quota_data *qd) return offset; } -static int qd_alloc(struct gfs2_sbd *sdp, struct kqid qid, - struct gfs2_quota_data **qdp) +static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid) { struct gfs2_quota_data *qd; int error; qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS); if (!qd) - return -ENOMEM; + return NULL; - atomic_set(&qd->qd_count, 1); + qd->qd_sbd = sdp; + qd->qd_lockref.count = 1; + spin_lock_init(&qd->qd_lockref.lock); qd->qd_id = qid; qd->qd_slot = -1; - INIT_LIST_HEAD(&qd->qd_reclaim); + INIT_LIST_HEAD(&qd->qd_lru); + qd->qd_hash = hash; error = gfs2_glock_get(sdp, qd2index(qd), &gfs2_quota_glops, CREATE, &qd->qd_gl); if (error) goto fail; - *qdp = qd; - - return 0; + return qd; fail: kmem_cache_free(gfs2_quotad_cachep, qd); - return error; + return NULL; } -static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, - struct gfs2_quota_data **qdp) +static struct gfs2_quota_data *gfs2_qd_search_bucket(unsigned int hash, + const struct gfs2_sbd *sdp, + struct kqid qid) { - struct gfs2_quota_data *qd = NULL, *new_qd = NULL; - int error, found; - - *qdp = NULL; + struct gfs2_quota_data *qd; + struct hlist_bl_node *h; - for (;;) { - found = 0; - spin_lock(&qd_lru_lock); - list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - if (qid_eq(qd->qd_id, qid)) { - if (!atomic_read(&qd->qd_count) && - !list_empty(&qd->qd_reclaim)) { - /* Remove it from reclaim list */ - list_del_init(&qd->qd_reclaim); - atomic_dec(&qd_lru_count); - } - atomic_inc(&qd->qd_count); - found = 1; - break; - } + hlist_bl_for_each_entry_rcu(qd, h, &qd_hash_table[hash], qd_hlist) { + if (!qid_eq(qd->qd_id, qid)) + continue; + if (qd->qd_sbd != sdp) + continue; + if (lockref_get_not_dead(&qd->qd_lockref)) { + list_lru_del(&gfs2_qd_lru, &qd->qd_lru); + return qd; } + } - if (!found) - qd = NULL; + return NULL; +} - if (!qd && new_qd) { - qd = new_qd; - list_add(&qd->qd_list, &sdp->sd_quota_list); - atomic_inc(&sdp->sd_quota_count); - new_qd = NULL; - } - spin_unlock(&qd_lru_lock); +static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, + struct gfs2_quota_data **qdp) +{ + struct gfs2_quota_data *qd, *new_qd; + unsigned int hash = gfs2_qd_hash(sdp, qid); - if (qd) { - if (new_qd) { - gfs2_glock_put(new_qd->qd_gl); - kmem_cache_free(gfs2_quotad_cachep, new_qd); - } - *qdp = qd; - return 0; - } + rcu_read_lock(); + *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); + rcu_read_unlock(); - error = qd_alloc(sdp, qid, &new_qd); - if (error) - return error; + if (qd) + return 0; + + new_qd = qd_alloc(hash, sdp, qid); + if (!new_qd) + return -ENOMEM; + + spin_lock(&qd_lock); + spin_lock_bucket(hash); + *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); + if (qd == NULL) { + *qdp = new_qd; + list_add(&new_qd->qd_list, &sdp->sd_quota_list); + hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]); + atomic_inc(&sdp->sd_quota_count); + } + spin_unlock_bucket(hash); + spin_unlock(&qd_lock); + + if (qd) { + gfs2_glock_put(new_qd->qd_gl); + kmem_cache_free(gfs2_quotad_cachep, new_qd); } + + return 0; } + static void qd_hold(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - gfs2_assert(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); + gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref)); + lockref_get(&qd->qd_lockref); } static void qd_put(struct gfs2_quota_data *qd) { - if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) { - /* Add to the reclaim list */ - list_add_tail(&qd->qd_reclaim, &qd_lru_list); - atomic_inc(&qd_lru_count); - spin_unlock(&qd_lru_lock); - } + if (lockref_put_or_lock(&qd->qd_lockref)) + return; + + qd->qd_lockref.count = 0; + list_lru_add(&gfs2_qd_lru, &qd->qd_lru); + spin_unlock(&qd->qd_lockref.lock); + } static int slot_get(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - unsigned int c, o = 0, b; - unsigned char byte = 0; + struct gfs2_sbd *sdp = qd->qd_sbd; + unsigned int bit; + int error = 0; - spin_lock(&qd_lru_lock); + spin_lock(&sdp->sd_bitmap_lock); + if (qd->qd_slot_count != 0) + goto out; - if (qd->qd_slot_count++) { - spin_unlock(&qd_lru_lock); - return 0; + error = -ENOSPC; + bit = find_first_zero_bit(sdp->sd_quota_bitmap, sdp->sd_quota_slots); + if (bit < sdp->sd_quota_slots) { + set_bit(bit, sdp->sd_quota_bitmap); + qd->qd_slot = bit; + error = 0; +out: + qd->qd_slot_count++; } + spin_unlock(&sdp->sd_bitmap_lock); - for (c = 0; c < sdp->sd_quota_chunks; c++) - for (o = 0; o < PAGE_SIZE; o++) { - byte = sdp->sd_quota_bitmap[c][o]; - if (byte != 0xFF) - goto found; - } - - goto fail; - -found: - for (b = 0; b < 8; b++) - if (!(byte & (1 << b))) - break; - qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b; - - if (qd->qd_slot >= sdp->sd_quota_slots) - goto fail; - - sdp->sd_quota_bitmap[c][o] |= 1 << b; - - spin_unlock(&qd_lru_lock); - - return 0; - -fail: - qd->qd_slot_count--; - spin_unlock(&qd_lru_lock); - return -ENOSPC; + return error; } static void slot_hold(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_sbd; - spin_lock(&qd_lru_lock); + spin_lock(&sdp->sd_bitmap_lock); gfs2_assert(sdp, qd->qd_slot_count); qd->qd_slot_count++; - spin_unlock(&qd_lru_lock); + spin_unlock(&sdp->sd_bitmap_lock); } static void slot_put(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_sbd; - spin_lock(&qd_lru_lock); + spin_lock(&sdp->sd_bitmap_lock); gfs2_assert(sdp, qd->qd_slot_count); if (!--qd->qd_slot_count) { - gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); + BUG_ON(!test_and_clear_bit(qd->qd_slot, sdp->sd_quota_bitmap)); qd->qd_slot = -1; } - spin_unlock(&qd_lru_lock); + spin_unlock(&sdp->sd_bitmap_lock); } static int bh_get(struct gfs2_quota_data *qd) @@ -359,6 +425,24 @@ static void bh_put(struct gfs2_quota_data *qd) mutex_unlock(&sdp->sd_quota_mutex); } +static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, + u64 *sync_gen) +{ + if (test_bit(QDF_LOCKED, &qd->qd_flags) || + !test_bit(QDF_CHANGE, &qd->qd_flags) || + (sync_gen && (qd->qd_sync_gen >= *sync_gen))) + return 0; + + if (!lockref_get_not_dead(&qd->qd_lockref)) + return 0; + + list_move_tail(&qd->qd_list, &sdp->sd_quota_list); + set_bit(QDF_LOCKED, &qd->qd_flags); + qd->qd_change_sync = qd->qd_change; + slot_hold(qd); + return 1; +} + static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) { struct gfs2_quota_data *qd = NULL; @@ -370,31 +454,18 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) if (sdp->sd_vfs->s_flags & MS_RDONLY) return 0; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - if (test_bit(QDF_LOCKED, &qd->qd_flags) || - !test_bit(QDF_CHANGE, &qd->qd_flags) || - qd->qd_sync_gen >= sdp->sd_quota_sync_gen) - continue; - - list_move_tail(&qd->qd_list, &sdp->sd_quota_list); - - set_bit(QDF_LOCKED, &qd->qd_flags); - gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); - qd->qd_change_sync = qd->qd_change; - gfs2_assert_warn(sdp, qd->qd_slot_count); - qd->qd_slot_count++; - found = 1; - - break; + found = qd_check_sync(sdp, qd, &sdp->sd_quota_sync_gen); + if (found) + break; } if (!found) qd = NULL; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (qd) { gfs2_assert_warn(sdp, qd->qd_change_sync); @@ -412,43 +483,6 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) return 0; } -static int qd_trylock(struct gfs2_quota_data *qd) -{ - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - - if (sdp->sd_vfs->s_flags & MS_RDONLY) - return 0; - - spin_lock(&qd_lru_lock); - - if (test_bit(QDF_LOCKED, &qd->qd_flags) || - !test_bit(QDF_CHANGE, &qd->qd_flags)) { - spin_unlock(&qd_lru_lock); - return 0; - } - - list_move_tail(&qd->qd_list, &sdp->sd_quota_list); - - set_bit(QDF_LOCKED, &qd->qd_flags); - gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); - qd->qd_change_sync = qd->qd_change; - gfs2_assert_warn(sdp, qd->qd_slot_count); - qd->qd_slot_count++; - - spin_unlock(&qd_lru_lock); - - gfs2_assert_warn(sdp, qd->qd_change_sync); - if (bh_get(qd)) { - clear_bit(QDF_LOCKED, &qd->qd_flags); - slot_put(qd); - qd_put(qd); - return 0; - } - - return 1; -} - static void qd_unlock(struct gfs2_quota_data *qd) { gfs2_assert_warn(qd->qd_gl->gl_sbd, @@ -598,9 +632,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) x = be64_to_cpu(qc->qc_change) + change; qc->qc_change = cpu_to_be64(x); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); qd->qd_change = x; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (!x) { gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); @@ -644,7 +678,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct buffer_head *bh; struct page *page; void *kaddr, *ptr; - struct gfs2_quota q, *qp; + struct gfs2_quota q; int err, nbytes; u64 size; @@ -660,28 +694,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, return err; err = -EIO; - qp = &q; - qp->qu_value = be64_to_cpu(qp->qu_value); - qp->qu_value += change; - qp->qu_value = cpu_to_be64(qp->qu_value); - qd->qd_qb.qb_value = qp->qu_value; + be64_add_cpu(&q.qu_value, change); + qd->qd_qb.qb_value = q.qu_value; if (fdq) { if (fdq->d_fieldmask & FS_DQ_BSOFT) { - qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_warn = qp->qu_warn; + q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_warn = q.qu_warn; } if (fdq->d_fieldmask & FS_DQ_BHARD) { - qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_limit = qp->qu_limit; + q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_limit = q.qu_limit; } if (fdq->d_fieldmask & FS_DQ_BCOUNT) { - qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_value = qp->qu_value; + q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_value = q.qu_value; } } /* Write the quota into the quota file on disk */ - ptr = qp; + ptr = &q; nbytes = sizeof(struct gfs2_quota); get_a_page: page = find_or_create_page(mapping, index, GFP_NOFS); @@ -747,6 +778,7 @@ get_a_page: i_size_write(inode, size); inode->i_mtime = inode->i_atime = CURRENT_TIME; mark_inode_dirty(inode); + set_bit(QDF_REFRESH, &qd->qd_flags); return 0; unlock_out: @@ -759,6 +791,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) { struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); + struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks, ind_blocks; struct gfs2_holder *ghs, i_gh; unsigned int qx, x; @@ -811,7 +844,8 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; reserved = 1 + (nalloc * (data_blocks + ind_blocks)); - error = gfs2_inplace_reserve(ip, reserved, 0); + ap.target = reserved; + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_alloc; @@ -846,7 +880,7 @@ out: gfs2_glock_dq_uninit(&ghs[qx]); mutex_unlock(&ip->i_inode.i_mutex); kfree(ghs); - gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); + gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl, NORMAL_FLUSH); return error; } @@ -970,9 +1004,9 @@ static int need_sync(struct gfs2_quota_data *qd) if (!qd->qd_qb.qb_limit) return 0; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); value = qd->qd_change; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); spin_lock(>->gt_spin); num = gt->gt_quota_scale_num; @@ -997,9 +1031,11 @@ static int need_sync(struct gfs2_quota_data *qd) void gfs2_quota_unlock(struct gfs2_inode *ip) { + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qda[4]; unsigned int count = 0; unsigned int x; + int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) goto out; @@ -1012,9 +1048,25 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) sync = need_sync(qd); gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); + if (!sync) + continue; + + spin_lock(&qd_lock); + found = qd_check_sync(sdp, qd, NULL); + spin_unlock(&qd_lock); + + if (!found) + continue; - if (sync && qd_trylock(qd)) - qda[count++] = qd; + gfs2_assert_warn(sdp, qd->qd_change_sync); + if (bh_get(qd)) { + clear_bit(QDF_LOCKED, &qd->qd_flags); + slot_put(qd); + qd_put(qd); + continue; + } + + qda[count++] = qd; } if (count) { @@ -1033,10 +1085,10 @@ static int print_message(struct gfs2_quota_data *qd, char *type) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", - sdp->sd_fsname, type, - (qd->qd_id.type == USRQUOTA) ? "user" : "group", - from_kqid(&init_user_ns, qd->qd_id)); + fs_info(sdp, "quota %s for %s %u\n", + type, + (qd->qd_id.type == USRQUOTA) ? "user" : "group", + from_kqid(&init_user_ns, qd->qd_id)); return 0; } @@ -1063,9 +1115,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) continue; value = (s64)be64_to_cpu(qd->qd_qb.qb_value); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); value += qd->qd_change; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { print_message(qd, "exceeded"); @@ -1114,17 +1166,18 @@ int gfs2_quota_sync(struct super_block *sb, int type) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; - unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync); + unsigned int max_qd = PAGE_SIZE/sizeof(struct gfs2_holder); unsigned int num_qd; unsigned int x; int error = 0; - sdp->sd_quota_sync_gen++; - qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL); if (!qda) return -ENOMEM; + mutex_lock(&sdp->sd_quota_sync_mutex); + sdp->sd_quota_sync_gen++; + do { num_qd = 0; @@ -1149,6 +1202,7 @@ int gfs2_quota_sync(struct super_block *sb, int type) } } while (!error && num_qd == max_qd); + mutex_unlock(&sdp->sd_quota_sync_mutex); kfree(qda); return error; @@ -1172,17 +1226,6 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid) return error; } -static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) -{ - const struct gfs2_quota_change *str = buf; - - qc->qc_change = be64_to_cpu(str->qc_change); - qc->qc_flags = be32_to_cpu(str->qc_flags); - qc->qc_id = make_kqid(&init_user_ns, - (qc->qc_flags & GFS2_QCF_USER)?USRQUOTA:GRPQUOTA, - be32_to_cpu(str->qc_id)); -} - int gfs2_quota_init(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); @@ -1190,6 +1233,8 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; unsigned int x, slot = 0; unsigned int found = 0; + unsigned int hash; + unsigned int bm_size; u64 dblock; u32 extlen = 0; int error; @@ -1198,23 +1243,19 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) return -EIO; sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; - sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); - + bm_size = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * sizeof(unsigned long)); + bm_size *= sizeof(unsigned long); error = -ENOMEM; - - sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks, - sizeof(unsigned char *), GFP_NOFS); + sdp->sd_quota_bitmap = kzalloc(bm_size, GFP_NOFS | __GFP_NOWARN); + if (sdp->sd_quota_bitmap == NULL) + sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS | + __GFP_ZERO, PAGE_KERNEL); if (!sdp->sd_quota_bitmap) return error; - for (x = 0; x < sdp->sd_quota_chunks; x++) { - sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_NOFS); - if (!sdp->sd_quota_bitmap[x]) - goto fail; - } - for (x = 0; x < blocks; x++) { struct buffer_head *bh; + const struct gfs2_quota_change *qc; unsigned int y; if (!extlen) { @@ -1232,33 +1273,41 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) goto fail; } + qc = (const struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; y++, slot++) { - struct gfs2_quota_change_host qc; struct gfs2_quota_data *qd; - - gfs2_quota_change_in(&qc, bh->b_data + - sizeof(struct gfs2_meta_header) + - y * sizeof(struct gfs2_quota_change)); - if (!qc.qc_change) + s64 qc_change = be64_to_cpu(qc->qc_change); + u32 qc_flags = be32_to_cpu(qc->qc_flags); + enum quota_type qtype = (qc_flags & GFS2_QCF_USER) ? + USRQUOTA : GRPQUOTA; + struct kqid qc_id = make_kqid(&init_user_ns, qtype, + be32_to_cpu(qc->qc_id)); + qc++; + if (!qc_change) continue; - error = qd_alloc(sdp, qc.qc_id, &qd); - if (error) { + hash = gfs2_qd_hash(sdp, qc_id); + qd = qd_alloc(hash, sdp, qc_id); + if (qd == NULL) { brelse(bh); goto fail; } set_bit(QDF_CHANGE, &qd->qd_flags); - qd->qd_change = qc.qc_change; + qd->qd_change = qc_change; qd->qd_slot = slot; qd->qd_slot_count = 1; - spin_lock(&qd_lru_lock); - gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1); + spin_lock(&qd_lock); + BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap)); list_add(&qd->qd_list, &sdp->sd_quota_list); atomic_inc(&sdp->sd_quota_count); - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); + + spin_lock_bucket(hash); + hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]); + spin_unlock_bucket(hash); found++; } @@ -1282,51 +1331,41 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) { struct list_head *head = &sdp->sd_quota_list; struct gfs2_quota_data *qd; - unsigned int x; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); while (!list_empty(head)) { qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); - if (atomic_read(&qd->qd_count) > 1 || - (atomic_read(&qd->qd_count) && - !test_bit(QDF_CHANGE, &qd->qd_flags))) { - list_move(&qd->qd_list, head); - spin_unlock(&qd_lru_lock); - schedule(); - spin_lock(&qd_lru_lock); - continue; - } - list_del(&qd->qd_list); + /* Also remove if this qd exists in the reclaim list */ - if (!list_empty(&qd->qd_reclaim)) { - list_del_init(&qd->qd_reclaim); - atomic_dec(&qd_lru_count); - } + list_lru_del(&gfs2_qd_lru, &qd->qd_lru); atomic_dec(&sdp->sd_quota_count); - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); + + spin_lock_bucket(qd->qd_hash); + hlist_bl_del_rcu(&qd->qd_hlist); + spin_unlock_bucket(qd->qd_hash); - if (!atomic_read(&qd->qd_count)) { - gfs2_assert_warn(sdp, !qd->qd_change); - gfs2_assert_warn(sdp, !qd->qd_slot_count); - } else - gfs2_assert_warn(sdp, qd->qd_slot_count == 1); + gfs2_assert_warn(sdp, !qd->qd_change); + gfs2_assert_warn(sdp, !qd->qd_slot_count); gfs2_assert_warn(sdp, !qd->qd_bh_count); gfs2_glock_put(qd->qd_gl); - kmem_cache_free(gfs2_quotad_cachep, qd); + call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); } - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); if (sdp->sd_quota_bitmap) { - for (x = 0; x < sdp->sd_quota_chunks; x++) - kfree(sdp->sd_quota_bitmap[x]); - kfree(sdp->sd_quota_bitmap); + if (is_vmalloc_addr(sdp->sd_quota_bitmap)) + vfree(sdp->sd_quota_bitmap); + else + kfree(sdp->sd_quota_bitmap); + sdp->sd_quota_bitmap = NULL; } } @@ -1458,7 +1497,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb, } fqs->qs_uquota.qfs_nextents = 1; /* unsupported */ fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */ - fqs->qs_incoredqs = atomic_read(&qd_lru_count); + fqs->qs_incoredqs = list_lru_count(&gfs2_qd_lru); return 0; } @@ -1569,10 +1608,12 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (gfs2_is_stuffed(ip)) alloc_required = 1; if (alloc_required) { + struct gfs2_alloc_parms ap = { .aflags = 0, }; gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); blocks = 1 + data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip, blocks, 0); + ap.target = blocks; + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_i; blocks += gfs2_rg_blocks(ip, blocks); @@ -1608,3 +1649,11 @@ const struct quotactl_ops gfs2_quotactl_ops = { .get_dqblk = gfs2_get_dqblk, .set_dqblk = gfs2_set_dqblk, }; + +void __init gfs2_quota_hash_init(void) +{ + unsigned i; + + for(i = 0; i < GFS2_QD_HASH_SIZE; i++) + INIT_HLIST_BL_HEAD(&qd_hash_table[i]); +} diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 4f5e6e44ed8..55d506eb3c4 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -10,9 +10,10 @@ #ifndef __QUOTA_DOT_H__ #define __QUOTA_DOT_H__ +#include <linux/list_lru.h> + struct gfs2_inode; struct gfs2_sbd; -struct shrink_control; #define NO_UID_QUOTA_CHANGE INVALID_UID #define NO_GID_QUOTA_CHANGE INVALID_GID @@ -53,8 +54,9 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(struct shrinker *shrink, - struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; +extern struct shrinker gfs2_qd_shrinker; +extern struct list_lru gfs2_qd_lru; +extern void __init gfs2_quota_hash_init(void); #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 963b2d75200..94555d4c569 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -52,9 +52,9 @@ int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, return error; } -int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) +int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) { - struct list_head *head = &sdp->sd_revoke_list; + struct list_head *head = &jd->jd_revoke_list; struct gfs2_revoke_replay *rr; int found = 0; @@ -81,13 +81,13 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) return 1; } -int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) +int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) { struct gfs2_revoke_replay *rr; int wrap, a, b, revoke; int found = 0; - list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) { + list_for_each_entry(rr, &jd->jd_revoke_list, rr_list) { if (rr->rr_blkno == blkno) { found = 1; break; @@ -97,17 +97,17 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) if (!found) return 0; - wrap = (rr->rr_where < sdp->sd_replay_tail); - a = (sdp->sd_replay_tail < where); + wrap = (rr->rr_where < jd->jd_replay_tail); + a = (jd->jd_replay_tail < where); b = (where < rr->rr_where); revoke = (wrap) ? (a || b) : (a && b); return revoke; } -void gfs2_revoke_clean(struct gfs2_sbd *sdp) +void gfs2_revoke_clean(struct gfs2_jdesc *jd) { - struct list_head *head = &sdp->sd_revoke_list; + struct list_head *head = &jd->jd_revoke_list; struct gfs2_revoke_replay *rr; while (!list_empty(head)) { @@ -454,7 +454,7 @@ void gfs2_recover_func(struct work_struct *work) struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host head; - struct gfs2_holder j_gh, ji_gh, t_gh; + struct gfs2_holder j_gh, ji_gh, thaw_gh; unsigned long t; int ro = 0; unsigned int pass; @@ -508,11 +508,11 @@ void gfs2_recover_func(struct work_struct *work) t = jiffies; - /* Acquire a shared hold on the transaction lock */ + /* Acquire a shared hold on the freeze lock */ - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | LM_FLAG_PRIORITY | - GL_NOCACHE, &t_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, + LM_FLAG_NOEXP | LM_FLAG_PRIORITY, + &thaw_gh); if (error) goto fail_gunlock_ji; @@ -538,7 +538,7 @@ void gfs2_recover_func(struct work_struct *work) fs_warn(sdp, "jid=%u: Can't replay: read-only block " "device\n", jd->jd_jid); error = -EROFS; - goto fail_gunlock_tr; + goto fail_gunlock_thaw; } fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid); @@ -549,14 +549,14 @@ void gfs2_recover_func(struct work_struct *work) head.lh_blkno, pass); lops_after_scan(jd, error, pass); if (error) - goto fail_gunlock_tr; + goto fail_gunlock_thaw; } error = clean_journal(jd, &head); if (error) - goto fail_gunlock_tr; + goto fail_gunlock_thaw; - gfs2_glock_dq_uninit(&t_gh); + gfs2_glock_dq_uninit(&thaw_gh); t = DIV_ROUND_UP(jiffies - t, HZ); fs_info(sdp, "jid=%u: Journal replayed in %lus\n", jd->jd_jid, t); @@ -572,8 +572,8 @@ void gfs2_recover_func(struct work_struct *work) fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); goto done; -fail_gunlock_tr: - gfs2_glock_dq_uninit(&t_gh); +fail_gunlock_thaw: + gfs2_glock_dq_uninit(&thaw_gh); fail_gunlock_ji: if (jlocked) { gfs2_glock_dq_uninit(&ji_gh); @@ -587,7 +587,7 @@ fail: gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); done: clear_bit(JDF_RECOVERY, &jd->jd_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&jd->jd_flags, JDF_RECOVERY); } diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 2226136c764..6142836cce9 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -23,9 +23,9 @@ static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, struct buffer_head **bh); -extern int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); -extern int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); -extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); +extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 69317435faa..f4cb9c0d6bb 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> @@ -57,6 +59,11 @@ * 3 = Used (metadata) */ +struct gfs2_extent { + struct gfs2_rbm rbm; + u32 len; +}; + static const char valid_change[16] = { /* current */ /* n */ 0, 1, 1, 1, @@ -65,8 +72,9 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, - const struct gfs2_inode *ip, bool nowrap); +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, + const struct gfs2_inode *ip, bool nowrap, + const struct gfs2_alloc_parms *ap); /** @@ -81,32 +89,32 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - unsigned int buflen = rbm->bi->bi_len; + struct gfs2_bitmap *bi = rbm_bi(rbm); + unsigned int buflen = bi->bi_len; const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); - end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; + byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = bi->bi_bh->b_data + bi->bi_offset + buflen; BUG_ON(byte1 >= end); cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { - printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " - "new_state=%d\n", rbm->offset, cur_state, new_state); - printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", - (unsigned long long)rbm->rgd->rd_addr, - rbm->bi->bi_start); - printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", - rbm->bi->bi_offset, rbm->bi->bi_len); + pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n", + rbm->offset, cur_state, new_state); + pr_warn("rgrp=0x%llx bi_start=0x%x\n", + (unsigned long long)rbm->rgd->rd_addr, bi->bi_start); + pr_warn("bi_offset=0x%x bi_len=0x%x\n", + bi->bi_offset, bi->bi_len); dump_stack(); gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (do_clone && rbm->bi->bi_clone) { - byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + if (do_clone && bi->bi_clone) { + byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -121,7 +129,8 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; + struct gfs2_bitmap *bi = rbm_bi(rbm); + const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; const u8 *byte; unsigned int bit; @@ -252,29 +261,53 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) { u64 rblock = block - rbm->rgd->rd_data0; - u32 x; if (WARN_ON_ONCE(rblock > UINT_MAX)) return -EINVAL; if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) return -E2BIG; - rbm->bi = rbm->rgd->rd_bits; + rbm->bii = 0; rbm->offset = (u32)(rblock); /* Check if the block is within the first block */ - if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) + if (rbm->offset < rbm_bi(rbm)->bi_blocks) return 0; /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ rbm->offset += (sizeof(struct gfs2_rgrp) - sizeof(struct gfs2_meta_header)) * GFS2_NBBY; - x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; - rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; - rbm->bi += x; + rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; return 0; } /** + * gfs2_rbm_incr - increment an rbm structure + * @rbm: The rbm with rgd already set correctly + * + * This function takes an existing rbm structure and increments it to the next + * viable block offset. + * + * Returns: If incrementing the offset would cause the rbm to go past the + * end of the rgrp, true is returned, otherwise false. + * + */ + +static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) +{ + if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ + rbm->offset++; + return false; + } + if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ + return true; + + rbm->offset = 0; + rbm->bii++; + return false; +} + +/** * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned * @rbm: Position to search (value/result) * @n_unaligned: Number of unaligned blocks to check @@ -285,7 +318,6 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) { - u64 block; u32 n; u8 res; @@ -296,8 +328,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le (*len)--; if (*len == 0) return true; - block = gfs2_rbm_to_block(rbm); - if (gfs2_rbm_from_block(rbm, block + 1)) + if (gfs2_rbm_incr(rbm)) return true; } @@ -306,7 +337,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le /** * gfs2_free_extlen - Return extent length of free blocks - * @rbm: Starting position + * @rrbm: Starting position * @len: Max length to check * * Starting at the block specified by the rbm, see how many free blocks @@ -328,6 +359,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) u32 chunk_size; u8 *ptr, *start, *end; u64 block; + struct gfs2_bitmap *bi; if (n_unaligned && gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) @@ -336,11 +368,12 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) n_unaligned = len & 3; /* Start is now byte aligned */ while (len > 3) { - start = rbm.bi->bi_bh->b_data; - if (rbm.bi->bi_clone) - start = rbm.bi->bi_clone; - end = start + rbm.bi->bi_bh->b_size; - start += rbm.bi->bi_offset; + bi = rbm_bi(&rbm); + start = bi->bi_bh->b_data; + if (bi->bi_clone) + start = bi->bi_clone; + end = start + bi->bi_bh->b_size; + start += bi->bi_offset; BUG_ON(rbm.offset & 3); start += (rbm.offset / GFS2_NBBY); bytes = min_t(u32, len / GFS2_NBBY, (end - start)); @@ -605,12 +638,18 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { + struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm); + /* return reserved blocks to the rgrp */ BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; + /* The rgrp extent failure point is likely not to increase; + it will only do so if the freed blocks are somehow + contiguous with a span of free blocks that follows. Still, + it will force the number to be recalculated later. */ + rgd->rd_extfail_pt += rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); - smp_mb__after_clear_bit(); + clear_bit(GBF_FULL, &bi->bi_flags); } } @@ -634,14 +673,13 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation + * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip) +void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) { - struct inode *inode = &ip->i_inode; - down_write(&ip->i_rw_mutex); - if (ip->i_res && atomic_read(&inode->i_writecount) <= 1) { + if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { gfs2_rs_deltree(ip->i_res); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); @@ -700,11 +738,11 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) { - printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); - printk(KERN_INFO " ri_length = %u\n", rgd->rd_length); - printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); - printk(KERN_INFO " ri_data = %u\n", rgd->rd_data); - printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes); + pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); + pr_info("ri_length = %u\n", rgd->rd_length); + pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); + pr_info("ri_data = %u\n", rgd->rd_data); + pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes); } /** @@ -743,18 +781,21 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi->bi_offset = sizeof(struct gfs2_rgrp); bi->bi_start = 0; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; /* header block */ } else if (x == 0) { bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); bi->bi_offset = sizeof(struct gfs2_rgrp); bi->bi_start = 0; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; /* last block */ } else if (x + 1 == length) { bytes = bytes_left; bi->bi_offset = sizeof(struct gfs2_meta_header); bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; /* other blocks */ } else { bytes = sdp->sd_sb.sb_bsize - @@ -762,6 +803,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi->bi_offset = sizeof(struct gfs2_meta_header); bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; } bytes_left -= bytes; @@ -846,6 +888,7 @@ static int rgd_insert(struct gfs2_rgrpd *rgd) static int read_rindex_entry(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + const unsigned bsize = sdp->sd_sb.sb_bsize; loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); struct gfs2_rindex buf; int error; @@ -883,6 +926,8 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; + rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; + rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; if (rgd->rd_data > sdp->sd_max_rg_data) @@ -1059,7 +1104,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) * Returns: errno */ -int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) +static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl = rgd->rd_gl; @@ -1096,8 +1141,10 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; } - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); @@ -1124,14 +1171,14 @@ fail: return error; } -int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) { u32 rl_flags; if (rgd->rd_flags & GFS2_RDF_UPTODATE) return 0; - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) return gfs2_rgrp_bh_get(rgd); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); @@ -1154,7 +1201,7 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) return 0; - return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); + return gfs2_rgrp_bh_get(rgd); } /** @@ -1392,12 +1439,12 @@ static void rs_insert(struct gfs2_inode *ip) * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks - * @requested: number of blocks required for this allocation + * @ap: the allocation parameters * */ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, - unsigned requested) + const struct gfs2_alloc_parms *ap) { struct gfs2_rbm rbm = { .rgd = rgd, }; u64 goal; @@ -1410,7 +1457,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (S_ISDIR(inode->i_mode)) extlen = 1; else { - extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); + extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target); extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); } if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) @@ -1425,7 +1472,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) return; - ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap); if (ret == 0) { rs->rs_rbm = rbm; rs->rs_free = extlen; @@ -1490,6 +1537,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, * @rbm: The current position in the resource group * @ip: The inode for which we are searching for blocks * @minext: The minimum extent length + * @maxext: A pointer to the maximum extent structure * * This checks the current position in the rgrp to see whether there is * a reservation covering this block. If not then this function is a @@ -1502,7 +1550,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, const struct gfs2_inode *ip, - u32 minext) + u32 minext, + struct gfs2_extent *maxext) { u64 block = gfs2_rbm_to_block(rbm); u32 extlen = 1; @@ -1515,8 +1564,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, */ if (minext) { extlen = gfs2_free_extlen(rbm, minext); - nblock = block + extlen; - if (extlen < minext) + if (extlen <= maxext->len) goto fail; } @@ -1525,9 +1573,17 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * and skip if parts of it are already reserved */ nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); - if (nblock == block) - return 0; + if (nblock == block) { + if (!minext || extlen >= minext) + return 0; + + if (extlen > maxext->len) { + maxext->len = extlen; + maxext->rbm = *rbm; + } fail: + nblock = block + extlen; + } ret = gfs2_rbm_from_block(rbm, nblock); if (ret < 0) return ret; @@ -1538,30 +1594,38 @@ fail: * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find - * @minext: The requested extent length (0 for a single block) + * @minext: Pointer to the requested extent length (NULL for a single block) + * This is updated to be the actual reservation size. * @ip: If set, check for reservations * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. + * @ap: the allocation parameters * * Side effects: * - If looking for free blocks, we set GBF_FULL on each bitmap which * has no free blocks in it. + * - If looking for free blocks, we set rd_extfail_pt on each rgrp which + * has come up short on a free block search. * * Returns: 0 on success, -ENOSPC if there is no block of the requested state */ -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, - const struct gfs2_inode *ip, bool nowrap) +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, + const struct gfs2_inode *ip, bool nowrap, + const struct gfs2_alloc_parms *ap) { struct buffer_head *bh; - struct gfs2_bitmap *initial_bi; + int initial_bii; u32 initial_offset; + int first_bii = rbm->bii; + u32 first_offset = rbm->offset; u32 offset; u8 *buffer; - int index; int n = 0; int iters = rbm->rgd->rd_length; int ret; + struct gfs2_bitmap *bi; + struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, }; /* If we are not starting at the beginning of a bitmap, then we * need to add one to the bitmap count to ensure that we search @@ -1571,52 +1635,55 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, iters++; while(1) { - if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && + bi = rbm_bi(rbm); + if (test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; - bh = rbm->bi->bi_bh; - buffer = bh->b_data + rbm->bi->bi_offset; + bh = bi->bi_bh; + buffer = bh->b_data + bi->bi_offset; WARN_ON(!buffer_uptodate(bh)); - if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) - buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; + if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) + buffer = bi->bi_clone + bi->bi_offset; initial_offset = rbm->offset; - offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); + offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state); if (offset == BFITNOENT) goto bitmap_full; rbm->offset = offset; if (ip == NULL) return 0; - initial_bi = rbm->bi; - ret = gfs2_reservation_check_and_update(rbm, ip, minext); + initial_bii = rbm->bii; + ret = gfs2_reservation_check_and_update(rbm, ip, + minext ? *minext : 0, + &maxext); if (ret == 0) return 0; if (ret > 0) { - n += (rbm->bi - initial_bi); + n += (rbm->bii - initial_bii); goto next_iter; } if (ret == -E2BIG) { - index = 0; + rbm->bii = 0; rbm->offset = 0; - n += (rbm->bi - initial_bi); + n += (rbm->bii - initial_bii); goto res_covered_end_of_rgrp; } return ret; bitmap_full: /* Mark bitmap as full and fall through */ - if ((state == GFS2_BLKST_FREE) && initial_offset == 0) - set_bit(GBF_FULL, &rbm->bi->bi_flags); + if ((state == GFS2_BLKST_FREE) && initial_offset == 0) { + struct gfs2_bitmap *bi = rbm_bi(rbm); + set_bit(GBF_FULL, &bi->bi_flags); + } next_bitmap: /* Find next bitmap in the rgrp */ rbm->offset = 0; - index = rbm->bi - rbm->rgd->rd_bits; - index++; - if (index == rbm->rgd->rd_length) - index = 0; + rbm->bii++; + if (rbm->bii == rbm->rgd->rd_length) + rbm->bii = 0; res_covered_end_of_rgrp: - rbm->bi = &rbm->rgd->rd_bits[index]; - if ((index == 0) && nowrap) + if ((rbm->bii == 0) && nowrap) break; n++; next_iter: @@ -1624,6 +1691,24 @@ next_iter: break; } + if (minext == NULL || state != GFS2_BLKST_FREE) + return -ENOSPC; + + /* If the extent was too small, and it's smaller than the smallest + to have failed before, remember for future reference that it's + useless to search this rgrp again for this amount or more. */ + if ((first_offset == 0) && (first_bii == 0) && + (*minext < rbm->rgd->rd_extfail_pt)) + rbm->rgd->rd_extfail_pt = *minext; + + /* If the maximum extent we found is big enough to fulfill the + minimum requirements, use it anyway. */ + if (maxext.len) { + *rbm = maxext.rbm; + *minext = maxext.len; + return 0; + } + return -ENOSPC; } @@ -1645,11 +1730,12 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip struct gfs2_inode *ip; int error; int found = 0; - struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; + struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; while (1) { down_write(&sdp->sd_log_flush_lock); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, + true, NULL); up_write(&sdp->sd_log_flush_lock); if (error == -ENOSPC) break; @@ -1800,12 +1886,12 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for - * @requested: the number of blocks to be reserved + * @ap: the allocation parameters * * Returns: errno */ -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) +int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; @@ -1817,17 +1903,16 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, requested)) + if (gfs2_assert_warn(sdp, ap->target)) return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; - flags = 0; /* Yoda: Do or do not. There is no try */ } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { rs->rs_rbm.rgd = begin = ip->i_rgd; } else { rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } - if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) + if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) skip = gfs2_orlov_skip(ip); if (rs->rs_rbm.rgd == NULL) return -EBADSLT; @@ -1861,7 +1946,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) } /* Skip unuseable resource groups */ - if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) + if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | + GFS2_RDF_ERROR)) || + (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) @@ -1869,27 +1956,28 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) - rg_mblk_search(rs->rs_rbm.rgd, ip, requested); + rg_mblk_search(rs->rs_rbm.rgd, ip, ap); /* Skip rgrps when we can't get a reservation on first pass */ if (!gfs2_rs_active(rs) && (loops < 1)) goto check_rgrp; /* If rgrp has enough free space, use it */ - if (rs->rs_rbm.rgd->rd_free_clone >= requested) { + if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { ip->i_rgd = rs->rs_rbm.rgd; return 0; } - /* Drop reservation, if we couldn't use reserved rgrp */ - if (gfs2_rs_active(rs)) - gfs2_rs_deltree(rs); check_rgrp: /* Check for unlinked inodes which can be reclaimed */ if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: + /* Drop reservation, if we couldn't use reserved rgrp */ + if (gfs2_rs_active(rs)) + gfs2_rs_deltree(rs); + /* Unlock rgrp if required */ if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); @@ -1913,7 +2001,7 @@ next_rgrp: } /* Flushing the log may release space */ if (loops == 2) - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } return -ENOSPC; @@ -1973,14 +2061,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, *n = 1; block = gfs2_rbm_to_block(rbm); - gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm->bi->bi_bh); + gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh); gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; - gfs2_trans_add_meta(pos.rgd->rd_gl, pos.bi->bi_bh); + gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); gfs2_setbit(&pos, true, GFS2_BLKST_USED); (*n)++; block++; @@ -2001,6 +2089,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 blen, unsigned char new_state) { struct gfs2_rbm rbm; + struct gfs2_bitmap *bi; rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); if (!rbm.rgd) { @@ -2011,15 +2100,15 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, while (blen--) { gfs2_rbm_from_block(&rbm, bstart); + bi = rbm_bi(&rbm); bstart++; - if (!rbm.bi->bi_clone) { - rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, - GFP_NOFS | __GFP_NOFAIL); - memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, - rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, - rbm.bi->bi_len); + if (!bi->bi_clone) { + bi->bi_clone = kmalloc(bi->bi_bh->b_size, + GFP_NOFS | __GFP_NOFAIL); + memcpy(bi->bi_clone + bi->bi_offset, + bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); } - gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.bi->bi_bh); + gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh); gfs2_setbit(&rbm, false, new_state); } @@ -2033,25 +2122,24 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, * */ -int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) +void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) { struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_blkreserv *trs; const struct rb_node *n; if (rgd == NULL) - return 0; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", + return; + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_reserved); + rgd->rd_reserved, rgd->rd_extfail_pt); spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); dump_rs(seq, trs); } spin_unlock(&rgd->rd_rsspin); - return 0; } static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) @@ -2103,6 +2191,35 @@ out: } /** + * gfs2_set_alloc_start - Set starting point for block allocation + * @rbm: The rbm which will be set to the required location + * @ip: The gfs2 inode + * @dinode: Flag to say if allocation includes a new inode + * + * This sets the starting point from the reservation if one is active + * otherwise it falls back to guessing a start point based on the + * inode's goal block or the last allocation point in the rgrp. + */ + +static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, + const struct gfs2_inode *ip, bool dinode) +{ + u64 goal; + + if (gfs2_rs_active(ip->i_res)) { + *rbm = ip->i_res->rs_rbm; + return; + } + + if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) + goal = ip->i_goal; + else + goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; + + gfs2_rbm_from_block(rbm, goal); +} + +/** * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for * @bn: Used to return the starting block number @@ -2120,30 +2237,24 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u64 goal; u64 block; /* block, within the file system scope */ int error; - if (gfs2_rs_active(ip->i_res)) - goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); - else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) - goal = ip->i_goal; - else - goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - - gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); + gfs2_set_alloc_start(&rbm, ip, dinode); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL); if (error == -ENOSPC) { - gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); + gfs2_set_alloc_start(&rbm, ip, dinode); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false, + NULL); } /* Since all blocks are reserved in advance, this shouldn't happen */ if (error) { - fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", + fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n", (unsigned long long)ip->i_no_addr, error, *nblocks, - test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags), + rbm.rgd->rd_extfail_pt); goto rgrp_error; } @@ -2169,7 +2280,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, } } if (rbm.rgd->rd_free < *nblocks) { - printk(KERN_WARNING "nblocks=%u\n", *nblocks); + pr_warn("nblocks=%u\n", *nblocks); goto rgrp_error; } @@ -2187,7 +2298,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) - gfs2_trans_add_unrevoke(sdp, block, 1); + gfs2_trans_add_unrevoke(sdp, block, *nblocks); gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); @@ -2411,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) /** * gfs2_rlist_free - free a resource group list - * @list: the list of resource groups + * @rlist: the list of resource groups * */ diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 5b3f4a896e6..463ab2e95d1 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -40,7 +40,7 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, @@ -48,7 +48,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, extern int gfs2_rs_alloc(struct gfs2_inode *ip); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip); +extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); @@ -68,7 +68,7 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); -extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); +extern void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct buffer_head *bh, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5639dec66c..1319b5c4ec6 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/bio.h> #include <linux/sched.h> #include <linux/slab.h> @@ -175,8 +177,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) break; case Opt_debug: if (args->ar_errors == GFS2_ERRORS_PANIC) { - printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " - "are mutually exclusive.\n"); + pr_warn("-o debug and -o errors=panic are mutually exclusive\n"); return -EINVAL; } args->ar_debug = 1; @@ -228,21 +229,21 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) case Opt_commit: rv = match_int(&tmp[0], &args->ar_commit); if (rv || args->ar_commit <= 0) { - printk(KERN_WARNING "GFS2: commit mount option requires a positive numeric argument\n"); + pr_warn("commit mount option requires a positive numeric argument\n"); return rv ? rv : -EINVAL; } break; case Opt_statfs_quantum: rv = match_int(&tmp[0], &args->ar_statfs_quantum); if (rv || args->ar_statfs_quantum < 0) { - printk(KERN_WARNING "GFS2: statfs_quantum mount option requires a non-negative numeric argument\n"); + pr_warn("statfs_quantum mount option requires a non-negative numeric argument\n"); return rv ? rv : -EINVAL; } break; case Opt_quota_quantum: rv = match_int(&tmp[0], &args->ar_quota_quantum); if (rv || args->ar_quota_quantum <= 0) { - printk(KERN_WARNING "GFS2: quota_quantum mount option requires a positive numeric argument\n"); + pr_warn("quota_quantum mount option requires a positive numeric argument\n"); return rv ? rv : -EINVAL; } break; @@ -250,7 +251,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) rv = match_int(&tmp[0], &args->ar_statfs_percent); if (rv || args->ar_statfs_percent < 0 || args->ar_statfs_percent > 100) { - printk(KERN_WARNING "statfs_percent mount option requires a numeric argument between 0 and 100\n"); + pr_warn("statfs_percent mount option requires a numeric argument between 0 and 100\n"); return rv ? rv : -EINVAL; } break; @@ -259,8 +260,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) break; case Opt_err_panic: if (args->ar_debug) { - printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " - "are mutually exclusive.\n"); + pr_warn("-o debug and -o errors=panic are mutually exclusive\n"); return -EINVAL; } args->ar_errors = GFS2_ERRORS_PANIC; @@ -279,7 +279,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) break; case Opt_error: default: - printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); + pr_warn("invalid mount option: %s\n", o); return -EINVAL; } } @@ -295,9 +295,8 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) void gfs2_jindex_free(struct gfs2_sbd *sdp) { - struct list_head list, *head; + struct list_head list; struct gfs2_jdesc *jd; - struct gfs2_journal_extent *jext; spin_lock(&sdp->sd_jindex_spin); list_add(&list, &sdp->sd_jindex_list); @@ -307,14 +306,7 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) while (!list_empty(&list)) { jd = list_entry(list.next, struct gfs2_jdesc, jd_list); - head = &jd->extent_list; - while (!list_empty(head)) { - jext = list_entry(head->next, - struct gfs2_journal_extent, - extent_list); - list_del(&jext->extent_list); - kfree(jext); - } + gfs2_free_journal_extents(jd); list_del(&jd->jd_list); iput(jd->jd_inode); kfree(jd); @@ -369,6 +361,33 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) return 0; } +static int init_threads(struct gfs2_sbd *sdp) +{ + struct task_struct *p; + int error = 0; + + p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); + if (IS_ERR(p)) { + error = PTR_ERR(p); + fs_err(sdp, "can't start logd thread: %d\n", error); + return error; + } + sdp->sd_logd_process = p; + + p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); + if (IS_ERR(p)) { + error = PTR_ERR(p); + fs_err(sdp, "can't start quotad thread: %d\n", error); + goto fail; + } + sdp->sd_quotad_process = p; + return 0; + +fail: + kthread_stop(sdp->sd_logd_process); + return error; +} + /** * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one * @sdp: the filesystem @@ -380,14 +399,19 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_holder t_gh; + struct gfs2_holder thaw_gh; struct gfs2_log_header_host head; int error; - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); + error = init_threads(sdp); if (error) return error; + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, + &thaw_gh); + if (error) + goto fail_threads; + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); @@ -410,14 +434,16 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_glock_dq_uninit(&t_gh); + gfs2_glock_dq_uninit(&thaw_gh); return 0; fail: - t_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&t_gh); - + thaw_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&thaw_gh); +fail_threads: + kthread_stop(sdp->sd_quotad_process); + kthread_stop(sdp->sd_logd_process); return error; } @@ -610,15 +636,21 @@ struct lfcc { */ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, - struct gfs2_holder *t_gh) + struct gfs2_holder *freeze_gh) { struct gfs2_inode *ip; struct gfs2_jdesc *jd; struct lfcc *lfcc; LIST_HEAD(list); struct gfs2_log_header_host lh; + struct gfs2_inode *dip = GFS2_I(sdp->sd_root_dir->d_inode); int error; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, + &sdp->sd_freeze_root_gh); + if (error) + return error; + atomic_set(&sdp->sd_frozen_root, 1); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); if (!lfcc) { @@ -634,8 +666,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, list_add(&lfcc->list, &list); } - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, - GL_NOCACHE, t_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, freeze_gh); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { error = gfs2_jdesc_check(jd); @@ -651,7 +683,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, } if (error) - gfs2_glock_dq_uninit(t_gh); + gfs2_glock_dq_uninit(freeze_gh); out: while (!list_empty(&list)) { @@ -660,6 +692,11 @@ out: gfs2_glock_dq_uninit(&lfcc->gh); kfree(lfcc); } + if (error) { + atomic_dec(&sdp->sd_frozen_root); + wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); + gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); + } return error; } @@ -717,7 +754,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) int ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) - gfs2_log_flush(GFS2_SB(inode), ip->i_gl); + gfs2_log_flush(GFS2_SB(inode), ip->i_gl, NORMAL_FLUSH); if (bdi->dirty_exceeded) gfs2_ail1_flush(sdp, wbc); else @@ -797,25 +834,30 @@ out: static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) { - struct gfs2_holder t_gh; + struct gfs2_holder thaw_gh; int error; - flush_workqueue(gfs2_delete_workqueue); - gfs2_quota_sync(sdp->sd_vfs, 0); - gfs2_statfs_sync(sdp->sd_vfs, 0); - - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, - &t_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, + &thaw_gh); if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return error; - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); - + down_write(&sdp->sd_log_flush_lock); clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + up_write(&sdp->sd_log_flush_lock); - if (t_gh.gh_gl) - gfs2_glock_dq_uninit(&t_gh); + kthread_stop(sdp->sd_quotad_process); + kthread_stop(sdp->sd_logd_process); + + flush_workqueue(gfs2_delete_workqueue); + gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_statfs_sync(sdp->sd_vfs, 0); + + gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH); + gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); + + if (thaw_gh.gh_gl) + gfs2_glock_dq_uninit(&thaw_gh); gfs2_quota_cleanup(sdp); @@ -857,9 +899,6 @@ restart: } spin_unlock(&sdp->sd_jindex_spin); - kthread_stop(sdp->sd_quotad_process); - kthread_stop(sdp->sd_logd_process); - if (!(sb->s_flags & MS_RDONLY)) { error = gfs2_make_fs_ro(sdp); if (error) @@ -875,7 +914,7 @@ restart: iput(sdp->sd_quota_inode); gfs2_glock_put(sdp->sd_rename_gl); - gfs2_glock_put(sdp->sd_trans_gl); + gfs2_glock_put(sdp->sd_freeze_gl); if (!sdp->sd_args.ar_spectator) { gfs2_glock_dq_uninit(&sdp->sd_journal_gh); @@ -910,8 +949,8 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) struct gfs2_sbd *sdp = sb->s_fs_info; gfs2_quota_sync(sb, -1); - if (wait && sdp) - gfs2_log_flush(sdp, NULL); + if (wait && sdp && !atomic_read(&sdp->sd_log_freeze)) + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); return 0; } @@ -961,6 +1000,9 @@ static int gfs2_unfreeze(struct super_block *sb) struct gfs2_sbd *sdp = sb->s_fs_info; gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + atomic_dec(&sdp->sd_frozen_root); + wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); + gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); return 0; } @@ -1142,6 +1184,8 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) struct gfs2_tune *gt = &sdp->sd_tune; int error; + sync_filesystem(sb); + spin_lock(>->gt_spin); args.ar_commit = gt->gt_logd_secs; args.ar_quota_quantum = gt->gt_quota_quantum; @@ -1223,7 +1267,7 @@ static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); - if (inode->i_nlink) { + if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && inode->i_nlink) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) clear_nlink(inode); @@ -1438,6 +1482,11 @@ static void gfs2_evict_inode(struct inode *inode) struct gfs2_holder gh; int error; + if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) { + clear_inode(inode); + return; + } + if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) goto out; @@ -1493,7 +1542,7 @@ static void gfs2_evict_inode(struct inode *inode) goto out_unlock; out_truncate: - gfs2_log_flush(sdp, ip->i_gl); + gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); filemap_fdatawrite(metamapping); @@ -1525,8 +1574,8 @@ out_unlock: fs_warn(sdp, "gfs2_evict_inode: %d\n", error); out: /* Case 3 starts here */ - truncate_inode_pages(&inode->i_data, 0); - gfs2_rs_delete(ip); + truncate_inode_pages_final(&inode->i_data); + gfs2_rs_delete(ip, NULL); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index aa5c4804496..3ab566ba569 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/completion.h> @@ -138,9 +140,8 @@ static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: withdrawing from cluster at user's request\n", - sdp->sd_fsname); + gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n"); + return len; } @@ -239,8 +240,8 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len if (gltype > LM_TYPE_JOURNAL) return -EINVAL; - if (gltype == LM_TYPE_NONDISK && glnum == GFS2_TRANS_LOCK) - glops = &gfs2_trans_glops; + if (gltype == LM_TYPE_NONDISK && glnum == GFS2_FREEZE_LOCK) + glops = &gfs2_freeze_glops; else glops = gfs2_glops_list[gltype]; if (glops == NULL) @@ -332,7 +333,7 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); else if (val == 0) { clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); gfs2_glock_thaw(sdp); } else { ret = -EINVAL; @@ -406,6 +407,9 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) struct gfs2_jdesc *jd; int rv; + /* Wait for our primary journal to be initialized */ + wait_for_completion(&sdp->sd_journal_ready); + spin_lock(&sdp->sd_jindex_spin); rv = -EBUSY; if (sdp->sd_jdesc->jd_jid == jid) @@ -481,7 +485,7 @@ static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) rv = jid = -EINVAL; sdp->sd_lockstruct.ls_jid = jid; clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); out: spin_unlock(&sdp->sd_jindex_spin); @@ -587,7 +591,6 @@ TUNE_ATTR(max_readahead, 0); TUNE_ATTR(complain_secs, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); -TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); @@ -597,7 +600,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_max_readahead.attr, &tune_attr_complain_secs.attr, &tune_attr_statfs_slow.attr, - &tune_attr_quota_simul_sync.attr, &tune_attr_statfs_quantum.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 2b20d7046bf..0546ab4e28e 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -46,64 +48,41 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, tr->tr_blocks = blocks; tr->tr_revokes = revokes; tr->tr_reserved = 1; + tr->tr_alloced = 1; if (blocks) tr->tr_reserved += 6 + blocks; if (revokes) tr->tr_reserved += gfs2_struct2blk(sdp, revokes, sizeof(u64)); - sb_start_intwrite(sdp->sd_vfs); - gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); + INIT_LIST_HEAD(&tr->tr_databuf); + INIT_LIST_HEAD(&tr->tr_buf); - error = gfs2_glock_nq(&tr->tr_t_gh); - if (error) - goto fail_holder_uninit; + sb_start_intwrite(sdp->sd_vfs); error = gfs2_log_reserve(sdp, tr->tr_reserved); if (error) - goto fail_gunlock; + goto fail; current->journal_info = tr; return 0; -fail_gunlock: - gfs2_glock_dq(&tr->tr_t_gh); - -fail_holder_uninit: +fail: sb_end_intwrite(sdp->sd_vfs); - gfs2_holder_uninit(&tr->tr_t_gh); kfree(tr); return error; } -/** - * gfs2_log_release - Release a given number of log blocks - * @sdp: The GFS2 superblock - * @blks: The number of blocks - * - */ - -static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) -{ - - atomic_add(blks, &sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, blks); - gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= - sdp->sd_jdesc->jd_blocks); - up_read(&sdp->sd_log_flush_lock); -} - static void gfs2_print_trans(const struct gfs2_trans *tr) { - printk(KERN_WARNING "GFS2: Transaction created at: %pSR\n", - (void *)tr->tr_ip); - printk(KERN_WARNING "GFS2: blocks=%u revokes=%u reserved=%u touched=%d\n", - tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); - printk(KERN_WARNING "GFS2: Buf %u/%u Databuf %u/%u Revoke %u/%u\n", - tr->tr_num_buf_new, tr->tr_num_buf_rm, - tr->tr_num_databuf_new, tr->tr_num_databuf_rm, - tr->tr_num_revoke, tr->tr_num_revoke_rm); + pr_warn("Transaction created at: %pSR\n", (void *)tr->tr_ip); + pr_warn("blocks=%u revokes=%u reserved=%u touched=%u\n", + tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); + pr_warn("Buf %u/%u Databuf %u/%u Revoke %u/%u\n", + tr->tr_num_buf_new, tr->tr_num_buf_rm, + tr->tr_num_databuf_new, tr->tr_num_databuf_rm, + tr->tr_num_revoke, tr->tr_num_revoke_rm); } void gfs2_trans_end(struct gfs2_sbd *sdp) @@ -115,11 +94,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (!tr->tr_touched) { gfs2_log_release(sdp, tr->tr_reserved); - if (tr->tr_t_gh.gh_gl) { - gfs2_glock_dq(&tr->tr_t_gh); - gfs2_holder_uninit(&tr->tr_t_gh); + if (tr->tr_alloced) kfree(tr); - } sb_end_intwrite(sdp->sd_vfs); return; } @@ -133,16 +109,12 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_print_trans(tr); gfs2_log_commit(sdp, tr); - if (tr->tr_t_gh.gh_gl) { - gfs2_glock_dq(&tr->tr_t_gh); - gfs2_holder_uninit(&tr->tr_t_gh); - if (!tr->tr_attached) + if (tr->tr_alloced && !tr->tr_attached) kfree(tr); - } up_read(&sdp->sd_log_flush_lock); if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); sb_end_intwrite(sdp->sd_vfs); } @@ -210,8 +182,7 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); gfs2_pin(sdp, bd->bd_bh); tr->tr_num_databuf_new++; - sdp->sd_log_num_databuf++; - list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf); + list_add_tail(&bd->bd_list, &tr->tr_databuf); } gfs2_log_unlock(sdp); unlock_buffer(bh); @@ -230,16 +201,14 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { - printk(KERN_ERR - "Attempting to add uninitialised block to journal (inplace block=%lld)\n", + pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n", (unsigned long long)bd->bd_bh->b_blocknr); BUG(); } gfs2_pin(sdp, bd->bd_bh); mh->__pad0 = cpu_to_be64(0); mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); - sdp->sd_log_num_buf++; - list_add(&bd->bd_list, &sdp->sd_log_le_buf); + list_add(&bd->bd_list, &tr->tr_buf); tr->tr_num_buf_new++; } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 6402fb69d71..86d2035ac66 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> @@ -30,22 +32,27 @@ mempool_t *gfs2_page_pool __read_mostly; void gfs2_assert_i(struct gfs2_sbd *sdp) { - printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n", - sdp->sd_fsname); + fs_emerg(sdp, "fatal assertion failed\n"); } -int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) +int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; const struct lm_lockops *lm = ls->ls_ops; va_list args; + struct va_format vaf; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return 0; va_start(args, fmt); - vprintk(fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + fs_err(sdp, "%pV", &vaf); + va_end(args); if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { @@ -66,7 +73,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) } if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) - panic("GFS2: fsid=%s: panic requested.\n", sdp->sd_fsname); + panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); return -1; } @@ -82,10 +89,9 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, { int me; me = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, assertion, - sdp->sd_fsname, function, file, line); + "fatal: assertion \"%s\" failed\n" + " function = %s, file = %s, line = %u\n", + assertion, function, file, line); dump_stack(); return (me) ? -1 : -2; } @@ -105,11 +111,8 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, return -2; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) - printk(KERN_WARNING - "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, assertion, - sdp->sd_fsname, function, file, line); + fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", + assertion, function, file, line); if (sdp->sd_args.ar_debug) BUG(); @@ -138,10 +141,8 @@ int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, { int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: filesystem consistency error\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, function, file, line); + "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", + function, file, line); return rv; } @@ -157,13 +158,12 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: filesystem consistency error\n" - "GFS2: fsid=%s: inode = %llu %llu\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino, - (unsigned long long)ip->i_no_addr, - sdp->sd_fsname, function, file, line); + "fatal: filesystem consistency error\n" + " inode = %llu %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr, + function, file, line); return rv; } @@ -179,12 +179,11 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, struct gfs2_sbd *sdp = rgd->rd_sbd; int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: filesystem consistency error\n" - "GFS2: fsid=%s: RG = %llu\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)rgd->rd_addr, - sdp->sd_fsname, function, file, line); + "fatal: filesystem consistency error\n" + " RG = %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)rgd->rd_addr, + function, file, line); return rv; } @@ -200,12 +199,11 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, { int me; me = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: invalid metadata block\n" - "GFS2: fsid=%s: bh = %llu (%s)\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, - sdp->sd_fsname, function, file, line); + "fatal: invalid metadata block\n" + " bh = %llu (%s)\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, type, + function, file, line); return (me) ? -1 : -2; } @@ -221,12 +219,11 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, { int me; me = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: invalid metadata block\n" - "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, t, - sdp->sd_fsname, function, file, line); + "fatal: invalid metadata block\n" + " bh = %llu (type: exp=%u, found=%u)\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, type, t, + function, file, line); return (me) ? -1 : -2; } @@ -241,10 +238,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, { int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: I/O error\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, function, file, line); + "fatal: I/O error\n" + " function = %s, file = %s, line = %u\n", + function, file, line); return rv; } @@ -259,32 +255,11 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, { int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: I/O error\n" - "GFS2: fsid=%s: block = %llu\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)bh->b_blocknr, - sdp->sd_fsname, function, file, line); + "fatal: I/O error\n" + " block = %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, + function, file, line); return rv; } -void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, - unsigned int bit, int new_value) -{ - unsigned int c, o, b = bit; - int old_value; - - c = b / (8 * PAGE_SIZE); - b %= 8 * PAGE_SIZE; - o = b / 8; - b %= 8; - - old_value = (bitmap[c][o] & (1 << b)); - gfs2_assert_withdraw(sdp, !old_value != !new_value); - - if (new_value) - bitmap[c][o] |= 1 << b; - else - bitmap[c][o] &= ~(1 << b); -} - diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 80535739ac7..cbdcbdf3961 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -10,22 +10,23 @@ #ifndef __UTIL_DOT_H__ #define __UTIL_DOT_H__ +#ifdef pr_fmt +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#endif + #include <linux/mempool.h> #include "incore.h" -#define fs_printk(level, fs, fmt, arg...) \ - printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg) - -#define fs_info(fs, fmt, arg...) \ - fs_printk(KERN_INFO , fs , fmt , ## arg) - -#define fs_warn(fs, fmt, arg...) \ - fs_printk(KERN_WARNING , fs , fmt , ## arg) - -#define fs_err(fs, fmt, arg...) \ - fs_printk(KERN_ERR, fs , fmt , ## arg) - +#define fs_emerg(fs, fmt, ...) \ + pr_emerg("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) +#define fs_warn(fs, fmt, ...) \ + pr_warn("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) +#define fs_err(fs, fmt, ...) \ + pr_err("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) +#define fs_info(fs, fmt, ...) \ + pr_info("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) void gfs2_assert_i(struct gfs2_sbd *sdp); @@ -85,7 +86,7 @@ static inline int gfs2_meta_check(struct gfs2_sbd *sdp, struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; u32 magic = be32_to_cpu(mh->mh_magic); if (unlikely(magic != GFS2_MAGIC)) { - printk(KERN_ERR "GFS2: Magic number missing at %llu\n", + pr_err("Magic number missing at %llu\n", (unsigned long long)bh->b_blocknr); return -EIO; } @@ -164,9 +165,7 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, #define gfs2_tune_get(sdp, field) \ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) -void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, - unsigned int bit, int new_value); -int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...); +__printf(2, 3) +int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...); #endif /* __UTIL_DOT_H__ */ - diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index ecd37f30ab9..0b81f783f78 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -13,6 +13,7 @@ #include <linux/buffer_head.h> #include <linux/xattr.h> #include <linux/gfs2_ondisk.h> +#include <linux/posix_acl_xattr.h> #include <asm/uaccess.h> #include "gfs2.h" @@ -723,6 +724,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, unsigned int blks, ea_skeleton_call_t skeleton_call, void *private) { + struct gfs2_alloc_parms ap = { .target = blks }; struct buffer_head *dibh; int error; @@ -734,7 +736,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) return error; - error = gfs2_inplace_reserve(ip, blks, 0); + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_gunlock_q; @@ -1499,7 +1501,8 @@ static const struct xattr_handler gfs2_xattr_security_handler = { const struct xattr_handler *gfs2_xattr_handlers[] = { &gfs2_xattr_user_handler, &gfs2_xattr_security_handler, - &gfs2_xattr_system_handler, + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, NULL, }; |
