diff options
Diffstat (limited to 'fs/gfs2')
42 files changed, 3151 insertions, 2520 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index eb08c9e43c2..90c6a8faaec 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -20,13 +20,12 @@ config GFS2_FS be found here: http://sources.redhat.com/cluster The "nolock" lock module is now built in to GFS2 by default. If - you want to use the DLM, be sure to enable HOTPLUG and IPv4/6 - networking. + you want to use the DLM, be sure to enable IPv4/6 networking. config GFS2_FS_LOCKING_DLM bool "GFS2 DLM locking" depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \ - HOTPLUG && DLM && CONFIGFS_FS && SYSFS + CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS) help Multiple node locking module for GFS2 diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index f850020ad90..3088e2a38e3 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -49,10 +49,6 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) if (!ip->i_eattr) return NULL; - acl = get_cached_acl(&ip->i_inode, type); - if (acl != ACL_NOT_CACHED) - return acl; - name = gfs2_acl_name(type); if (name == NULL) return ERR_PTR(-EINVAL); @@ -68,19 +64,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) return acl; } -static int gfs2_set_mode(struct inode *inode, umode_t mode) -{ - int error = 0; - - if (mode != inode->i_mode) { - inode->i_mode = mode; - mark_inode_dirty(inode); - } - - return error; -} - -static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) +int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int error; int len; @@ -88,219 +72,50 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) const char *name = gfs2_acl_name(type); BUG_ON(name == NULL); - len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); - if (len == 0) - return 0; - data = kmalloc(len, GFP_NOFS); - if (data == NULL) - return -ENOMEM; - error = posix_acl_to_xattr(&init_user_ns, acl, data, len); - if (error < 0) - goto out; - error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); - if (!error) - set_cached_acl(inode, type, acl); -out: - kfree(data); - return error; -} - -int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) -{ - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct posix_acl *acl; - umode_t mode = inode->i_mode; - int error = 0; - - if (!sdp->sd_args.ar_posix_acl) - return 0; - if (S_ISLNK(inode->i_mode)) - return 0; - - acl = gfs2_get_acl(&dip->i_inode, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (!acl) { - mode &= ~current_umask(); - return gfs2_set_mode(inode, mode); - } - - if (S_ISDIR(inode->i_mode)) { - error = gfs2_acl_set(inode, ACL_TYPE_DEFAULT, acl); - if (error) - goto out; - } - - error = posix_acl_create(&acl, GFP_NOFS, &mode); - if (error < 0) - return error; - - if (error == 0) - goto munge; - - error = gfs2_acl_set(inode, ACL_TYPE_ACCESS, acl); - if (error) - goto out; -munge: - error = gfs2_set_mode(inode, mode); -out: - posix_acl_release(acl); - return error; -} - -int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) -{ - struct inode *inode = &ip->i_inode; - struct posix_acl *acl; - char *data; - unsigned int len; - int error; - - acl = gfs2_get_acl(&ip->i_inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (!acl) - return gfs2_setattr_simple(inode, attr); - - error = posix_acl_chmod(&acl, GFP_NOFS, attr->ia_mode); - if (error) - return error; - - len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); - data = kmalloc(len, GFP_NOFS); - error = -ENOMEM; - if (data == NULL) - goto out; - posix_acl_to_xattr(&init_user_ns, acl, data, len); - error = gfs2_xattr_acl_chmod(ip, attr, data); - kfree(data); - set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); - -out: - posix_acl_release(acl); - return error; -} - -static int gfs2_acl_type(const char *name) -{ - if (strcmp(name, GFS2_POSIX_ACL_ACCESS) == 0) - return ACL_TYPE_ACCESS; - if (strcmp(name, GFS2_POSIX_ACL_DEFAULT) == 0) - return ACL_TYPE_DEFAULT; - return -EINVAL; -} - -static int gfs2_xattr_system_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int xtype) -{ - struct inode *inode = dentry->d_inode; - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct posix_acl *acl; - int type; - int error; - - if (!sdp->sd_args.ar_posix_acl) - return -EOPNOTSUPP; - - type = gfs2_acl_type(name); - if (type < 0) - return type; - - acl = gfs2_get_acl(inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - - error = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); - posix_acl_release(acl); - - return error; -} - -static int gfs2_xattr_system_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, - int xtype) -{ - struct inode *inode = dentry->d_inode; - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct posix_acl *acl = NULL; - int error = 0, type; - - if (!sdp->sd_args.ar_posix_acl) - return -EOPNOTSUPP; - type = gfs2_acl_type(name); - if (type < 0) - return type; - if (flags & XATTR_CREATE) - return -EINVAL; - if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; - if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - if (!value) - goto set_acl; - - acl = posix_acl_from_xattr(&init_user_ns, value, size); - if (!acl) { - /* - * acl_set_file(3) may request that we set default ACLs with - * zero length -- defend (gracefully) against that here. - */ - goto out; - } - if (IS_ERR(acl)) { - error = PTR_ERR(acl); - goto out; - } - - error = posix_acl_valid(acl); - if (error) - goto out_release; - - error = -EINVAL; - if (acl->a_count > GFS2_ACL_MAX_ENTRIES) - goto out_release; + if (acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) + return -E2BIG; if (type == ACL_TYPE_ACCESS) { umode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; - if (error <= 0) { - posix_acl_release(acl); + if (error == 0) acl = NULL; - if (error < 0) - return error; + if (mode != inode->i_mode) { + inode->i_mode = mode; + mark_inode_dirty(inode); } - - error = gfs2_set_mode(inode, mode); - if (error) - goto out_release; } -set_acl: - error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS); - if (!error) { - if (acl) - set_cached_acl(inode, type, acl); - else - forget_cached_acl(inode, type); + if (acl) { + len = posix_acl_to_xattr(&init_user_ns, acl, NULL, 0); + if (len == 0) + return 0; + data = kmalloc(len, GFP_NOFS); + if (data == NULL) + return -ENOMEM; + error = posix_acl_to_xattr(&init_user_ns, acl, data, len); + if (error < 0) + goto out; + } else { + data = NULL; + len = 0; } -out_release: - posix_acl_release(acl); + + error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS); + if (error) + goto out; + + if (acl) + set_cached_acl(inode, type, acl); + else + forget_cached_acl(inode, type); out: + kfree(data); return error; } - -const struct xattr_handler gfs2_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .flags = GFS2_EATYPE_SYS, - .get = gfs2_xattr_system_get, - .set = gfs2_xattr_system_set, -}; - diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 0da38dc7efe..2d65ec4cd4b 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -14,11 +14,9 @@ #define GFS2_POSIX_ACL_ACCESS "posix_acl_access" #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" -#define GFS2_ACL_MAX_ENTRIES 25 +#define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12) extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type); -extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); -extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); -extern const struct xattr_handler gfs2_xattr_system_handler; +extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 30de4f2a2ea..805b37fed63 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -20,6 +20,8 @@ #include <linux/swap.h> #include <linux/gfs2_ondisk.h> #include <linux/backing-dev.h> +#include <linux/aio.h> +#include <trace/events/writeback.h> #include "gfs2.h" #include "incore.h" @@ -51,7 +53,7 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, continue; if (gfs2_is_jdata(ip)) set_buffer_uptodate(bh); - gfs2_trans_add_bh(ip->i_gl, bh, 0); + gfs2_trans_add_data(ip->i_gl, bh); } } @@ -109,7 +111,7 @@ static int gfs2_writepage_common(struct page *page, /* Is the page fully outside i_size? (truncate in progress) */ offset = i_size & (PAGE_CACHE_SIZE-1); if (page->index > end_index || (page->index == end_index && !offset)) { - page->mapping->a_ops->invalidatepage(page, 0); + page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); goto out; } return 1; @@ -121,14 +123,13 @@ out: } /** - * gfs2_writeback_writepage - Write page for writeback mappings + * gfs2_writepage - Write page for writeback mappings * @page: The page * @wbc: The writeback control * */ -static int gfs2_writeback_writepage(struct page *page, - struct writeback_control *wbc) +static int gfs2_writepage(struct page *page, struct writeback_control *wbc) { int ret; @@ -140,32 +141,6 @@ static int gfs2_writeback_writepage(struct page *page, } /** - * gfs2_ordered_writepage - Write page for ordered data files - * @page: The page to write - * @wbc: The writeback control - * - */ - -static int gfs2_ordered_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - int ret; - - ret = gfs2_writepage_common(page, wbc); - if (ret <= 0) - return ret; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); - return block_write_full_page(page, gfs2_get_block_noalloc, wbc); -} - -/** * __gfs2_jdata_writepage - The core of jdata writepage * @page: The page to write * @wbc: The writeback control @@ -230,16 +205,14 @@ out_ignore: } /** - * gfs2_writeback_writepages - Write a bunch of dirty pages back to disk + * gfs2_writepages - Write a bunch of dirty pages back to disk * @mapping: The mapping to write * @wbc: Write-back control * - * For the data=writeback case we can already ignore buffer heads - * and write whole extents at once. This is a big reduction in the - * number of I/O requests we send and the bmap calls we make in this case. + * Used for both ordered and writeback modes. */ -static int gfs2_writeback_writepages(struct address_space *mapping, - struct writeback_control *wbc) +static int gfs2_writepages(struct address_space *mapping, + struct writeback_control *wbc) { return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc); } @@ -258,13 +231,11 @@ static int gfs2_writeback_writepages(struct address_space *mapping, static int gfs2_write_jdata_pagevec(struct address_space *mapping, struct writeback_control *wbc, struct pagevec *pvec, - int nr_pages, pgoff_t end) + int nr_pages, pgoff_t end, + pgoff_t *done_index) { struct inode *inode = mapping->host; struct gfs2_sbd *sdp = GFS2_SB(inode); - loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; - unsigned offset = i_size & (PAGE_CACHE_SIZE-1); unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); int i; int ret; @@ -276,39 +247,83 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, for(i = 0; i < nr_pages; i++) { struct page *page = pvec->pages[i]; + /* + * At this point, the page may be truncated or + * invalidated (changing page->mapping to NULL), or + * even swizzled back from swapper_space to tmpfs file + * mapping. However, page->index will not change + * because we have a reference on the page. + */ + if (page->index > end) { + /* + * can't be range_cyclic (1st pass) because + * end == -1 in that case. + */ + ret = 1; + break; + } + + *done_index = page->index; + lock_page(page); if (unlikely(page->mapping != mapping)) { +continue_unlock: unlock_page(page); continue; } - if (!wbc->range_cyclic && page->index > end) { - ret = 1; - unlock_page(page); - continue; + if (!PageDirty(page)) { + /* someone wrote it for us */ + goto continue_unlock; } - if (wbc->sync_mode != WB_SYNC_NONE) - wait_on_page_writeback(page); - - if (PageWriteback(page) || - !clear_page_dirty_for_io(page)) { - unlock_page(page); - continue; + if (PageWriteback(page)) { + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + else + goto continue_unlock; } - /* Is the page fully outside i_size? (truncate in progress) */ - if (page->index > end_index || (page->index == end_index && !offset)) { - page->mapping->a_ops->invalidatepage(page, 0); - unlock_page(page); - continue; - } + BUG_ON(PageWriteback(page)); + if (!clear_page_dirty_for_io(page)) + goto continue_unlock; + + trace_wbc_writepage(wbc, mapping->backing_dev_info); ret = __gfs2_jdata_writepage(page, wbc); + if (unlikely(ret)) { + if (ret == AOP_WRITEPAGE_ACTIVATE) { + unlock_page(page); + ret = 0; + } else { + + /* + * done_index is set past this page, + * so media errors will not choke + * background writeout for the entire + * file. This has consequences for + * range_cyclic semantics (ie. it may + * not be suitable for data integrity + * writeout). + */ + *done_index = page->index + 1; + ret = 1; + break; + } + } - if (ret || (--(wbc->nr_to_write) <= 0)) + /* + * We stop writing back only if we are not doing + * integrity sync. In case of integrity sync we have to + * keep going until we have written all the pages + * we tagged for writeback prior to entering this loop. + */ + if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { ret = 1; + break; + } + } gfs2_trans_end(sdp); return ret; @@ -333,51 +348,69 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, int done = 0; struct pagevec pvec; int nr_pages; + pgoff_t uninitialized_var(writeback_index); pgoff_t index; pgoff_t end; - int scanned = 0; + pgoff_t done_index; + int cycled; int range_whole = 0; + int tag; pagevec_init(&pvec, 0); if (wbc->range_cyclic) { - index = mapping->writeback_index; /* Start from prev offset */ + writeback_index = mapping->writeback_index; /* prev offset */ + index = writeback_index; + if (index == 0) + cycled = 1; + else + cycled = 0; end = -1; } else { index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - scanned = 1; + cycled = 1; /* ignore range_cyclic tests */ } + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) + tag = PAGECACHE_TAG_TOWRITE; + else + tag = PAGECACHE_TAG_DIRTY; retry: - while (!done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { - scanned = 1; - ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) + tag_pages_for_writeback(mapping, index, end); + done_index = index; + while (!done && (index <= end)) { + nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); + if (nr_pages == 0) + break; + + ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end, &done_index); if (ret) done = 1; if (ret > 0) ret = 0; - pagevec_release(&pvec); cond_resched(); } - if (!scanned && !done) { + if (!cycled && !done) { /* + * range_cyclic: * We hit the last page and there is more work to be done: wrap * back to the start of the file */ - scanned = 1; + cycled = 1; index = 0; + end = writeback_index - 1; goto retry; } if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) - mapping->writeback_index = index; + mapping->writeback_index = done_index; + return ret; } @@ -398,7 +431,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping, ret = gfs2_write_cache_jdata(mapping, wbc); if (ret == 0 && wbc->sync_mode == WB_SYNC_ALL) { - gfs2_log_flush(sdp, ip->i_gl); + gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); ret = gfs2_write_cache_jdata(mapping, wbc); } return ret; @@ -544,7 +577,6 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, p = kmap_atomic(page); memcpy(buf + copied, p + offset, amt); kunmap_atomic(p); - mark_page_accessed(page); page_cache_release(page); copied += amt; index++; @@ -638,12 +670,14 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); if (alloc_required) { + struct gfs2_alloc_parms ap = { .aflags = 0, }; error = gfs2_quota_lock_check(ip); if (error) goto out_unlock; requested = data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip, requested, 0); + ap.target = requested; + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_qunlock; } @@ -842,6 +876,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; int ret; + struct gfs2_trans *tr = current->journal_info; + BUG_ON(!tr); BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); @@ -852,8 +888,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, goto failed; } - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - if (gfs2_is_stuffed(ip)) return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); @@ -861,6 +895,11 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, gfs2_page_add_databufs(ip, page, from, to); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + else + gfs2_trans_add_meta(ip->i_gl, dibh); + if (inode == sdp->sd_rindex) { adjust_fs_space(inode); @@ -944,27 +983,33 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) unlock_buffer(bh); } -static void gfs2_invalidatepage(struct page *page, unsigned long offset) +static void gfs2_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) { struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); + unsigned int stop = offset + length; + int partial_page = (offset || length < PAGE_CACHE_SIZE); struct buffer_head *bh, *head; unsigned long pos = 0; BUG_ON(!PageLocked(page)); - if (offset == 0) + if (!partial_page) ClearPageChecked(page); if (!page_has_buffers(page)) goto out; bh = head = page_buffers(page); do { + if (pos + bh->b_size > stop) + return; + if (offset <= pos) gfs2_discard(sdp, bh); pos += bh->b_size; bh = bh->b_this_page; } while (bh != head); out: - if (offset == 0) + if (!partial_page) try_to_release_page(page, 0); } @@ -995,11 +1040,11 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + struct address_space *mapping = inode->i_mapping; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int rv; @@ -1020,9 +1065,39 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, if (rv != 1) goto out; /* dio not valid, fall back to buffered i/o */ - rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, gfs2_get_block_direct, - NULL, NULL, 0); + /* + * Now since we are holding a deferred (CW) lock at this point, you + * might be wondering why this is ever needed. There is a case however + * where we've granted a deferred local lock against a cached exclusive + * glock. That is ok provided all granted local locks are deferred, but + * it also means that it is possible to encounter pages which are + * cached and possibly also mapped. So here we check for that and sort + * them out ahead of the dio. The glock state machine will take care of + * everything else. + * + * If in fact the cached glock state (gl->gl_state) is deferred (CW) in + * the first place, mapping->nr_pages will always be zero. + */ + if (mapping->nrpages) { + loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); + loff_t len = iov_iter_count(iter); + loff_t end = PAGE_ALIGN(offset + len) - 1; + + rv = 0; + if (len == 0) + goto out; + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) + unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); + rv = filemap_write_and_wait_range(mapping, lstart, end); + if (rv) + goto out; + if (rw == WRITE) + truncate_inode_pages_range(mapping, lstart, end); + } + + rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, + iter, offset, + gfs2_get_block_direct, NULL, NULL, 0); out: gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); @@ -1057,37 +1132,29 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) if (atomic_read(&bh->b_count)) goto cannot_release; bd = bh->b_private; - if (bd && bd->bd_ail) + if (bd && bd->bd_tr) goto cannot_release; if (buffer_pinned(bh) || buffer_dirty(bh)) goto not_possible; bh = bh->b_this_page; } while(bh != head); spin_unlock(&sdp->sd_ail_lock); - gfs2_log_unlock(sdp); head = bh = page_buffers(page); do { - gfs2_log_lock(sdp); bd = bh->b_private; if (bd) { gfs2_assert_warn(sdp, bd->bd_bh == bh); - if (!list_empty(&bd->bd_list)) { - if (!buffer_pinned(bh)) - list_del_init(&bd->bd_list); - else - bd = NULL; - } - if (bd) - bd->bd_bh = NULL; + if (!list_empty(&bd->bd_list)) + list_del_init(&bd->bd_list); + bd->bd_bh = NULL; bh->b_private = NULL; - } - gfs2_log_unlock(sdp); - if (bd) kmem_cache_free(gfs2_bufdata_cachep, bd); + } bh = bh->b_this_page; } while (bh != head); + gfs2_log_unlock(sdp); return try_to_free_buffers(page); @@ -1101,8 +1168,8 @@ cannot_release: } static const struct address_space_operations gfs2_writeback_aops = { - .writepage = gfs2_writeback_writepage, - .writepages = gfs2_writeback_writepages, + .writepage = gfs2_writepage, + .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .write_begin = gfs2_write_begin, @@ -1117,7 +1184,8 @@ static const struct address_space_operations gfs2_writeback_aops = { }; static const struct address_space_operations gfs2_ordered_aops = { - .writepage = gfs2_ordered_writepage, + .writepage = gfs2_writepage, + .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, .write_begin = gfs2_write_begin, diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index a68e91bcef3..e6ee5b6e8d9 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -22,6 +22,7 @@ #include "meta_io.h" #include "quota.h" #include "rgrp.h" +#include "log.h" #include "super.h" #include "trans.h" #include "dir.h" @@ -93,7 +94,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, if (!gfs2_is_jdata(ip)) mark_buffer_dirty(bh); if (!gfs2_is_writeback(ip)) - gfs2_trans_add_bh(ip->i_gl, bh, 0); + gfs2_trans_add_data(ip->i_gl, bh); if (release) { unlock_page(page); @@ -153,7 +154,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) /* Set up the pointer to the new block */ - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); di = (struct gfs2_dinode *)dibh->b_data; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); @@ -405,7 +406,7 @@ static inline __be64 *gfs2_indirect_init(struct metapath *mp, BUG_ON(i < 1); BUG_ON(mp->mp_bh[i] != NULL); mp->mp_bh[i] = gfs2_meta_new(gl, bn); - gfs2_trans_add_bh(gl, mp->mp_bh[i], 1); + gfs2_trans_add_meta(gl, mp->mp_bh[i]); gfs2_metatype_set(mp->mp_bh[i], GFS2_METATYPE_IN, GFS2_FORMAT_IN); gfs2_buffer_clear_tail(mp->mp_bh[i], sizeof(struct gfs2_meta_header)); ptr += offset; @@ -468,7 +469,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, BUG_ON(sheight < 1); BUG_ON(dibh == NULL); - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); if (height == sheight) { struct buffer_head *bh; @@ -544,7 +545,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, /* Branching from existing tree */ case ALLOC_GROW_DEPTH: if (i > 1 && i < height) - gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[i-1], 1); + gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[i-1]); for (; i < height && n > 0; i++, n--) gfs2_indirect_init(mp, ip->i_gl, i, mp->mp_list[i-1], bn++); @@ -556,7 +557,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, case ALLOC_DATA: BUG_ON(n > dblks); BUG_ON(mp->mp_bh[end_of_metadata] == NULL); - gfs2_trans_add_bh(ip->i_gl, mp->mp_bh[end_of_metadata], 1); + gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[end_of_metadata]); dblks = n; ptr = metapointer(end_of_metadata, mp); dblock = bn; @@ -706,7 +707,7 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi * @top: The first pointer in the buffer * @bottom: One more than the last pointer * @height: the height this buffer is at - * @data: a pointer to a struct strip_mine + * @sm: a pointer to a struct strip_mine * * Returns: errno */ @@ -786,7 +787,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, goto out_rlist; if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(ip, ip->i_res); + gfs2_rs_deltree(ip->i_res); error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, @@ -796,8 +797,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, down_write(&ip->i_rw_mutex); - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); + gfs2_trans_add_meta(ip->i_gl, bh); bstart = 0; blen = 0; @@ -981,7 +982,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) } if (!gfs2_is_writeback(ip)) - gfs2_trans_add_bh(ip->i_gl, bh, 0); + gfs2_trans_add_data(ip->i_gl, bh); zero_user(page, offset, length); mark_buffer_dirty(bh); @@ -991,6 +992,8 @@ unlock: return err; } +#define GFS2_JTRUNC_REVOKES 8192 + /** * gfs2_journaled_truncate - Wrapper for truncate_pagecache for jdata files * @inode: The inode being truncated @@ -1002,8 +1005,6 @@ unlock: * if the number of pages being truncated gets too large. */ -#define GFS2_JTRUNC_REVOKES 8192 - static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize) { struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -1015,7 +1016,7 @@ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize chunk = oldsize - newsize; if (chunk > max_chunk) chunk = max_chunk; - truncate_pagecache(inode, oldsize, oldsize - chunk); + truncate_pagecache(inode, oldsize - chunk); oldsize -= chunk; gfs2_trans_end(sdp); error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES); @@ -1046,7 +1047,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) if (error) goto out; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); if (gfs2_is_stuffed(ip)) { gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); @@ -1066,7 +1067,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) if (journaled) error = gfs2_journaled_truncate(inode, oldsize, newsize); else - truncate_pagecache(inode, oldsize, newsize); + truncate_pagecache(inode, newsize); if (error) { brelse(dibh); @@ -1098,7 +1099,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size) if (error) return error; - error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) return error; @@ -1137,11 +1138,12 @@ static int trunc_end(struct gfs2_inode *ip) ip->i_height = 0; ip->i_goal = ip->i_no_addr; gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + gfs2_ordered_del_inode(ip); } ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1214,6 +1216,7 @@ static int do_grow(struct inode *inode, u64 size) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_alloc_parms ap = { .target = 1, }; struct buffer_head *dibh; int error; int unstuff = 0; @@ -1224,13 +1227,15 @@ static int do_grow(struct inode *inode, u64 size) if (error) return error; - error = gfs2_inplace_reserve(ip, 1, 0); + error = gfs2_inplace_reserve(ip, &ap); if (error) goto do_grow_qunlock; unstuff = 1; } - error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); + error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + + (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? + 0 : RES_QUOTA), 0); if (error) goto do_grow_release; @@ -1246,7 +1251,7 @@ static int do_grow(struct inode *inode, u64 size) i_size_write(inode, size); ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -1275,6 +1280,7 @@ do_grow_qunlock: int gfs2_setattr_size(struct inode *inode, u64 newsize) { + struct gfs2_inode *ip = GFS2_I(inode); int ret; u64 oldsize; @@ -1284,13 +1290,27 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) if (ret) return ret; + ret = get_write_access(inode); + if (ret) + return ret; + inode_dio_wait(inode); + ret = gfs2_rs_alloc(ip); + if (ret) + goto out; + oldsize = inode->i_size; - if (newsize >= oldsize) - return do_grow(inode, newsize); + if (newsize >= oldsize) { + ret = do_grow(inode, newsize); + goto out; + } - return do_shrink(inode, oldsize, newsize); + gfs2_rs_deltree(ip->i_res); + ret = do_shrink(inode, oldsize, newsize); +out: + put_write_access(inode); + return ret; } int gfs2_truncatei_resume(struct gfs2_inode *ip) @@ -1308,6 +1328,121 @@ int gfs2_file_dealloc(struct gfs2_inode *ip) } /** + * gfs2_free_journal_extents - Free cached journal bmap info + * @jd: The journal + * + */ + +void gfs2_free_journal_extents(struct gfs2_jdesc *jd) +{ + struct gfs2_journal_extent *jext; + + while(!list_empty(&jd->extent_list)) { + jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list); + list_del(&jext->list); + kfree(jext); + } +} + +/** + * gfs2_add_jextent - Add or merge a new extent to extent cache + * @jd: The journal descriptor + * @lblock: The logical block at start of new extent + * @dblock: The physical block at start of new extent + * @blocks: Size of extent in fs blocks + * + * Returns: 0 on success or -ENOMEM + */ + +static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks) +{ + struct gfs2_journal_extent *jext; + + if (!list_empty(&jd->extent_list)) { + jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list); + if ((jext->dblock + jext->blocks) == dblock) { + jext->blocks += blocks; + return 0; + } + } + + jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS); + if (jext == NULL) + return -ENOMEM; + jext->dblock = dblock; + jext->lblock = lblock; + jext->blocks = blocks; + list_add_tail(&jext->list, &jd->extent_list); + jd->nr_extents++; + return 0; +} + +/** + * gfs2_map_journal_extents - Cache journal bmap info + * @sdp: The super block + * @jd: The journal to map + * + * Create a reusable "extent" mapping from all logical + * blocks to all physical blocks for the given journal. This will save + * us time when writing journal blocks. Most journals will have only one + * extent that maps all their logical blocks. That's because gfs2.mkfs + * arranges the journal blocks sequentially to maximize performance. + * So the extent would map the first block for the entire file length. + * However, gfs2_jadd can happen while file activity is happening, so + * those journals may not be sequential. Less likely is the case where + * the users created their own journals by mounting the metafs and + * laying it out. But it's still possible. These journals might have + * several extents. + * + * Returns: 0 on success, or error on failure + */ + +int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) +{ + u64 lblock = 0; + u64 lblock_stop; + struct gfs2_inode *ip = GFS2_I(jd->jd_inode); + struct buffer_head bh; + unsigned int shift = sdp->sd_sb.sb_bsize_shift; + u64 size; + int rc; + + lblock_stop = i_size_read(jd->jd_inode) >> shift; + size = (lblock_stop - lblock) << shift; + jd->nr_extents = 0; + WARN_ON(!list_empty(&jd->extent_list)); + + do { + bh.b_state = 0; + bh.b_blocknr = 0; + bh.b_size = size; + rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0); + if (rc || !buffer_mapped(&bh)) + goto fail; + rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift); + if (rc) + goto fail; + size -= bh.b_size; + lblock += (bh.b_size >> ip->i_inode.i_blkbits); + } while(size > 0); + + fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid, + jd->nr_extents); + return 0; + +fail: + fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n", + rc, jd->jd_jid, + (unsigned long long)(i_size_read(jd->jd_inode) - size), + jd->nr_extents); + fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n", + rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr, + bh.b_state, (unsigned long long)bh.b_size); + gfs2_free_journal_extents(jd); + return rc; +} + +/** * gfs2_write_alloc_required - figure out if a write will require an allocation * @ip: the file being written to * @offset: the offset to write to diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index 42fea03e2bd..81ded5e2aaa 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h @@ -55,5 +55,7 @@ extern int gfs2_truncatei_resume(struct gfs2_inode *ip); extern int gfs2_file_dealloc(struct gfs2_inode *ip); extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, unsigned int len); +extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); +extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd); #endif /* __BMAP_DOT_H__ */ diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 4fddb3c22d2..d3a5d4e29ba 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -93,12 +93,9 @@ invalid_gunlock: if (!had_lock) gfs2_glock_dq_uninit(&d_gh); invalid: - if (inode && S_ISDIR(inode->i_mode)) { - if (have_submounts(dentry)) - goto valid; - shrink_dcache_parent(dentry); - } - d_drop(dentry); + if (check_submounts_and_drop(dentry) != 0) + goto valid; + dput(parent); return 0; @@ -109,8 +106,7 @@ fail: return 0; } -static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode, - struct qstr *str) +static int gfs2_dhash(const struct dentry *dentry, struct qstr *str) { str->hash = gfs2_disk_hash(str->name, str->len); return 0; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 9a35670fdc3..1a349f9a968 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -53,6 +53,8 @@ * but never before the maximum hash table size has been reached. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/buffer_head.h> @@ -93,7 +95,7 @@ int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head *bh; bh = gfs2_meta_new(ip->i_gl, block); - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); *bhp = bh; @@ -127,7 +129,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, if (error) return error; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); if (ip->i_inode.i_size < offset + size) i_size_write(&ip->i_inode, offset + size); @@ -209,7 +211,7 @@ static int gfs2_dir_write_data(struct gfs2_inode *ip, const char *buf, if (error) goto fail; - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); memcpy(bh->b_data + o, buf, amount); brelse(bh); @@ -231,7 +233,7 @@ out: i_size_write(&ip->i_inode, offset + copied); ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); @@ -354,22 +356,31 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) return ERR_PTR(-EIO); } - hc = kmalloc(hsize, GFP_NOFS); - ret = -ENOMEM; + hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN); + if (hc == NULL) + hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL); + if (hc == NULL) return ERR_PTR(-ENOMEM); ret = gfs2_dir_read_data(ip, hc, hsize); if (ret < 0) { - kfree(hc); + if (is_vmalloc_addr(hc)) + vfree(hc); + else + kfree(hc); return ERR_PTR(ret); } spin_lock(&inode->i_lock); - if (ip->i_hash_cache) - kfree(hc); - else + if (ip->i_hash_cache) { + if (is_vmalloc_addr(hc)) + vfree(hc); + else + kfree(hc); + } else { ip->i_hash_cache = hc; + } spin_unlock(&inode->i_lock); return ip->i_hash_cache; @@ -385,7 +396,10 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip) { __be64 *hc = ip->i_hash_cache; ip->i_hash_cache = NULL; - kfree(hc); + if (is_vmalloc_addr(hc)) + vfree(hc); + else + kfree(hc); } static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) @@ -495,8 +509,8 @@ static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset, goto error; return 0; error: - printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg, - first ? "first in block" : "not first in block"); + pr_warn("%s: %s (%s)\n", + __func__, msg, first ? "first in block" : "not first in block"); return -EIO; } @@ -519,8 +533,7 @@ static int gfs2_dirent_offset(const void *buf) } return offset; wrong_type: - printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n", - be32_to_cpu(h->mh_type)); + pr_warn("%s: wrong block type %u\n", __func__, be32_to_cpu(h->mh_type)); return -1; } @@ -647,7 +660,7 @@ static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, return; } - gfs2_trans_add_bh(dip->i_gl, bh, 1); + gfs2_trans_add_meta(dip->i_gl, bh); /* If there is no prev entry, this is the first entry in the block. The de_rec_len is already as big as it needs to be. Just zero @@ -690,7 +703,7 @@ static struct gfs2_dirent *gfs2_init_dirent(struct inode *inode, offset = GFS2_DIRENT_SIZE(be16_to_cpu(dent->de_name_len)); totlen = be16_to_cpu(dent->de_rec_len); BUG_ON(offset + name->len > totlen); - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); ndent = (struct gfs2_dirent *)((char *)dent + offset); dent->de_rec_len = cpu_to_be16(offset); gfs2_qstr2dirent(name, totlen - offset, ndent); @@ -716,7 +729,7 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { - /* printk(KERN_INFO "block num=%llu\n", leaf_no); */ + /* pr_info("block num=%llu\n", leaf_no); */ error = -EIO; } @@ -822,6 +835,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, struct gfs2_leaf *leaf; struct gfs2_dirent *dent; struct qstr name = { .name = "" }; + struct timespec tv = CURRENT_TIME; error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL); if (error) @@ -831,14 +845,18 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, return NULL; gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); leaf = (struct gfs2_leaf *)bh->b_data; leaf->lf_depth = cpu_to_be16(depth); leaf->lf_entries = 0; leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE); leaf->lf_next = 0; - memset(leaf->lf_reserved, 0, sizeof(leaf->lf_reserved)); + leaf->lf_inode = cpu_to_be64(ip->i_no_addr); + leaf->lf_dist = cpu_to_be32(1); + leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); + leaf->lf_sec = cpu_to_be64(tv.tv_sec); + memset(leaf->lf_reserved2, 0, sizeof(leaf->lf_reserved2)); dent = (struct gfs2_dirent *)(leaf+1); gfs2_qstr2dirent(&name, bh->b_size - sizeof(struct gfs2_leaf), dent); *pbh = bh; @@ -916,7 +934,7 @@ static int dir_make_exhash(struct inode *inode) /* We're done with the new leaf block, now setup the new hash table. */ - gfs2_trans_add_bh(dip->i_gl, dibh, 1); + gfs2_trans_add_meta(dip->i_gl, dibh); gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); lp = (__be64 *)(dibh->b_data + sizeof(struct gfs2_dinode)); @@ -976,7 +994,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) return 1; /* can't split */ } - gfs2_trans_add_bh(dip->i_gl, obh, 1); + gfs2_trans_add_meta(dip->i_gl, obh); nleaf = new_leaf(inode, &nbh, be16_to_cpu(oleaf->lf_depth) + 1); if (!nleaf) { @@ -989,7 +1007,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); half_len = len >> 1; if (!half_len) { - printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); + pr_warn("i_depth %u lf_depth %u index %u\n", + dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); gfs2_consist_inode(dip); error = -EIO; goto fail_brelse; @@ -1069,7 +1088,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) error = gfs2_meta_inode_buffer(dip, &dibh); if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { - gfs2_trans_add_bh(dip->i_gl, dibh, 1); + gfs2_trans_add_meta(dip->i_gl, dibh); gfs2_add_inode_blocks(&dip->i_inode, 1); gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); @@ -1113,10 +1132,14 @@ static int dir_double_exhash(struct gfs2_inode *dip) if (IS_ERR(hc)) return PTR_ERR(hc); - h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS); + hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN); + if (hc2 == NULL) + hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); + if (!hc2) return -ENOMEM; + h = hc2; error = gfs2_meta_inode_buffer(dip, &dibh); if (error) goto out_kfree; @@ -1145,7 +1168,10 @@ fail: gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); out_kfree: - kfree(hc2); + if (is_vmalloc_addr(hc2)) + vfree(hc2); + else + kfree(hc2); return error; } @@ -1194,9 +1220,7 @@ static int compare_dents(const void *a, const void *b) /** * do_filldir_main - read out directory entries * @dip: The GFS2 inode - * @offset: The offset in the file to read from - * @opaque: opaque data to pass to filldir - * @filldir: The function to pass entries to + * @ctx: what to feed the entries to * @darr: an array of struct gfs2_dirent pointers to read * @entries: the number of entries in darr * @copied: pointer to int that's non-zero if a entry has been copied out @@ -1206,11 +1230,10 @@ static int compare_dents(const void *a, const void *b) * the possibility that they will fall into different readdir buffers or * that someone will want to seek to that location. * - * Returns: errno, >0 on exception from filldir + * Returns: errno, >0 if the actor tells you to stop */ -static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, - void *opaque, filldir_t filldir, +static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx, const struct gfs2_dirent **darr, u32 entries, int *copied) { @@ -1218,7 +1241,6 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, u64 off, off_next; unsigned int x, y; int run = 0; - int error = 0; sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); @@ -1235,9 +1257,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, off_next = be32_to_cpu(dent_next->de_hash); off_next = gfs2_disk_hash2offset(off_next); - if (off < *offset) + if (off < ctx->pos) continue; - *offset = off; + ctx->pos = off; if (off_next == off) { if (*copied && !run) @@ -1246,26 +1268,25 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, } else run = 0; } else { - if (off < *offset) + if (off < ctx->pos) continue; - *offset = off; + ctx->pos = off; } - error = filldir(opaque, (const char *)(dent + 1), + if (!dir_emit(ctx, (const char *)(dent + 1), be16_to_cpu(dent->de_name_len), - off, be64_to_cpu(dent->de_inum.no_addr), - be16_to_cpu(dent->de_type)); - if (error) + be64_to_cpu(dent->de_inum.no_addr), + be16_to_cpu(dent->de_type))) return 1; *copied = 1; } - /* Increment the *offset by one, so the next time we come into the + /* Increment the ctx->pos by one, so the next time we come into the do_filldir fxn, we get the next entry instead of the last one in the current leaf */ - (*offset)++; + ctx->pos++; return 0; } @@ -1289,8 +1310,8 @@ static void gfs2_free_sort_buffer(void *ptr) kfree(ptr); } -static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, - filldir_t filldir, int *copied, unsigned *depth, +static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, + int *copied, unsigned *depth, u64 leaf_no) { struct gfs2_inode *ip = GFS2_I(inode); @@ -1368,8 +1389,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, } while(lfn); BUG_ON(entries2 != entries); - error = do_filldir_main(ip, offset, opaque, filldir, darr, - entries, copied); + error = do_filldir_main(ip, ctx, darr, entries, copied); out_free: for(i = 0; i < leaf; i++) brelse(larr[i]); @@ -1428,15 +1448,13 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, /** * dir_e_read - Reads the entries from a directory into a filldir buffer * @dip: dinode pointer - * @offset: the hash of the last entry read shifted to the right once - * @opaque: buffer for the filldir function to fill - * @filldir: points to the filldir function to use + * @ctx: actor to feed the entries to * * Returns: errno */ -static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, - filldir_t filldir, struct file_ra_state *f_ra) +static int dir_e_read(struct inode *inode, struct dir_context *ctx, + struct file_ra_state *f_ra) { struct gfs2_inode *dip = GFS2_I(inode); u32 hsize, len = 0; @@ -1447,7 +1465,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, unsigned depth = 0; hsize = 1 << dip->i_depth; - hash = gfs2_dir_offset2hash(*offset); + hash = gfs2_dir_offset2hash(ctx->pos); index = hash >> (32 - dip->i_depth); if (dip->i_hash_cache == NULL) @@ -1459,7 +1477,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, gfs2_dir_readahead(inode, hsize, index, f_ra); while (index < hsize) { - error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, + error = gfs2_dir_read_leaf(inode, ctx, &copied, &depth, be64_to_cpu(lp[index])); if (error) @@ -1474,8 +1492,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, return error; } -int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, - filldir_t filldir, struct file_ra_state *f_ra) +int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, + struct file_ra_state *f_ra) { struct gfs2_inode *dip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -1489,7 +1507,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, return 0; if (dip->i_diskflags & GFS2_DIF_EXHASH) - return dir_e_read(inode, offset, opaque, filldir, f_ra); + return dir_e_read(inode, ctx, f_ra); if (!gfs2_is_stuffed(dip)) { gfs2_consist_inode(dip); @@ -1521,7 +1539,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, error = -EIO; goto out; } - error = do_filldir_main(dip, offset, opaque, filldir, darr, + error = do_filldir_main(dip, ctx, darr, dip->i_entries, &copied); out: kfree(darr); @@ -1537,9 +1555,9 @@ out: /** * gfs2_dir_search - Search a directory - * @dip: The GFS2 inode - * @filename: - * @inode: + * @dip: The GFS2 dir inode + * @name: The name we are looking up + * @fail_on_exist: Fail if the name exists rather than looking it up * * This routine searches a directory for a file or another directory. * Assumes a glock is held on dip. @@ -1547,22 +1565,25 @@ out: * Returns: errno */ -struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) +struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name, + bool fail_on_exist) { struct buffer_head *bh; struct gfs2_dirent *dent; - struct inode *inode; + u64 addr, formal_ino; + u16 dtype; dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); if (dent) { if (IS_ERR(dent)) return ERR_CAST(dent); - inode = gfs2_inode_lookup(dir->i_sb, - be16_to_cpu(dent->de_type), - be64_to_cpu(dent->de_inum.no_addr), - be64_to_cpu(dent->de_inum.no_formal_ino), 0); + dtype = be16_to_cpu(dent->de_type); + addr = be64_to_cpu(dent->de_inum.no_addr); + formal_ino = be64_to_cpu(dent->de_inum.no_formal_ino); brelse(bh); - return inode; + if (fail_on_exist) + return ERR_PTR(-EEXIST); + return gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0); } return ERR_PTR(-ENOENT); } @@ -1598,11 +1619,31 @@ out: return ret; } +/** + * dir_new_leaf - Add a new leaf onto hash chain + * @inode: The directory + * @name: The name we are adding + * + * This adds a new dir leaf onto an existing leaf when there is not + * enough space to add a new dir entry. This is a last resort after + * we've expanded the hash table to max size and also split existing + * leaf blocks, so it will only occur for very large directories. + * + * The dist parameter is set to 1 for leaf blocks directly attached + * to the hash table, 2 for one layer of indirection, 3 for two layers + * etc. We are thus able to tell the difference between an old leaf + * with dist set to zero (i.e. "don't know") and a new one where we + * set this information for debug/fsck purposes. + * + * Returns: 0 on success, or -ve on error + */ + static int dir_new_leaf(struct inode *inode, const struct qstr *name) { struct buffer_head *bh, *obh; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_leaf *leaf, *oleaf; + u32 dist = 1; int error; u32 index; u64 bn; @@ -1612,6 +1653,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) if (error) return error; do { + dist++; oleaf = (struct gfs2_leaf *)obh->b_data; bn = be64_to_cpu(oleaf->lf_next); if (!bn) @@ -1622,13 +1664,14 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) return error; } while(1); - gfs2_trans_add_bh(ip->i_gl, obh, 1); + gfs2_trans_add_meta(ip->i_gl, obh); leaf = new_leaf(inode, &bh, be16_to_cpu(oleaf->lf_depth)); if (!leaf) { brelse(obh); return -ENOSPC; } + leaf->lf_dist = cpu_to_be32(dist); oleaf->lf_next = cpu_to_be64(bh->b_blocknr); brelse(bh); brelse(obh); @@ -1636,48 +1679,71 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) error = gfs2_meta_inode_buffer(ip, &bh); if (error) return error; - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); gfs2_add_inode_blocks(&ip->i_inode, 1); gfs2_dinode_out(ip, bh->b_data); brelse(bh); return 0; } +static u16 gfs2_inode_ra_len(const struct gfs2_inode *ip) +{ + u64 where = ip->i_no_addr + 1; + if (ip->i_eattr == where) + return 1; + return 0; +} + /** * gfs2_dir_add - Add new filename into directory - * @dip: The GFS2 inode - * @filename: The new name - * @inode: The inode number of the entry - * @type: The type of the entry + * @inode: The directory inode + * @name: The new name + * @nip: The GFS2 inode to be linked in to the directory + * @da: The directory addition info + * + * If the call to gfs2_diradd_alloc_required resulted in there being + * no need to allocate any new directory blocks, then it will contain + * a pointer to the directory entry and the bh in which it resides. We + * can use that without having to repeat the search. If there was no + * free space, then we must now create more space. * * Returns: 0 on success, error code on failure */ int gfs2_dir_add(struct inode *inode, const struct qstr *name, - const struct gfs2_inode *nip) + const struct gfs2_inode *nip, struct gfs2_diradd *da) { struct gfs2_inode *ip = GFS2_I(inode); - struct buffer_head *bh; - struct gfs2_dirent *dent; + struct buffer_head *bh = da->bh; + struct gfs2_dirent *dent = da->dent; + struct timespec tv; struct gfs2_leaf *leaf; int error; while(1) { - dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, - &bh); + if (da->bh == NULL) { + dent = gfs2_dirent_search(inode, name, + gfs2_dirent_find_space, &bh); + } if (dent) { if (IS_ERR(dent)) return PTR_ERR(dent); dent = gfs2_init_dirent(inode, dent, name, bh); gfs2_inum_out(nip, dent); dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); + dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip)); + tv = CURRENT_TIME; if (ip->i_diskflags & GFS2_DIF_EXHASH) { leaf = (struct gfs2_leaf *)bh->b_data; be16_add_cpu(&leaf->lf_entries, 1); + leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); + leaf->lf_sec = cpu_to_be64(tv.tv_sec); } + da->dent = NULL; + da->bh = NULL; brelse(bh); ip->i_entries++; - ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv; if (S_ISDIR(nip->i_inode.i_mode)) inc_nlink(&ip->i_inode); mark_inode_dirty(inode); @@ -1728,6 +1794,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) const struct qstr *name = &dentry->d_name; struct gfs2_dirent *dent, *prev = NULL; struct buffer_head *bh; + struct timespec tv = CURRENT_TIME; /* Returns _either_ the entry (if its first in block) or the previous entry otherwise */ @@ -1753,13 +1820,15 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry) if (!entries) gfs2_consist_inode(dip); leaf->lf_entries = cpu_to_be16(--entries); + leaf->lf_nsec = cpu_to_be32(tv.tv_nsec); + leaf->lf_sec = cpu_to_be64(tv.tv_sec); } brelse(bh); if (!dip->i_entries) gfs2_consist_inode(dip); dip->i_entries--; - dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; + dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv; if (S_ISDIR(dentry->d_inode->i_mode)) drop_nlink(&dip->i_inode); mark_inode_dirty(&dip->i_inode); @@ -1795,7 +1864,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, if (IS_ERR(dent)) return PTR_ERR(dent); - gfs2_trans_add_bh(dip->i_gl, bh, 1); + gfs2_trans_add_meta(dip->i_gl, bh); gfs2_inum_out(nip, dent); dent->de_type = cpu_to_be16(new_type); @@ -1804,7 +1873,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, error = gfs2_meta_inode_buffer(dip, &bh); if (error) return error; - gfs2_trans_add_bh(dip->i_gl, bh, 1); + gfs2_trans_add_meta(dip->i_gl, bh); } dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; @@ -1845,11 +1914,13 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); - ht = kzalloc(size, GFP_NOFS); + ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN); + if (ht == NULL) + ht = vzalloc(size); if (!ht) return -ENOMEM; - error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + error = gfs2_quota_hold(dip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) goto out; @@ -1917,7 +1988,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, if (error) goto out_end_trans; - gfs2_trans_add_bh(dip->i_gl, dibh, 1); + gfs2_trans_add_meta(dip->i_gl, dibh); /* On the last dealloc, make this a regular file in case we crash. (We don't want to free these blocks a second time.) */ if (last_dealloc) @@ -1933,7 +2004,10 @@ out_rlist: gfs2_rlist_free(&rlist); gfs2_quota_unhold(dip); out: - kfree(ht); + if (is_vmalloc_addr(ht)) + vfree(ht); + else + kfree(ht); return error; } @@ -1998,22 +2072,36 @@ out: * gfs2_diradd_alloc_required - find if adding entry will require an allocation * @ip: the file being written to * @filname: the filename that's going to be added + * @da: The structure to return dir alloc info * - * Returns: 1 if alloc required, 0 if not, -ve on error + * Returns: 0 if ok, -ve on error */ -int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name) +int gfs2_diradd_alloc_required(struct inode *inode, const struct qstr *name, + struct gfs2_diradd *da) { + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); + const unsigned int extra = sizeof(struct gfs2_dinode) - sizeof(struct gfs2_leaf); struct gfs2_dirent *dent; struct buffer_head *bh; + da->nr_blocks = 0; + da->bh = NULL; + da->dent = NULL; + dent = gfs2_dirent_search(inode, name, gfs2_dirent_find_space, &bh); if (!dent) { - return 1; + da->nr_blocks = sdp->sd_max_dirres; + if (!(ip->i_diskflags & GFS2_DIF_EXHASH) && + (GFS2_DIRENT_SIZE(name->len) < extra)) + da->nr_blocks = 1; + return 0; } if (IS_ERR(dent)) return PTR_ERR(dent); - brelse(bh); + da->bh = bh; + da->dent = dent; return 0; } diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 98c960beab3..126c65dda02 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -16,23 +16,39 @@ struct inode; struct gfs2_inode; struct gfs2_inum; +struct buffer_head; +struct gfs2_dirent; + +struct gfs2_diradd { + unsigned nr_blocks; + struct gfs2_dirent *dent; + struct buffer_head *bh; +}; extern struct inode *gfs2_dir_search(struct inode *dir, - const struct qstr *filename); + const struct qstr *filename, + bool fail_on_exist); extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, const struct gfs2_inode *ip); extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, - const struct gfs2_inode *ip); + const struct gfs2_inode *ip, struct gfs2_diradd *da); +static inline void gfs2_dir_no_add(struct gfs2_diradd *da) +{ + if (da->bh) + brelse(da->bh); + da->bh = NULL; +} extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); -extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, - filldir_t filldir, struct file_ra_state *f_ra); +extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, + struct file_ra_state *f_ra); extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, const struct gfs2_inode *nip, unsigned int new_type); extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); extern int gfs2_diradd_alloc_required(struct inode *dir, - const struct qstr *filename); + const struct qstr *filename, + struct gfs2_diradd *da); extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, struct buffer_head **bhp); extern void gfs2_dir_hash_inval(struct gfs2_inode *ip); diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 4767774a5f3..8b9b3775e2e 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -37,10 +37,10 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, if (parent && (*len < GFS2_LARGE_FH_SIZE)) { *len = GFS2_LARGE_FH_SIZE; - return 255; + return FILEID_INVALID; } else if (*len < GFS2_SMALL_FH_SIZE) { *len = GFS2_SMALL_FH_SIZE; - return 255; + return FILEID_INVALID; } fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); @@ -64,6 +64,7 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, } struct get_name_filldir { + struct dir_context ctx; struct gfs2_inum_host inum; char *name; }; @@ -88,9 +89,11 @@ static int gfs2_get_name(struct dentry *parent, char *name, struct inode *dir = parent->d_inode; struct inode *inode = child->d_inode; struct gfs2_inode *dip, *ip; - struct get_name_filldir gnfd; + struct get_name_filldir gnfd = { + .ctx.actor = get_name_filldir, + .name = name + }; struct gfs2_holder gh; - u64 offset = 0; int error; struct file_ra_state f_ra = { .start = 0 }; @@ -106,13 +109,12 @@ static int gfs2_get_name(struct dentry *parent, char *name, *name = 0; gnfd.inum.no_addr = ip->i_no_addr; gnfd.inum.no_formal_ino = ip->i_no_formal_ino; - gnfd.name = name; error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); if (error) return error; - error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir, &f_ra); + error = gfs2_dir_read(dir, &gnfd.ctx, &f_ra); gfs2_glock_dq_uninit(&gh); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 991ab2d484d..26b3f952e6b 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -25,6 +25,7 @@ #include <asm/uaccess.h> #include <linux/dlm.h> #include <linux/dlm_plock.h> +#include <linux/aio.h> #include "gfs2.h" #include "incore.h" @@ -81,35 +82,28 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence) } /** - * gfs2_readdir - Read directory entries from a directory + * gfs2_readdir - Iterator for a directory * @file: The directory to read from - * @dirent: Buffer for dirents - * @filldir: Function used to do the copying + * @ctx: What to feed directory entries to * * Returns: errno */ -static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) +static int gfs2_readdir(struct file *file, struct dir_context *ctx) { struct inode *dir = file->f_mapping->host; struct gfs2_inode *dip = GFS2_I(dir); struct gfs2_holder d_gh; - u64 offset = file->f_pos; int error; - gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); - error = gfs2_glock_nq(&d_gh); - if (error) { - gfs2_holder_uninit(&d_gh); + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) return error; - } - error = gfs2_dir_read(dir, &offset, dirent, filldir, &file->f_ra); + error = gfs2_dir_read(dir, ctx, &file->f_ra); gfs2_glock_dq_uninit(&d_gh); - file->f_pos = offset; - return error; } @@ -157,7 +151,7 @@ static const u32 gfs2_to_fsflags[32] = { static int gfs2_get_flags(struct file *filp, u32 __user *ptr) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; @@ -209,15 +203,15 @@ void gfs2_set_inode_flags(struct inode *inode) GFS2_DIF_INHERIT_JDATA) /** - * gfs2_set_flags - set flags on an inode - * @inode: The inode - * @flags: The flags to set + * do_gfs2_set_flags - set flags on an inode + * @filp: file pointer + * @reqflags: The flags to set * @mask: Indicates which flags are valid * */ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *bh; @@ -262,7 +256,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) } if ((flags ^ new_flags) & GFS2_DIF_JDATA) { if (flags & GFS2_DIF_JDATA) - gfs2_log_flush(sdp, ip->i_gl); + gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); error = filemap_fdatawrite(inode->i_mapping); if (error) goto out; @@ -276,7 +270,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) error = gfs2_meta_inode_buffer(ip, &bh); if (error) goto out_trans_end; - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); ip->i_diskflags = new_flags; gfs2_dinode_out(ip, bh->b_data); brelse(bh); @@ -293,7 +287,7 @@ out_drop_write: static int gfs2_set_flags(struct file *filp, u32 __user *ptr) { - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); u32 fsflags, gfsflags; if (get_user(fsflags, ptr)) @@ -324,7 +318,7 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /** * gfs2_size_hint - Give a hint to the size of a write request - * @file: The struct file + * @filep: The struct file * @offset: The file offset of the write * @size: The length of the write * @@ -336,7 +330,7 @@ static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size) { - struct inode *inode = filep->f_dentry->d_inode; + struct inode *inode = file_inode(filep); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift; @@ -377,7 +371,7 @@ static int gfs2_allocate_page_backing(struct page *page) /** * gfs2_page_mkwrite - Make a shared, mmap()ed, page writable * @vma: The virtual memory area - * @page: The page which is about to become writable + * @vmf: The virtual memory fault containing the page to become writable * * When the page becomes writable, we need to ensure that we have * blocks allocated on disk to back that page. @@ -386,9 +380,10 @@ static int gfs2_allocate_page_backing(struct page *page) static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = file_inode(vma->vm_file); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; @@ -401,16 +396,20 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /* Update file times before taking page lock */ file_update_time(vma->vm_file); + ret = get_write_access(inode); + if (ret) + goto out; + ret = gfs2_rs_alloc(ip); if (ret) - return ret; + goto out_write_access; gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) - goto out; + goto out_uninit; set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); @@ -432,7 +431,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (ret) goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); - ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); + ap.target = data_blocks + ind_blocks; + ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; @@ -479,18 +479,22 @@ out_quota_unlock: gfs2_quota_unlock(ip); out_unlock: gfs2_glock_dq(&gh); -out: +out_uninit: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); - wait_on_page_writeback(page); + wait_for_stable_page(page); } +out_write_access: + put_write_access(inode); +out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = gfs2_page_mkwrite, .remap_pages = generic_file_remap_pages, }; @@ -530,21 +534,30 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) } /** - * gfs2_open - open a file - * @inode: the inode to open - * @file: the struct file for this opening + * gfs2_open_common - This is common to open and atomic_open + * @inode: The inode being opened + * @file: The file being opened * - * Returns: errno + * This maybe called under a glock or not depending upon how it has + * been called. We must always be called under a glock for regular + * files, however. For other file types, it does not matter whether + * we hold the glock or not. + * + * Returns: Error code or 0 for success */ -static int gfs2_open(struct inode *inode, struct file *file) +int gfs2_open_common(struct inode *inode, struct file *file) { - struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_holder i_gh; struct gfs2_file *fp; - int error; + int ret; - fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); + if (S_ISREG(inode->i_mode)) { + ret = generic_file_open(inode, file); + if (ret) + return ret; + } + + fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS); if (!fp) return -ENOMEM; @@ -552,29 +565,43 @@ static int gfs2_open(struct inode *inode, struct file *file) gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; + return 0; +} + +/** + * gfs2_open - open a file + * @inode: the inode to open + * @file: the struct file for this opening + * + * After atomic_open, this function is only used for opening files + * which are already cached. We must still get the glock for regular + * files to ensure that we have the file size uptodate for the large + * file check which is in the common code. That is only an issue for + * regular files though. + * + * Returns: errno + */ + +static int gfs2_open(struct inode *inode, struct file *file) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder i_gh; + int error; + bool need_unlock = false; if (S_ISREG(ip->i_inode.i_mode)) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) - goto fail; + return error; + need_unlock = true; + } - if (!(file->f_flags & O_LARGEFILE) && - i_size_read(inode) > MAX_NON_LFS) { - error = -EOVERFLOW; - goto fail_gunlock; - } + error = gfs2_open_common(inode, file); + if (need_unlock) gfs2_glock_dq_uninit(&i_gh); - } - - return 0; -fail_gunlock: - gfs2_glock_dq_uninit(&i_gh); -fail: - file->private_data = NULL; - kfree(fp); return error; } @@ -593,10 +620,10 @@ static int gfs2_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; - if ((file->f_mode & FMODE_WRITE) && - (atomic_read(&inode->i_writecount) == 1)) - gfs2_rs_delete(ip); + if (!(file->f_mode & FMODE_WRITE)) + return 0; + gfs2_rs_delete(ip, &inode->i_writecount); return 0; } @@ -626,7 +653,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); + int sync_state = inode->i_state & I_DIRTY; struct gfs2_inode *ip = GFS2_I(inode); int ret = 0, ret1 = 0; @@ -636,6 +663,8 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret1; } + if (!gfs2_is_jdata(ip)) + sync_state &= ~I_DIRTY_PAGES; if (datasync) sync_state &= ~I_DIRTY_SYNC; @@ -655,7 +684,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, } /** - * gfs2_file_aio_write - Perform a write to a file + * gfs2_file_write_iter - Perform a write to a file * @iocb: The io context * @iov: The data to write * @nr_segs: Number of @iov segments @@ -668,20 +697,17 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, * */ -static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - size_t writesize = iov_length(iov, nr_segs); - struct dentry *dentry = file->f_dentry; - struct gfs2_inode *ip = GFS2_I(dentry->d_inode); + struct gfs2_inode *ip = GFS2_I(file_inode(file)); int ret; ret = gfs2_rs_alloc(ip); if (ret) return ret; - gfs2_size_hint(file, pos, writesize); + gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); if (file->f_flags & O_APPEND) { struct gfs2_holder gh; @@ -692,7 +718,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, gfs2_glock_dq_uninit(&gh); } - return generic_file_aio_write(iocb, iov, nr_segs, pos); + return generic_file_write_iter(iocb, from); } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, @@ -709,7 +735,7 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, if (unlikely(error)) return error; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); if (gfs2_is_stuffed(ip)) { error = gfs2_unstuff_dinode(ip, NULL); @@ -772,9 +798,10 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks = 0, ind_blocks = 0, rblocks; loff_t bytes, max_bytes; int error; @@ -783,6 +810,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; loff_t max_chunk_size = UINT_MAX & bsize_mask; + struct gfs2_holder gh; + next = (next + 1) << sdp->sd_sb.sb_bsize_shift; /* We only support the FALLOC_FL_KEEP_SIZE mode */ @@ -803,8 +832,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, if (error) return error; - gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); - error = gfs2_glock_nq(&ip->i_gh); + mutex_lock(&inode->i_mutex); + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + error = gfs2_glock_nq(&gh); if (unlikely(error)) goto out_uninit; @@ -825,7 +856,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, retry: gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks, 0); + ap.target = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip, &ap); if (error) { if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { bytes >>= 1; @@ -871,9 +903,10 @@ out_trans_fail: out_qunlock: gfs2_quota_unlock(ip); out_unlock: - gfs2_glock_dq(&ip->i_gh); + gfs2_glock_dq(&gh); out_uninit: - gfs2_holder_uninit(&ip->i_gh); + gfs2_holder_uninit(&gh); + mutex_unlock(&inode->i_mutex); return error; } @@ -889,7 +922,7 @@ out_uninit: * cluster; until we do, disable leases (by just returning -EINVAL), * unless the administrator has requested purely local locking. * - * Locking: called under lock_flocks + * Locking: called under i_lock * * Returns: errno */ @@ -924,8 +957,11 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) cmd = F_SETLK; fl->fl_type = F_UNLCK; } - if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + if (fl->fl_type == F_UNLCK) + posix_lock_file_wait(file, fl); return -EIO; + } if (IS_GETLK(cmd)) return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl); else if (fl->fl_type == F_UNLCK) @@ -938,14 +974,14 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) { struct gfs2_file *fp = file->private_data; struct gfs2_holder *fl_gh = &fp->f_fl_gh; - struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode); + struct gfs2_inode *ip = GFS2_I(file_inode(file)); struct gfs2_glock *gl; unsigned int state; int flags; int error = 0; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; + flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT; mutex_lock(&fp->f_fl_mutex); @@ -955,7 +991,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) goto out; flock_lock_file_wait(file, &(struct file_lock){.fl_type = F_UNLCK}); - gfs2_glock_dq_wait(fl_gh); + gfs2_glock_dq(fl_gh); gfs2_holder_reinit(state, flags, fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, @@ -1020,10 +1056,10 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = gfs2_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, @@ -1032,13 +1068,13 @@ const struct file_operations gfs2_file_fops = { .lock = gfs2_lock, .flock = gfs2_flock, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .setlease = gfs2_setlease, .fallocate = gfs2_fallocate, }; const struct file_operations gfs2_dir_fops = { - .readdir = gfs2_readdir, + .iterate = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, .release = gfs2_release, @@ -1052,23 +1088,23 @@ const struct file_operations gfs2_dir_fops = { const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = gfs2_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, }; const struct file_operations gfs2_dir_fops_nolock = { - .readdir = gfs2_readdir, + .iterate = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, .release = gfs2_release, diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 992c5c0cb50..ee4e04fe60f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -30,6 +32,8 @@ #include <linux/rculist_bl.h> #include <linux/bit_spinlock.h> #include <linux/percpu.h> +#include <linux/list_sort.h> +#include <linux/lockref.h> #include "gfs2.h" #include "incore.h" @@ -128,10 +132,10 @@ void gfs2_glock_free(struct gfs2_glock *gl) * */ -void gfs2_glock_hold(struct gfs2_glock *gl) +static void gfs2_glock_hold(struct gfs2_glock *gl) { - GLOCK_BUG_ON(gl, atomic_read(&gl->gl_ref) == 0); - atomic_inc(&gl->gl_ref); + GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); + lockref_get(&gl->gl_lockref); } /** @@ -186,20 +190,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) } /** - * gfs2_glock_put_nolock() - Decrement reference count on glock - * @gl: The glock to put - * - * This function should only be used if the caller has its own reference - * to the glock, in addition to the one it is dropping. - */ - -void gfs2_glock_put_nolock(struct gfs2_glock *gl) -{ - if (atomic_dec_and_test(&gl->gl_ref)) - GLOCK_BUG_ON(gl, 1); -} - -/** * gfs2_glock_put() - Decrement reference count on glock * @gl: The glock to put * @@ -210,17 +200,22 @@ void gfs2_glock_put(struct gfs2_glock *gl) struct gfs2_sbd *sdp = gl->gl_sbd; struct address_space *mapping = gfs2_glock2aspace(gl); - if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { - __gfs2_glock_remove_from_lru(gl); - spin_unlock(&lru_lock); - spin_lock_bucket(gl->gl_hash); - hlist_bl_del_rcu(&gl->gl_list); - spin_unlock_bucket(gl->gl_hash); - GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); - GLOCK_BUG_ON(gl, mapping && mapping->nrpages); - trace_gfs2_glock_put(gl); - sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); - } + if (lockref_put_or_lock(&gl->gl_lockref)) + return; + + lockref_mark_dead(&gl->gl_lockref); + + spin_lock(&lru_lock); + __gfs2_glock_remove_from_lru(gl); + spin_unlock(&lru_lock); + spin_unlock(&gl->gl_lockref.lock); + spin_lock_bucket(gl->gl_hash); + hlist_bl_del_rcu(&gl->gl_list); + spin_unlock_bucket(gl->gl_hash); + GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); + GLOCK_BUG_ON(gl, mapping && mapping->nrpages); + trace_gfs2_glock_put(gl); + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); } /** @@ -243,7 +238,7 @@ static struct gfs2_glock *search_bucket(unsigned int hash, continue; if (gl->gl_sbd != sdp) continue; - if (atomic_inc_not_zero(&gl->gl_ref)) + if (lockref_get_not_dead(&gl->gl_lockref)) return gl; } @@ -282,7 +277,7 @@ static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holde static void gfs2_holder_wake(struct gfs2_holder *gh) { clear_bit(HIF_WAIT, &gh->gh_iflags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&gh->gh_iflags, HIF_WAIT); } @@ -395,10 +390,11 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) held2 = (new_state != LM_ST_UNLOCKED); if (held1 != held2) { + GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref)); if (held2) - gfs2_glock_hold(gl); + gl->gl_lockref.count++; else - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; } if (held1 && held2 && list_empty(&gl->gl_holders)) clear_bit(GLF_QUEUED, &gl->gl_flags); @@ -415,7 +411,7 @@ static void gfs2_demote_wake(struct gfs2_glock *gl) { gl->gl_demote_state = LM_ST_EXCLUSIVE; clear_bit(GLF_DEMOTE, &gl->gl_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&gl->gl_flags, GLF_DEMOTE); } @@ -474,7 +470,7 @@ retry: do_xmote(gl, gh, LM_ST_UNLOCKED); break; default: /* Everything else */ - printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state); + pr_err("wanted %u got %u\n", gl->gl_target, state); GLOCK_BUG_ON(gl, 1); } spin_unlock(&gl->gl_spin); @@ -548,7 +544,7 @@ __acquires(&gl->gl_spin) /* lock_dlm */ ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); if (ret) { - printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret); + pr_err("lm_lock ret %d\n", ret); GLOCK_BUG_ON(gl, 1); } } else { /* lock_nolock */ @@ -624,15 +620,15 @@ out: out_sched: clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_clear_bit(); - gfs2_glock_hold(gl); + smp_mb__after_atomic(); + gl->gl_lockref.count++; if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; return; out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); return; } @@ -735,14 +731,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, cachep = gfs2_glock_aspace_cachep; else cachep = gfs2_glock_cachep; - gl = kmem_cache_alloc(cachep, GFP_KERNEL); + gl = kmem_cache_alloc(cachep, GFP_NOFS); if (!gl) return -ENOMEM; memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); if (glops->go_flags & GLOF_LVB) { - gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_KERNEL); + gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS); if (!gl->gl_lksb.sb_lvbptr) { kmem_cache_free(cachep, gl); return -ENOMEM; @@ -753,7 +749,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_sbd = sdp; gl->gl_flags = 0; gl->gl_name = name; - atomic_set(&gl->gl_ref, 1); + gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; @@ -911,7 +907,7 @@ int gfs2_glock_wait(struct gfs2_holder *gh) */ static void handle_callback(struct gfs2_glock *gl, unsigned int state, - unsigned long delay) + unsigned long delay, bool remote) { int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; @@ -924,8 +920,8 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, gl->gl_demote_state = LM_ST_UNLOCKED; } if (gl->gl_ops->go_callback) - gl->gl_ops->go_callback(gl); - trace_gfs2_demote_rq(gl); + gl->gl_ops->go_callback(gl, remote); + trace_gfs2_demote_rq(gl, remote); } void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) @@ -941,7 +937,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) vaf.fmt = fmt; vaf.va = &args; - printk(KERN_ERR " %pV", &vaf); + pr_err("%pV", &vaf); } va_end(args); @@ -1016,13 +1012,13 @@ do_cancel: return; trap_recursive: - print_symbol(KERN_ERR "original: %s\n", gh2->gh_ip); - printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid)); - printk(KERN_ERR "lock type: %d req lock state : %d\n", + pr_err("original: %pSR\n", (void *)gh2->gh_ip); + pr_err("pid: %d\n", pid_nr(gh2->gh_owner_pid)); + pr_err("lock type: %d req lock state : %d\n", gh2->gh_gl->gl_name.ln_type, gh2->gh_state); - print_symbol(KERN_ERR "new: %s\n", gh->gh_ip); - printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); - printk(KERN_ERR "lock type: %d req lock state : %d\n", + pr_err("new: %pSR\n", (void *)gh->gh_ip); + pr_err("pid: %d\n", pid_nr(gh->gh_owner_pid)); + pr_err("lock type: %d req lock state : %d\n", gh->gh_gl->gl_name.ln_type, gh->gh_state); gfs2_dump_glock(NULL, gl); BUG(); @@ -1051,9 +1047,13 @@ int gfs2_glock_nq(struct gfs2_holder *gh) spin_lock(&gl->gl_spin); add_to_queue(gh); - if ((LM_FLAG_NOEXP & gh->gh_flags) && - test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && + test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { set_bit(GLF_REPLY_PENDING, &gl->gl_flags); + gl->gl_lockref.count++; + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gl->gl_lockref.count--; + } run_queue(gl, 1); spin_unlock(&gl->gl_spin); @@ -1090,7 +1090,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) spin_lock(&gl->gl_spin); if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED, 0); + handle_callback(gl, LM_ST_UNLOCKED, 0, false); list_del_init(&gh->gh_list); if (find_first_holder(gl) == NULL) { @@ -1278,19 +1278,6 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) gfs2_glock_dq(&ghs[num_gh]); } -/** - * gfs2_glock_dq_uninit_m - release multiple glocks - * @num_gh: the number of structures - * @ghs: an array of struct gfs2_holder structures - * - */ - -void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) -{ - while (num_gh--) - gfs2_glock_dq_uninit(&ghs[num_gh]); -} - void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) { unsigned long delay = 0; @@ -1308,7 +1295,7 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) } spin_lock(&gl->gl_spin); - handle_callback(gl, state, delay); + handle_callback(gl, state, delay, true); spin_unlock(&gl->gl_spin); if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); @@ -1368,70 +1355,132 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) } } - spin_unlock(&gl->gl_spin); + gl->gl_lockref.count++; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - smp_wmb(); - gfs2_glock_hold(gl); + spin_unlock(&gl->gl_spin); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } +static int glock_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct gfs2_glock *gla, *glb; + + gla = list_entry(a, struct gfs2_glock, gl_lru); + glb = list_entry(b, struct gfs2_glock, gl_lru); + + if (gla->gl_name.ln_number > glb->gl_name.ln_number) + return 1; + if (gla->gl_name.ln_number < glb->gl_name.ln_number) + return -1; + + return 0; +} + +/** + * gfs2_dispose_glock_lru - Demote a list of glocks + * @list: The list to dispose of + * + * Disposing of glocks may involve disk accesses, so that here we sort + * the glocks by number (i.e. disk location of the inodes) so that if + * there are any such accesses, they'll be sent in order (mostly). + * + * Must be called under the lru_lock, but may drop and retake this + * lock. While the lru_lock is dropped, entries may vanish from the + * list, but no new entries will appear on the list (since it is + * private) + */ -static int gfs2_shrink_glock_memory(struct shrinker *shrink, - struct shrink_control *sc) +static void gfs2_dispose_glock_lru(struct list_head *list) +__releases(&lru_lock) +__acquires(&lru_lock) { struct gfs2_glock *gl; - int may_demote; - int nr_skipped = 0; - int nr = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; - LIST_HEAD(skipped); - if (nr == 0) - goto out; + list_sort(NULL, list, glock_cmp); - if (!(gfp_mask & __GFP_FS)) - return -1; + while(!list_empty(list)) { + gl = list_entry(list->next, struct gfs2_glock, gl_lru); + list_del_init(&gl->gl_lru); + if (!spin_trylock(&gl->gl_spin)) { +add_back_to_lru: + list_add(&gl->gl_lru, &lru_list); + atomic_inc(&lru_count); + continue; + } + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { + spin_unlock(&gl->gl_spin); + goto add_back_to_lru; + } + clear_bit(GLF_LRU, &gl->gl_flags); + gl->gl_lockref.count++; + if (demote_ok(gl)) + handle_callback(gl, LM_ST_UNLOCKED, 0, false); + WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gl->gl_lockref.count--; + spin_unlock(&gl->gl_spin); + cond_resched_lock(&lru_lock); + } +} + +/** + * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote + * @nr: The number of entries to scan + * + * This function selects the entries on the LRU which are able to + * be demoted, and then kicks off the process by calling + * gfs2_dispose_glock_lru() above. + */ + +static long gfs2_scan_glock_lru(int nr) +{ + struct gfs2_glock *gl; + LIST_HEAD(skipped); + LIST_HEAD(dispose); + long freed = 0; spin_lock(&lru_lock); - while(nr && !list_empty(&lru_list)) { + while ((nr-- >= 0) && !list_empty(&lru_list)) { gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); - list_del_init(&gl->gl_lru); - clear_bit(GLF_LRU, &gl->gl_flags); - atomic_dec(&lru_count); /* Test for being demotable */ - if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { - gfs2_glock_hold(gl); - spin_unlock(&lru_lock); - spin_lock(&gl->gl_spin); - may_demote = demote_ok(gl); - if (may_demote) { - handle_callback(gl, LM_ST_UNLOCKED, 0); - nr--; - } - clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_clear_bit(); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) - gfs2_glock_put_nolock(gl); - spin_unlock(&gl->gl_spin); - spin_lock(&lru_lock); + if (!test_bit(GLF_LOCK, &gl->gl_flags)) { + list_move(&gl->gl_lru, &dispose); + atomic_dec(&lru_count); + freed++; continue; } - nr_skipped++; - list_add(&gl->gl_lru, &skipped); - set_bit(GLF_LRU, &gl->gl_flags); + + list_move(&gl->gl_lru, &skipped); } list_splice(&skipped, &lru_list); - atomic_add(nr_skipped, &lru_count); + if (!list_empty(&dispose)) + gfs2_dispose_glock_lru(&dispose); spin_unlock(&lru_lock); -out: - return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure; + + return freed; +} + +static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + return gfs2_scan_glock_lru(sc->nr_to_scan); +} + +static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return vfs_pressure_ratio(atomic_read(&lru_count)); } static struct shrinker glock_shrinker = { - .shrink = gfs2_shrink_glock_memory, .seeks = DEFAULT_SEEKS, + .count_objects = gfs2_glock_shrink_count, + .scan_objects = gfs2_glock_shrink_scan, }; /** @@ -1451,7 +1500,7 @@ static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, rcu_read_lock(); hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { - if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref)) + if ((gl->gl_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref)) examiner(gl); } rcu_read_unlock(); @@ -1471,18 +1520,17 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) * thaw_glock - thaw out a glock which has an unprocessed reply waiting * @gl: The glock to thaw * - * N.B. When we freeze a glock, we leave a ref to the glock outstanding, - * so this has to result in the ref count being dropped by one. */ static void thaw_glock(struct gfs2_glock *gl) { if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) - return; + goto out; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - gfs2_glock_hold(gl); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) { +out: gfs2_glock_put(gl); + } } /** @@ -1497,9 +1545,8 @@ static void clear_glock(struct gfs2_glock *gl) spin_lock(&gl->gl_spin); if (gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0); + handle_callback(gl, LM_ST_UNLOCKED, 0, false); spin_unlock(&gl->gl_spin); - gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } @@ -1515,13 +1562,11 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) glock_hash_walk(thaw_glock, sdp); } -static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) +static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl) { - int ret; spin_lock(&gl->gl_spin); - ret = gfs2_dump_glock(seq, gl); + gfs2_dump_glock(seq, gl); spin_unlock(&gl->gl_spin); - return ret; } static void dump_glock_func(struct gfs2_glock *gl) @@ -1540,6 +1585,7 @@ static void dump_glock_func(struct gfs2_glock *gl) void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) { set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); + flush_workqueue(glock_workqueue); glock_hash_walk(clear_glock, sdp); flush_workqueue(glock_workqueue); wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); @@ -1609,14 +1655,14 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) * @seq: the seq_file struct * @gh: the glock holder * - * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) +static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) { struct task_struct *gh_owner = NULL; char flags_buf[32]; + rcu_read_lock(); if (gh->gh_owner_pid) gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n", @@ -1626,7 +1672,7 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, gh_owner ? gh_owner->comm : "(ended)", (void *)gh->gh_ip); - return 0; + rcu_read_unlock(); } static const char *gflags2str(char *buf, const struct gfs2_glock *gl) @@ -1681,16 +1727,14 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) * example. The field's are n = number (id of the object), f = flags, * t = type, s = state, r = refcount, e = error, p = pid. * - * Returns: 0 on success, -ENOBUFS when we run out of space */ -int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) +void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) { const struct gfs2_glock_operations *glops = gl->gl_ops; unsigned long long dtime; const struct gfs2_holder *gh; char gflags_buf[32]; - int error = 0; dtime = jiffies - gl->gl_demote_time; dtime *= 1000000/HZ; /* demote time in uSec */ @@ -1705,17 +1749,13 @@ int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl) state2str(gl->gl_demote_state), dtime, atomic_read(&gl->gl_ail_count), atomic_read(&gl->gl_revokes), - atomic_read(&gl->gl_ref), gl->gl_hold_time); + (int)gl->gl_lockref.count, gl->gl_hold_time); + + list_for_each_entry(gh, &gl->gl_holders, gh_list) + dump_holder(seq, gh); - list_for_each_entry(gh, &gl->gl_holders, gh_list) { - error = dump_holder(seq, gh); - if (error) - goto out; - } if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump) - error = glops->go_dump(seq, gl); -out: - return error; + glops->go_dump(seq, gl); } static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr) @@ -1800,14 +1840,14 @@ int __init gfs2_glock_init(void) glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0); - if (IS_ERR(glock_workqueue)) - return PTR_ERR(glock_workqueue); + if (!glock_workqueue) + return -ENOMEM; gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); - if (IS_ERR(gfs2_delete_workqueue)) { + if (!gfs2_delete_workqueue) { destroy_workqueue(glock_workqueue); - return PTR_ERR(gfs2_delete_workqueue); + return -ENOMEM; } register_shrinker(&glock_shrinker); @@ -1861,7 +1901,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) gi->nhash = 0; } /* Skip entries for other sb and dead entries */ - } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); + } while (gi->sdp != gi->gl->gl_sbd || + __lockref_is_dead(&gi->gl->gl_lockref)); return 0; } @@ -1912,7 +1953,8 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) { - return dump_glock(seq, iter_ptr); + dump_glock(seq, iter_ptr); + return 0; } static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index fd580b7861d..32572f71f02 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -181,8 +181,6 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl) extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); -extern void gfs2_glock_hold(struct gfs2_glock *gl); -extern void gfs2_glock_put_nolock(struct gfs2_glock *gl); extern void gfs2_glock_put(struct gfs2_glock *gl); extern void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, struct gfs2_holder *gh); @@ -201,8 +199,7 @@ extern int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number, struct gfs2_holder *gh); extern int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); extern void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); -extern int gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); +extern void gfs2_dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { gfs2_dump_glock(NULL, gl); BUG(); } } while(0) extern __printf(2, 3) void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 78d4184ffc7..2ffc67dce87 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -47,32 +47,28 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) * None of the buffers should be dirty, locked, or pinned. */ -static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) +static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, + unsigned int nr_revokes) { struct gfs2_sbd *sdp = gl->gl_sbd; struct list_head *head = &gl->gl_ail_list; struct gfs2_bufdata *bd, *tmp; struct buffer_head *bh; const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); - sector_t blocknr; gfs2_log_lock(sdp); spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) { + list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) { + if (nr_revokes == 0) + break; bh = bd->bd_bh; if (bh->b_state & b_state) { if (fsync) continue; gfs2_ail_error(gl, bh); } - blocknr = bh->b_blocknr; - bh->b_private = NULL; - gfs2_remove_from_ail(bd); /* drops ref on bh */ - - bd->bd_bh = NULL; - bd->bd_blkno = blocknr; - gfs2_trans_add_revoke(sdp, bd); + nr_revokes--; } GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); spin_unlock(&sdp->sd_ail_lock); @@ -86,40 +82,51 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) struct gfs2_trans tr; memset(&tr, 0, sizeof(tr)); + INIT_LIST_HEAD(&tr.tr_buf); + INIT_LIST_HEAD(&tr.tr_databuf); tr.tr_revokes = atomic_read(&gl->gl_ail_count); if (!tr.tr_revokes) return; - /* A shortened, inline version of gfs2_trans_begin() */ + /* A shortened, inline version of gfs2_trans_begin() + * tr->alloced is not set since the transaction structure is + * on the stack */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = (unsigned long)__builtin_return_address(0); sb_start_intwrite(sdp->sd_vfs); - gfs2_log_reserve(sdp, tr.tr_reserved); + if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) { + sb_end_intwrite(sdp->sd_vfs); + return; + } WARN_ON_ONCE(current->journal_info); current->journal_info = &tr; - __gfs2_ail_flush(gl, 0); + __gfs2_ail_flush(gl, 0, tr.tr_revokes); gfs2_trans_end(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) { struct gfs2_sbd *sdp = gl->gl_sbd; unsigned int revokes = atomic_read(&gl->gl_ail_count); + unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); int ret; if (!revokes) return; - ret = gfs2_trans_begin(sdp, 0, revokes); + while (revokes > max_revokes) + max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); + + ret = gfs2_trans_begin(sdp, 0, max_revokes); if (ret) return; - __gfs2_ail_flush(gl, fsync); + __gfs2_ail_flush(gl, fsync, max_revokes); gfs2_trans_end(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } /** @@ -133,7 +140,8 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) static void rgrp_go_sync(struct gfs2_glock *gl) { - struct address_space *metamapping = gfs2_glock2aspace(gl); + struct gfs2_sbd *sdp = gl->gl_sbd; + struct address_space *mapping = &sdp->sd_aspace; struct gfs2_rgrpd *rgd; int error; @@ -141,10 +149,10 @@ static void rgrp_go_sync(struct gfs2_glock *gl) return; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); - gfs2_log_flush(gl->gl_sbd, gl); - filemap_fdatawrite(metamapping); - error = filemap_fdatawait(metamapping); - mapping_set_error(metamapping, error); + gfs2_log_flush(sdp, gl, NORMAL_FLUSH); + filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end); + error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end); + mapping_set_error(mapping, error); gfs2_ail_empty_gl(gl); spin_lock(&gl->gl_spin); @@ -166,11 +174,12 @@ static void rgrp_go_sync(struct gfs2_glock *gl) static void rgrp_go_inval(struct gfs2_glock *gl, int flags) { - struct address_space *mapping = gfs2_glock2aspace(gl); + struct gfs2_sbd *sdp = gl->gl_sbd; + struct address_space *mapping = &sdp->sd_aspace; WARN_ON_ONCE(!(flags & DIO_METADATA)); - gfs2_assert_withdraw(gl->gl_sbd, !atomic_read(&gl->gl_ail_count)); - truncate_inode_pages(mapping, 0); + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); + truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end); if (gl->gl_object) { struct gfs2_rgrpd *rgd = (struct gfs2_rgrpd *)gl->gl_object; @@ -192,14 +201,17 @@ static void inode_go_sync(struct gfs2_glock *gl) if (ip && !S_ISREG(ip->i_inode.i_mode)) ip = NULL; - if (ip && test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) - unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); + if (ip) { + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) + unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0); + inode_dio_wait(&ip->i_inode); + } if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) return; GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE); - gfs2_log_flush(gl->gl_sbd, gl); + gfs2_log_flush(gl->gl_sbd, gl, NORMAL_FLUSH); filemap_fdatawrite(metamapping); if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; @@ -214,7 +226,7 @@ static void inode_go_sync(struct gfs2_glock *gl) * Writeback of the data mapping may cause the dirty flag to be set * so we have to clear it again here. */ - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(GLF_DIRTY, &gl->gl_flags); } @@ -222,8 +234,8 @@ static void inode_go_sync(struct gfs2_glock *gl) * inode_go_inval - prepare a inode glock to be released * @gl: the glock * @flags: - * - * Normally we invlidate everything, but if we are moving into + * + * Normally we invalidate everything, but if we are moving into * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we * can keep hold of the metadata, since it won't have changed. * @@ -246,7 +258,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) } if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { - gfs2_log_flush(gl->gl_sbd, NULL); + gfs2_log_flush(gl->gl_sbd, NULL, NORMAL_FLUSH); gl->gl_sbd->sd_rindex_uptodate = 0; } if (ip && S_ISREG(ip->i_inode.i_mode)) @@ -322,8 +334,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) break; }; - ip->i_inode.i_uid = be32_to_cpu(str->di_uid); - ip->i_inode.i_gid = be32_to_cpu(str->di_gid); + i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid)); + i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid)); gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink)); i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); @@ -410,6 +422,9 @@ static int inode_go_lock(struct gfs2_holder *gh) return error; } + if (gh->gh_state != LM_ST_DEFERRED) + inode_dio_wait(&ip->i_inode); + if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) && (gl->gl_state == LM_ST_EXCLUSIVE) && (gh->gh_state == LM_ST_EXCLUSIVE)) { @@ -429,49 +444,55 @@ static int inode_go_lock(struct gfs2_holder *gh) * @seq: The iterator * @ip: the inode * - * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) +static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) { const struct gfs2_inode *ip = gl->gl_object; if (ip == NULL) - return 0; + return; gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", (unsigned long long)ip->i_no_formal_ino, (unsigned long long)ip->i_no_addr, IF2DT(ip->i_inode.i_mode), ip->i_flags, (unsigned int)ip->i_diskflags, (unsigned long long)i_size_read(&ip->i_inode)); - return 0; } /** - * trans_go_sync - promote/demote the transaction glock + * freeze_go_sync - promote/demote the freeze glock * @gl: the glock * @state: the requested state * @flags: * */ -static void trans_go_sync(struct gfs2_glock *gl) +static void freeze_go_sync(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; + DEFINE_WAIT(wait); - if (gl->gl_state != LM_ST_UNLOCKED && + if (gl->gl_state == LM_ST_SHARED && test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); + atomic_set(&sdp->sd_log_freeze, 1); + wake_up(&sdp->sd_logd_waitq); + do { + prepare_to_wait(&sdp->sd_log_frozen_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (atomic_read(&sdp->sd_log_freeze)) + io_schedule(); + } while(atomic_read(&sdp->sd_log_freeze)); + finish_wait(&sdp->sd_log_frozen_wait, &wait); } } /** - * trans_go_xmote_bh - After promoting/demoting the transaction glock + * freeze_go_xmote_bh - After promoting/demoting the freeze glock * @gl: the glock * */ -static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) +static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) { struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); @@ -504,7 +525,7 @@ static int trans_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh) * Always returns 0 */ -static int trans_go_demote_ok(const struct gfs2_glock *gl) +static int freeze_go_demote_ok(const struct gfs2_glock *gl) { return 0; } @@ -515,19 +536,19 @@ static int trans_go_demote_ok(const struct gfs2_glock *gl) * * gl_spin lock is held while calling this */ -static void iopen_go_callback(struct gfs2_glock *gl) +static void iopen_go_callback(struct gfs2_glock *gl, bool remote) { struct gfs2_inode *ip = (struct gfs2_inode *)gl->gl_object; struct gfs2_sbd *sdp = gl->gl_sbd; - if (sdp->sd_vfs->s_flags & MS_RDONLY) + if (!remote || (sdp->sd_vfs->s_flags & MS_RDONLY)) return; if (gl->gl_demote_state == LM_ST_UNLOCKED && gl->gl_state == LM_ST_SHARED && ip) { - gfs2_glock_hold(gl); + gl->gl_lockref.count++; if (queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) - gfs2_glock_put_nolock(gl); + gl->gl_lockref.count--; } } @@ -552,13 +573,13 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_unlock = gfs2_rgrp_go_unlock, .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, - .go_flags = GLOF_ASPACE | GLOF_LVB, + .go_flags = GLOF_LVB, }; -const struct gfs2_glock_operations gfs2_trans_glops = { - .go_sync = trans_go_sync, - .go_xmote_bh = trans_go_xmote_bh, - .go_demote_ok = trans_go_demote_ok, +const struct gfs2_glock_operations gfs2_freeze_glops = { + .go_sync = freeze_go_sync, + .go_xmote_bh = freeze_go_xmote_bh, + .go_demote_ok = freeze_go_demote_ok, .go_type = LM_TYPE_NONDISK, }; diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h index bf95a2dc166..7455d2629bc 100644 --- a/fs/gfs2/glops.h +++ b/fs/gfs2/glops.h @@ -15,7 +15,7 @@ extern const struct gfs2_glock_operations gfs2_meta_glops; extern const struct gfs2_glock_operations gfs2_inode_glops; extern const struct gfs2_glock_operations gfs2_rgrp_glops; -extern const struct gfs2_glock_operations gfs2_trans_glops; +extern const struct gfs2_glock_operations gfs2_freeze_glops; extern const struct gfs2_glock_operations gfs2_iopen_glops; extern const struct gfs2_glock_operations gfs2_flock_glops; extern const struct gfs2_glock_operations gfs2_nondisk_glops; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c373a24fedd..67d310c9ada 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -21,6 +21,7 @@ #include <linux/rbtree.h> #include <linux/ktime.h> #include <linux/percpu.h> +#include <linux/lockref.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -31,7 +32,6 @@ struct gfs2_holder; struct gfs2_glock; struct gfs2_quota_data; struct gfs2_trans; -struct gfs2_ail; struct gfs2_jdesc; struct gfs2_sbd; struct lm_lockops; @@ -52,9 +52,8 @@ struct gfs2_log_header_host { */ struct gfs2_log_operations { - void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); - void (*lo_before_commit) (struct gfs2_sbd *sdp); - void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); + void (*lo_before_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); + void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); void (*lo_before_scan) (struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, int pass); int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start, @@ -73,6 +72,7 @@ struct gfs2_bitmap { u32 bi_offset; u32 bi_start; u32 bi_len; + u32 bi_blocks; }; struct gfs2_rgrpd { @@ -93,6 +93,7 @@ struct gfs2_rgrpd { struct gfs2_rgrp_lvb *rd_rgl; u32 rd_last_alloc; u32 rd_flags; + u32 rd_extfail_pt; /* extent failure point */ #define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ #define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ #define GFS2_RDF_ERROR 0x40000000 /* error in rg */ @@ -103,19 +104,25 @@ struct gfs2_rgrpd { struct gfs2_rbm { struct gfs2_rgrpd *rgd; - struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ u32 offset; /* The offset is bitmap relative */ + int bii; /* Bitmap index */ }; +static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_bits + rbm->bii; +} + static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) { - return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; + return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) + + rbm->offset; } static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, const struct gfs2_rbm *rbm2) { - return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && + return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) && (rbm1->offset == rbm2->offset); } @@ -140,7 +147,7 @@ struct gfs2_bufdata { struct list_head bd_list; const struct gfs2_log_operations *bd_ops; - struct gfs2_ail *bd_ail; + struct gfs2_trans *bd_tr; struct list_head bd_ail_st_list; struct list_head bd_ail_gl_list; }; @@ -211,8 +218,8 @@ struct gfs2_glock_operations { int (*go_demote_ok) (const struct gfs2_glock *gl); int (*go_lock) (struct gfs2_holder *gh); void (*go_unlock) (struct gfs2_holder *gh); - int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); - void (*go_callback) (struct gfs2_glock *gl); + void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); + void (*go_callback)(struct gfs2_glock *gl, bool remote); const int go_type; const unsigned long go_flags; #define GLOF_ASPACE 1 @@ -280,6 +287,20 @@ struct gfs2_blkreserv { unsigned int rs_qa_qd_num; }; +/* + * Allocation parameters + * @target: The number of blocks we'd ideally like to allocate + * @aflags: The flags (e.g. Orlov flag) + * + * The intent is to gradually expand this structure over time in + * order to give more information, e.g. alignment, min extent size + * to the allocation code. + */ +struct gfs2_alloc_parms { + u32 target; + u32 aflags; +}; + enum { GLF_LOCK = 1, GLF_DEMOTE = 3, @@ -302,9 +323,9 @@ struct gfs2_glock { struct gfs2_sbd *gl_sbd; unsigned long gl_flags; /* GLF_... */ struct lm_lockname gl_name; - atomic_t gl_ref; - spinlock_t gl_spin; + struct lockref gl_lockref; +#define gl_spin gl_lockref.lock /* State fields protected by gl_spin */ unsigned int gl_state:2, /* Current state */ @@ -330,7 +351,15 @@ struct gfs2_glock { atomic_t gl_ail_count; atomic_t gl_revokes; struct delayed_work gl_work; - struct work_struct gl_delete; + union { + /* For inode and iopen glocks only */ + struct work_struct gl_delete; + /* For rgrp glocks only */ + struct { + loff_t start; + loff_t end; + } gl_vm; + }; struct rcu_head gl_rcu; }; @@ -341,6 +370,8 @@ enum { GIF_QD_LOCKED = 1, GIF_ALLOC_FAILED = 2, GIF_SW_PAGED = 3, + GIF_ORDERED = 4, + GIF_FREE_VFS_INODE = 5, }; struct gfs2_inode { @@ -357,6 +388,7 @@ struct gfs2_inode { struct gfs2_rgrpd *i_rgd; u64 i_goal; /* goal block for allocations */ struct rw_semaphore i_rw_mutex; + struct list_head i_ordered; struct list_head i_trunc_list; __be64 *i_hash_cache; u32 i_entries; @@ -391,19 +423,20 @@ struct gfs2_revoke_replay { }; enum { - QDF_USER = 0, QDF_CHANGE = 1, QDF_LOCKED = 2, QDF_REFRESH = 3, }; struct gfs2_quota_data { + struct hlist_bl_node qd_hlist; struct list_head qd_list; - struct list_head qd_reclaim; - - atomic_t qd_count; + struct kqid qd_id; + struct gfs2_sbd *qd_sbd; + struct lockref qd_lockref; + struct list_head qd_lru; + unsigned qd_hash; - u32 qd_id; unsigned long qd_flags; /* QDF_... */ s64 qd_change; @@ -421,6 +454,7 @@ struct gfs2_quota_data { u64 qd_sync_gen; unsigned long qd_last_warn; + struct rcu_head qd_rcu; }; struct gfs2_trans { @@ -429,10 +463,9 @@ struct gfs2_trans { unsigned int tr_blocks; unsigned int tr_revokes; unsigned int tr_reserved; - - struct gfs2_holder tr_t_gh; - - int tr_touched; + unsigned int tr_touched:1; + unsigned int tr_attached:1; + unsigned int tr_alloced:1; unsigned int tr_num_buf_new; unsigned int tr_num_databuf_new; @@ -440,18 +473,18 @@ struct gfs2_trans { unsigned int tr_num_databuf_rm; unsigned int tr_num_revoke; unsigned int tr_num_revoke_rm; -}; -struct gfs2_ail { - struct list_head ai_list; + struct list_head tr_list; + struct list_head tr_databuf; + struct list_head tr_buf; - unsigned int ai_first; - struct list_head ai_ail1_list; - struct list_head ai_ail2_list; + unsigned int tr_first; + struct list_head tr_ail1_list; + struct list_head tr_ail2_list; }; struct gfs2_journal_extent { - struct list_head extent_list; + struct list_head list; unsigned int lblock; /* First logical block */ u64 dblock; /* First disk block */ @@ -461,6 +494,7 @@ struct gfs2_journal_extent { struct gfs2_jdesc { struct list_head jd_list; struct list_head extent_list; + unsigned int nr_extents; struct work_struct jd_work; struct inode *jd_inode; unsigned long jd_flags; @@ -468,6 +502,15 @@ struct gfs2_jdesc { unsigned int jd_jid; unsigned int jd_blocks; int jd_recover_error; + /* Replay stuff */ + + unsigned int jd_found_blocks; + unsigned int jd_found_revokes; + unsigned int jd_replayed_blocks; + + struct list_head jd_revoke_list; + unsigned int jd_replay_tail; + }; struct gfs2_statfs_change_host { @@ -518,7 +561,6 @@ struct gfs2_tune { unsigned int gt_logd_secs; - unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */ unsigned int gt_quota_scale_num; /* Numerator */ unsigned int gt_quota_scale_den; /* Denominator */ @@ -588,6 +630,7 @@ struct lm_lockstruct { struct dlm_lksb ls_control_lksb; /* control_lock */ char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */ struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */ + char *ls_lvb_bits; spinlock_t ls_recover_spin; /* protects following fields */ unsigned long ls_recover_flags; /* DFL_ */ @@ -637,10 +680,11 @@ struct gfs2_sbd { struct lm_lockstruct sd_lockstruct; struct gfs2_holder sd_live_gh; struct gfs2_glock *sd_rename_gl; - struct gfs2_glock *sd_trans_gl; + struct gfs2_glock *sd_freeze_gl; wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; + struct completion sd_wdack; struct delayed_work sd_control_work; /* Inode Stuff */ @@ -684,6 +728,8 @@ struct gfs2_sbd { struct gfs2_holder sd_sc_gh; struct gfs2_holder sd_qc_gh; + struct completion sd_journal_ready; + /* Daemon stuff */ struct task_struct *sd_logd_process; @@ -694,35 +740,33 @@ struct gfs2_sbd { struct list_head sd_quota_list; atomic_t sd_quota_count; struct mutex sd_quota_mutex; + struct mutex sd_quota_sync_mutex; wait_queue_head_t sd_quota_wait; struct list_head sd_trunc_list; spinlock_t sd_trunc_lock; unsigned int sd_quota_slots; - unsigned int sd_quota_chunks; - unsigned char **sd_quota_bitmap; + unsigned long *sd_quota_bitmap; + spinlock_t sd_bitmap_lock; u64 sd_quota_sync_gen; /* Log stuff */ + struct address_space sd_aspace; + spinlock_t sd_log_lock; + struct gfs2_trans *sd_log_tr; unsigned int sd_log_blks_reserved; - unsigned int sd_log_commited_buf; - unsigned int sd_log_commited_databuf; int sd_log_commited_revoke; atomic_t sd_log_pinned; - unsigned int sd_log_num_buf; unsigned int sd_log_num_revoke; - unsigned int sd_log_num_rg; - unsigned int sd_log_num_databuf; - struct list_head sd_log_le_buf; struct list_head sd_log_le_revoke; - struct list_head sd_log_le_databuf; struct list_head sd_log_le_ordered; + spinlock_t sd_ordered_lock; atomic_t sd_log_thresh1; atomic_t sd_log_thresh2; @@ -748,20 +792,14 @@ struct gfs2_sbd { struct list_head sd_ail1_list; struct list_head sd_ail2_list; - /* Replay stuff */ - - struct list_head sd_revoke_list; - unsigned int sd_replay_tail; - - unsigned int sd_found_blocks; - unsigned int sd_found_revokes; - unsigned int sd_replayed_blocks; - /* For quiescing the filesystem */ - struct gfs2_holder sd_freeze_gh; - struct mutex sd_freeze_lock; - unsigned int sd_freeze_count; + struct gfs2_holder sd_freeze_root_gh; + struct gfs2_holder sd_thaw_gh; + atomic_t sd_log_freeze; + atomic_t sd_frozen_root; + wait_queue_head_t sd_frozen_root_wait; + wait_queue_head_t sd_log_frozen_wait; char sd_fsname[GFS2_FSNAME_LEN]; char sd_table_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 2b6f5698ef1..e62e5947788 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -149,7 +149,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, ip = GFS2_I(inode); if (!inode) - return ERR_PTR(-ENOBUFS); + return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -189,6 +189,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, return inode; fail_refresh: + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; ip->i_iopen_gh.gh_gl->gl_object = NULL; gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_iopen: @@ -312,7 +313,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, goto out; } - inode = gfs2_dir_search(dir, name); + inode = gfs2_dir_search(dir, name, false); if (IS_ERR(inode)) error = PTR_ERR(inode); out: @@ -345,17 +346,6 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, if (!dip->i_inode.i_nlink) return -ENOENT; - error = gfs2_dir_check(&dip->i_inode, name, NULL); - switch (error) { - case -ENOENT: - error = 0; - break; - case 0: - return -EEXIST; - default: - return error; - } - if (dip->i_entries == (u32)-1) return -EFBIG; if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) @@ -368,10 +358,11 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip, struct inode *inode) { if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && - (dip->i_inode.i_mode & S_ISUID) && dip->i_inode.i_uid) { + (dip->i_inode.i_mode & S_ISUID) && + !uid_eq(dip->i_inode.i_uid, GLOBAL_ROOT_UID)) { if (S_ISDIR(inode->i_mode)) inode->i_mode |= S_ISUID; - else if (dip->i_inode.i_uid != current_fsuid()) + else if (!uid_eq(dip->i_inode.i_uid, current_fsuid())) inode->i_mode &= ~07111; inode->i_uid = dip->i_inode.i_uid; } else @@ -385,21 +376,25 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip, inode->i_gid = current_fsgid(); } -static int alloc_dinode(struct gfs2_inode *ip, u32 flags) +static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; int error; - int dblocks = 1; - error = gfs2_inplace_reserve(ip, RES_DINODE, flags); + error = gfs2_quota_lock_check(ip); if (error) goto out; - error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS, 0); + error = gfs2_inplace_reserve(ip, &ap); + if (error) + goto out_quota; + + error = gfs2_trans_begin(sdp, (*dblocks * RES_RG_BIT) + RES_STATFS + RES_QUOTA, 0); if (error) goto out_ipreserv; - error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation); + error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); ip->i_no_formal_ino = ip->i_generation; ip->i_inode.i_ino = ip->i_no_addr; ip->i_goal = ip->i_no_addr; @@ -408,6 +403,8 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags) out_ipreserv: gfs2_inplace_release(ip); +out_quota: + gfs2_quota_unlock(ip); out: return error; } @@ -430,6 +427,33 @@ static void gfs2_init_dir(struct buffer_head *dibh, } /** + * gfs2_init_xattr - Initialise an xattr block for a new inode + * @ip: The inode in question + * + * This sets up an empty xattr block for a new inode, ready to + * take any ACLs, LSM xattrs, etc. + */ + +static void gfs2_init_xattr(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct buffer_head *bh; + struct gfs2_ea_header *ea; + + bh = gfs2_meta_new(ip->i_gl, ip->i_eattr); + gfs2_trans_add_meta(ip->i_gl, bh); + gfs2_metatype_set(bh, GFS2_METATYPE_EA, GFS2_FORMAT_EA); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); + + ea = GFS2_EA_BH2FIRST(bh); + ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize); + ea->ea_type = GFS2_EATYPE_UNUSED; + ea->ea_flags = GFS2_EAFLAG_LAST; + + brelse(bh); +} + +/** * init_dinode - Fill in a new dinode structure * @dip: The directory this inode is being created in * @ip: The inode @@ -439,59 +463,27 @@ static void gfs2_init_dir(struct buffer_head *dibh, */ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, - const char *symname, struct buffer_head **bhp) + const char *symname) { - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_dinode *di; struct buffer_head *dibh; - struct timespec tv = CURRENT_TIME; dibh = gfs2_meta_new(ip->i_gl, ip->i_no_addr); - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); - gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + gfs2_trans_add_meta(ip->i_gl, dibh); di = (struct gfs2_dinode *)dibh->b_data; + gfs2_dinode_out(ip, di); - di->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); - di->di_num.no_addr = cpu_to_be64(ip->i_no_addr); - di->di_mode = cpu_to_be32(ip->i_inode.i_mode); - di->di_uid = cpu_to_be32(ip->i_inode.i_uid); - di->di_gid = cpu_to_be32(ip->i_inode.i_gid); - di->di_nlink = 0; - di->di_size = cpu_to_be64(ip->i_inode.i_size); - di->di_blocks = cpu_to_be64(1); - di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev)); di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev)); - di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_no_addr); - di->di_generation = cpu_to_be64(ip->i_generation); - di->di_flags = 0; di->__pad1 = 0; - di->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) ? GFS2_FORMAT_DE : 0); - di->di_height = 0; di->__pad2 = 0; di->__pad3 = 0; - di->di_depth = 0; - di->di_entries = 0; memset(&di->__pad4, 0, sizeof(di->__pad4)); - di->di_eattr = 0; - di->di_atime_nsec = cpu_to_be32(tv.tv_nsec); - di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); - di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); memset(&di->di_reserved, 0, sizeof(di->di_reserved)); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); switch(ip->i_inode.i_mode & S_IFMT) { - case S_IFREG: - if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || - gfs2_tune_get(sdp, gt_new_files_jdata)) - di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); - break; case S_IFDIR: - di->di_flags |= cpu_to_be32(dip->i_diskflags & - GFS2_DIF_INHERIT_JDATA); - di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA); - di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); - di->di_entries = cpu_to_be32(2); gfs2_init_dir(dibh, dip); break; case S_IFLNK: @@ -500,74 +492,48 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, } set_buffer_uptodate(dibh); - - *bhp = dibh; + brelse(dibh); } -static int make_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip, - const char *symname, struct buffer_head **bhp) -{ - struct inode *inode = &ip->i_inode; - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - int error; - - error = gfs2_rindex_update(sdp); - if (error) - return error; - - error = gfs2_quota_lock(dip, inode->i_uid, inode->i_gid); - if (error) - return error; - - error = gfs2_quota_check(dip, inode->i_uid, inode->i_gid); - if (error) - goto out_quota; - - error = gfs2_trans_begin(sdp, RES_DINODE + RES_QUOTA, 0); - if (error) - goto out_quota; - - init_dinode(dip, ip, symname, bhp); - gfs2_quota_change(dip, +1, inode->i_uid, inode->i_gid); - gfs2_trans_end(sdp); +/** + * gfs2_trans_da_blocks - Calculate number of blocks to link inode + * @dip: The directory we are linking into + * @da: The dir add information + * @nr_inodes: The number of inodes involved + * + * This calculate the number of blocks we need to reserve in a + * transaction to link @nr_inodes into a directory. In most cases + * @nr_inodes will be 2 (the directory plus the inode being linked in) + * but in case of rename, 4 may be required. + * + * Returns: Number of blocks + */ -out_quota: - gfs2_quota_unlock(dip); - return error; +static unsigned gfs2_trans_da_blks(const struct gfs2_inode *dip, + const struct gfs2_diradd *da, + unsigned nr_inodes) +{ + return da->nr_blocks + gfs2_rg_blocks(dip, da->nr_blocks) + + (nr_inodes * RES_DINODE) + RES_QUOTA + RES_STATFS; } static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, - struct gfs2_inode *ip) + struct gfs2_inode *ip, struct gfs2_diradd *da) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - int alloc_required; - struct buffer_head *dibh; + struct gfs2_alloc_parms ap = { .target = da->nr_blocks, }; int error; - error = gfs2_rindex_update(sdp); - if (error) - return error; - - error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); - if (error) - goto fail; - - error = alloc_required = gfs2_diradd_alloc_required(&dip->i_inode, name); - if (alloc_required < 0) - goto fail_quota_locks; - if (alloc_required) { - error = gfs2_quota_check(dip, dip->i_inode.i_uid, dip->i_inode.i_gid); + if (da->nr_blocks) { + error = gfs2_quota_lock_check(dip); if (error) goto fail_quota_locks; - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); + error = gfs2_inplace_reserve(dip, &ap); if (error) goto fail_quota_locks; - error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - dip->i_rgd->rd_length + - 2 * RES_DINODE + - RES_STATFS + RES_QUOTA, 0); + error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(dip, da, 2), 0); if (error) goto fail_ipreserv; } else { @@ -576,30 +542,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name, goto fail_quota_locks; } - error = gfs2_dir_add(&dip->i_inode, name, ip); + error = gfs2_dir_add(&dip->i_inode, name, ip, da); if (error) goto fail_end_trans; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - goto fail_end_trans; - set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1); - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - return 0; - fail_end_trans: gfs2_trans_end(sdp); - fail_ipreserv: - if (alloc_required) - gfs2_inplace_release(dip); - + gfs2_inplace_release(dip); fail_quota_locks: gfs2_quota_unlock(dip); - -fail: return error; } @@ -619,17 +571,11 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, return err; } -static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, - const struct qstr *qstr) -{ - return security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, - &gfs2_initxattrs, NULL); -} - /** * gfs2_create_inode - Create a new inode * @dir: The parent directory * @dentry: The new dentry + * @file: If non-NULL, the file which is being opened * @mode: The permissions on the new inode * @dev: For device nodes, this is the device number * @symname: For symlinks, this is the link destination @@ -639,18 +585,22 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, */ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, + struct file *file, umode_t mode, dev_t dev, const char *symname, - unsigned int size, int excl) + unsigned int size, int excl, int *opened) { const struct qstr *name = &dentry->d_name; + struct posix_acl *default_acl, *acl; struct gfs2_holder ghs[2]; struct inode *inode = NULL; struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_glock *io_gl; - int error; - struct buffer_head *bh = NULL; + struct dentry *d; + int error, free_vfs_inode = 0; u32 aflags = 0; + unsigned blocks = 1; + struct gfs2_diradd da = { .bh = NULL, }; if (!name->len || name->len > GFS2_FNAMESIZE) return -ENAMETOOLONG; @@ -659,45 +609,102 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) return error; + error = gfs2_rindex_update(sdp); + if (error) + return error; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); if (error) goto fail; error = create_ok(dip, name, mode); - if ((error == -EEXIST) && S_ISREG(mode) && !excl) { - inode = gfs2_lookupi(dir, &dentry->d_name, 0); - gfs2_glock_dq_uninit(ghs); - d_instantiate(dentry, inode); - return IS_ERR(inode) ? PTR_ERR(inode) : 0; - } if (error) goto fail_gunlock; - inode = new_inode(sdp->sd_vfs); - if (!inode) { + inode = gfs2_dir_search(dir, &dentry->d_name, !S_ISREG(mode) || excl); + error = PTR_ERR(inode); + if (!IS_ERR(inode)) { + d = d_splice_alias(inode, dentry); + error = PTR_ERR(d); + if (IS_ERR(d)) + goto fail_gunlock; + error = 0; + if (file) { + if (S_ISREG(inode->i_mode)) { + WARN_ON(d != NULL); + error = finish_open(file, dentry, gfs2_open_common, opened); + } else { + error = finish_no_open(file, d); + } + } else { + dput(d); + } gfs2_glock_dq_uninit(ghs); - return -ENOMEM; + return error; + } else if (error != -ENOENT) { + goto fail_gunlock; } + + error = gfs2_diradd_alloc_required(dir, name, &da); + if (error < 0) + goto fail_gunlock; + + inode = new_inode(sdp->sd_vfs); + error = -ENOMEM; + if (!inode) + goto fail_gunlock; + + error = posix_acl_create(dir, &mode, &default_acl, &acl); + if (error) + goto fail_free_vfs_inode; + ip = GFS2_I(inode); error = gfs2_rs_alloc(ip); if (error) - goto fail_free_inode; + goto fail_free_acls; - set_bit(GIF_INVALID, &ip->i_flags); inode->i_mode = mode; + set_nlink(inode, S_ISDIR(mode) ? 2 : 1); inode->i_rdev = dev; inode->i_size = size; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + gfs2_set_inode_blocks(inode, 1); munge_mode_uid_gid(dip, inode); ip->i_goal = dip->i_goal; + ip->i_diskflags = 0; + ip->i_eattr = 0; + ip->i_height = 0; + ip->i_depth = 0; + ip->i_entries = 0; + + switch(mode & S_IFMT) { + case S_IFREG: + if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) || + gfs2_tune_get(sdp, gt_new_files_jdata)) + ip->i_diskflags |= GFS2_DIF_JDATA; + gfs2_set_aops(inode); + break; + case S_IFDIR: + ip->i_diskflags |= (dip->i_diskflags & GFS2_DIF_INHERIT_JDATA); + ip->i_diskflags |= GFS2_DIF_JDATA; + ip->i_entries = 2; + break; + } + gfs2_set_inode_flags(inode); if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) || (dip->i_diskflags & GFS2_DIF_TOPDIR)) aflags |= GFS2_AF_ORLOV; - error = alloc_dinode(ip, aflags); + if (default_acl || acl) + blocks++; + + error = alloc_dinode(ip, aflags, &blocks); if (error) goto fail_free_inode; + gfs2_set_inode_blocks(inode, blocks); + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (error) goto fail_free_inode; @@ -707,10 +714,17 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_free_inode; - error = make_dinode(dip, ip, symname, &bh); + error = gfs2_trans_begin(sdp, blocks, 0); if (error) goto fail_gunlock2; + if (blocks > 1) { + ip->i_eattr = ip->i_no_addr + 1; + gfs2_init_xattr(ip); + } + init_dinode(dip, ip, symname); + gfs2_trans_end(sdp); + error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl); if (error) goto fail_gunlock2; @@ -724,32 +738,37 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_set_iop(inode); insert_inode_hash(inode); - error = gfs2_inode_refresh(ip); - if (error) - goto fail_gunlock3; + if (default_acl) { + error = gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + posix_acl_release(default_acl); + } + if (acl) { + if (!error) + error = gfs2_set_acl(inode, acl, ACL_TYPE_ACCESS); + posix_acl_release(acl); + } - error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock3; - error = gfs2_security_init(dip, ip, name); + error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, + &gfs2_initxattrs, NULL); if (error) goto fail_gunlock3; - error = link_dinode(dip, name, ip); + error = link_dinode(dip, name, ip, &da); if (error) goto fail_gunlock3; - if (bh) - brelse(bh); - - gfs2_trans_end(sdp); - gfs2_inplace_release(dip); - gfs2_quota_unlock(dip); mark_inode_dirty(inode); - gfs2_glock_dq_uninit_m(2, ghs); d_instantiate(dentry, inode); - return 0; + if (file) { + *opened |= FILE_CREATED; + error = finish_open(file, dentry, gfs2_open_common, opened); + } + gfs2_glock_dq_uninit(ghs); + gfs2_glock_dq_uninit(ghs + 1); + return error; fail_gunlock3: gfs2_glock_dq_uninit(ghs + 1); @@ -762,18 +781,26 @@ fail_gunlock2: fail_free_inode: if (ip->i_gl) gfs2_glock_put(ip->i_gl); - gfs2_rs_delete(ip); - free_inode_nonrcu(inode); - inode = NULL; + gfs2_rs_delete(ip, NULL); +fail_free_acls: + if (default_acl) + posix_acl_release(default_acl); + if (acl) + posix_acl_release(acl); +fail_free_vfs_inode: + free_vfs_inode = 1; fail_gunlock: + gfs2_dir_no_add(&da); gfs2_glock_dq_uninit(ghs); if (inode && !IS_ERR(inode)) { - set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags); + clear_nlink(inode); + if (!free_vfs_inode) + mark_inode_dirty(inode); + set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED, + &GFS2_I(inode)->i_flags); iput(inode); } fail: - if (bh) - brelse(bh); return error; } @@ -789,36 +816,63 @@ fail: static int gfs2_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl); + return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl, NULL); } /** - * gfs2_lookup - Look up a filename in a directory and return its inode + * __gfs2_lookup - Look up a filename in a directory and return its inode * @dir: The directory inode * @dentry: The dentry of the new inode - * @nd: passed from Linux VFS, ignored by us + * @file: File to be opened + * @opened: atomic_open flags * - * Called by the VFS layer. Lock dir and call gfs2_lookupi() * * Returns: errno */ -static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) +static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, + struct file *file, int *opened) { - struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0); - if (inode && !IS_ERR(inode)) { - struct gfs2_glock *gl = GFS2_I(inode)->i_gl; - struct gfs2_holder gh; - int error; - error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - if (error) { - iput(inode); - return ERR_PTR(error); - } + struct inode *inode; + struct dentry *d; + struct gfs2_holder gh; + struct gfs2_glock *gl; + int error; + + inode = gfs2_lookupi(dir, &dentry->d_name, 0); + if (!inode) + return NULL; + if (IS_ERR(inode)) + return ERR_CAST(inode); + + gl = GFS2_I(inode)->i_gl; + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) { + iput(inode); + return ERR_PTR(error); + } + + d = d_splice_alias(inode, dentry); + if (IS_ERR(d)) { + iput(inode); gfs2_glock_dq_uninit(&gh); + return d; } - return d_splice_alias(inode, dentry); + if (file && S_ISREG(inode->i_mode)) + error = finish_open(file, dentry, gfs2_open_common, opened); + + gfs2_glock_dq_uninit(&gh); + if (error) { + dput(d); + return ERR_PTR(error); + } + return d; +} + +static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, + unsigned flags) +{ + return __gfs2_lookup(dir, dentry, NULL, NULL); } /** @@ -842,7 +896,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder ghs[2]; struct buffer_head *dibh; - int alloc_required; + struct gfs2_diradd da = { .bh = NULL, }; int error; if (S_ISDIR(inode->i_mode)) @@ -897,24 +951,21 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (ip->i_inode.i_nlink == (u32)-1) goto out_gunlock; - alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name); + error = gfs2_diradd_alloc_required(dir, &dentry->d_name, &da); if (error < 0) goto out_gunlock; - error = 0; - if (alloc_required) { + if (da.nr_blocks) { + struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; error = gfs2_quota_lock_check(dip); if (error) goto out_gunlock; - error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres, 0); + error = gfs2_inplace_reserve(dip, &ap); if (error) goto out_gunlock_q; - error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(dip, sdp->sd_max_dirres) + - 2 * RES_DINODE + RES_STATFS + - RES_QUOTA, 0); + error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(dip, &da, 2), 0); if (error) goto out_ipres; } else { @@ -927,11 +978,11 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, if (error) goto out_end_trans; - error = gfs2_dir_add(dir, &dentry->d_name, ip); + error = gfs2_dir_add(dir, &dentry->d_name, ip, &da); if (error) goto out_brelse; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); inc_nlink(&ip->i_inode); ip->i_inode.i_ctime = CURRENT_TIME; ihold(inode); @@ -943,12 +994,13 @@ out_brelse: out_end_trans: gfs2_trans_end(sdp); out_ipres: - if (alloc_required) + if (da.nr_blocks) gfs2_inplace_release(dip); out_gunlock_q: - if (alloc_required) + if (da.nr_blocks) gfs2_quota_unlock(dip); out_gunlock: + gfs2_dir_no_add(&da); gfs2_glock_dq(ghs + 1); out_child: gfs2_glock_dq(ghs); @@ -978,8 +1030,8 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, return -EPERM; if ((dip->i_inode.i_mode & S_ISVTX) && - dip->i_inode.i_uid != current_fsuid() && - ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER)) + !uid_eq(dip->i_inode.i_uid, current_fsuid()) && + !uid_eq(ip->i_inode.i_uid, current_fsuid()) && !capable(CAP_FOWNER)) return -EPERM; if (IS_APPEND(&dip->i_inode)) @@ -1136,7 +1188,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) return -ENAMETOOLONG; - return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size, 0); + return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL); } /** @@ -1150,7 +1202,9 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { - return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0); + struct gfs2_sbd *sdp = GFS2_SB(dir); + unsigned dsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); + return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL); } /** @@ -1165,7 +1219,48 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { - return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0); + return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0, NULL); +} + +/** + * gfs2_atomic_open - Atomically open a file + * @dir: The directory + * @dentry: The proposed new entry + * @file: The proposed new struct file + * @flags: open flags + * @mode: File mode + * @opened: Flag to say whether the file has been opened or not + * + * Returns: error code or 0 for success + */ + +static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, + struct file *file, unsigned flags, + umode_t mode, int *opened) +{ + struct dentry *d; + bool excl = !!(flags & O_EXCL); + + d = __gfs2_lookup(dir, dentry, file, opened); + if (IS_ERR(d)) + return PTR_ERR(d); + if (d != NULL) + dentry = d; + if (dentry->d_inode) { + if (!(*opened & FILE_OPENED)) { + if (d == NULL) + dget(dentry); + return finish_no_open(file, dentry); + } + dput(d); + return 0; + } + + BUG_ON(d != NULL); + if (!(flags & O_CREAT)) + return -ENOENT; + + return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl, opened); } /* @@ -1199,6 +1294,10 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) } tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); + if (!tmp) { + error = -ENOENT; + break; + } if (IS_ERR(tmp)) { error = PTR_ERR(tmp); break; @@ -1235,7 +1334,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, struct gfs2_rgrpd *nrgd; unsigned int num_gh; int dir_rename = 0; - int alloc_required = 0; + struct gfs2_diradd da = { .nr_blocks = 0, }; unsigned int x; int error; @@ -1369,25 +1468,24 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock; } - if (nip == NULL) - alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); - error = alloc_required; - if (error < 0) - goto out_gunlock; + if (nip == NULL) { + error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name, &da); + if (error) + goto out_gunlock; + } - if (alloc_required) { + if (da.nr_blocks) { + struct gfs2_alloc_parms ap = { .target = da.nr_blocks, }; error = gfs2_quota_lock_check(ndip); if (error) goto out_gunlock; - error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres, 0); + error = gfs2_inplace_reserve(ndip, &ap); if (error) goto out_gunlock_q; - error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + - 4 * RES_DINODE + 4 * RES_LEAF + - RES_STATFS + RES_QUOTA + 4, 0); + error = gfs2_trans_begin(sdp, gfs2_trans_da_blks(ndip, &da, 4) + + 4 * RES_LEAF + 4, 0); if (error) goto out_ipreserv; } else { @@ -1412,7 +1510,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_end_trans; ip->i_inode.i_ctime = CURRENT_TIME; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1421,19 +1519,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, if (error) goto out_end_trans; - error = gfs2_dir_add(ndir, &ndentry->d_name, ip); + error = gfs2_dir_add(ndir, &ndentry->d_name, ip, &da); if (error) goto out_end_trans; out_end_trans: gfs2_trans_end(sdp); out_ipreserv: - if (alloc_required) + if (da.nr_blocks) gfs2_inplace_release(ndip); out_gunlock_q: - if (alloc_required) + if (da.nr_blocks) gfs2_quota_unlock(ndip); out_gunlock: + gfs2_dir_no_add(&da); while (x--) { gfs2_glock_dq(ghs + x); gfs2_holder_uninit(ghs + x); @@ -1497,13 +1596,6 @@ out: return NULL; } -static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - /** * gfs2_permission - * @inode: The inode @@ -1521,18 +1613,26 @@ int gfs2_permission(struct inode *inode, int mask) { struct gfs2_inode *ip; struct gfs2_holder i_gh; + struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; + int frozen_root = 0; ip = GFS2_I(inode); if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); - if (error) - return error; - unlock = 1; + if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && + inode == sdp->sd_root_dir->d_inode && + atomic_inc_not_zero(&sdp->sd_frozen_root))) + frozen_root = 1; + else { + if (mask & MAY_NOT_BLOCK) + return -ECHILD; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return error; + unlock = 1; + } } if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) @@ -1541,6 +1641,8 @@ int gfs2_permission(struct inode *inode, int mask) error = generic_permission(inode, mask); if (unlock) gfs2_glock_dq_uninit(&i_gh); + else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) + wake_up(&sdp->sd_frozen_root_wait); return error; } @@ -1580,7 +1682,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - u32 ouid, ogid, nuid, ngid; + kuid_t ouid, nuid; + kgid_t ogid, ngid; int error; ouid = inode->i_uid; @@ -1588,16 +1691,29 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) nuid = attr->ia_uid; ngid = attr->ia_gid; - if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) - ouid = nuid = NO_QUOTA_CHANGE; - if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) - ogid = ngid = NO_QUOTA_CHANGE; + if (!(attr->ia_valid & ATTR_UID) || uid_eq(ouid, nuid)) + ouid = nuid = NO_UID_QUOTA_CHANGE; + if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) + ogid = ngid = NO_GID_QUOTA_CHANGE; - error = gfs2_quota_lock(ip, nuid, ngid); + error = get_write_access(inode); if (error) return error; - if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + error = gfs2_rs_alloc(ip); + if (error) + goto out; + + error = gfs2_rindex_update(sdp); + if (error) + goto out; + + error = gfs2_quota_lock(ip, nuid, ngid); + if (error) + goto out; + + if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || + !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { error = gfs2_quota_check(ip, nuid, ngid); if (error) goto out_gunlock_q; @@ -1611,7 +1727,8 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (error) goto out_end_trans; - if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || + !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); gfs2_quota_change(ip, -blocks, ouid, ogid); gfs2_quota_change(ip, blocks, nuid, ngid); @@ -1621,6 +1738,8 @@ out_end_trans: gfs2_trans_end(sdp); out_gunlock_q: gfs2_quota_unlock(ip); +out: + put_write_access(inode); return error; } @@ -1662,10 +1781,11 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) error = gfs2_setattr_size(inode, attr->ia_size); else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) error = setattr_chown(inode, attr); - else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) - error = gfs2_acl_chmod(ip, attr); - else + else { error = gfs2_setattr_simple(inode, attr); + if (!error && attr->ia_valid & ATTR_MODE) + error = posix_acl_chmod(inode, inode->i_mode); + } out: if (!error) @@ -1695,19 +1815,29 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct inode *inode = dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; + struct gfs2_sbd *sdp = GFS2_SB(inode); int error; int unlock = 0; + int frozen_root = 0; if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); - if (error) - return error; - unlock = 1; + if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) && + inode == sdp->sd_root_dir->d_inode && + atomic_inc_not_zero(&sdp->sd_frozen_root))) + frozen_root = 1; + else { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (error) + return error; + unlock = 1; + } } generic_fillattr(inode, stat); if (unlock) gfs2_glock_dq_uninit(&gh); + else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root)) + wake_up(&sdp->sd_frozen_root_wait); return 0; } @@ -1740,6 +1870,10 @@ static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, struct gfs2_holder gh; int ret; + /* For selinux during lookup */ + if (gfs2_glock_is_locked_by_me(ip->i_gl)) + return generic_getxattr(dentry, name, data, size); + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { @@ -1821,6 +1955,7 @@ const struct inode_operations gfs2_file_iops = { .removexattr = gfs2_removexattr, .fiemap = gfs2_fiemap, .get_acl = gfs2_get_acl, + .set_acl = gfs2_set_acl, }; const struct inode_operations gfs2_dir_iops = { @@ -1842,12 +1977,14 @@ const struct inode_operations gfs2_dir_iops = { .removexattr = gfs2_removexattr, .fiemap = gfs2_fiemap, .get_acl = gfs2_get_acl, + .set_acl = gfs2_set_acl, + .atomic_open = gfs2_atomic_open, }; const struct inode_operations gfs2_symlink_iops = { .readlink = generic_readlink, .follow_link = gfs2_follow_link, - .put_link = gfs2_put_link, + .put_link = kfree_put_link, .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, @@ -1856,6 +1993,5 @@ const struct inode_operations gfs2_symlink_iops = { .listxattr = gfs2_listxattr, .removexattr = gfs2_removexattr, .fiemap = gfs2_fiemap, - .get_acl = gfs2_get_acl, }; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index c53c7477f6d..ba4d9492d42 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -109,6 +109,7 @@ extern int gfs2_permission(struct inode *inode, int mask); extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr); extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); +extern int gfs2_open_common(struct inode *inode, struct file *file); extern const struct inode_operations gfs2_file_iops; extern const struct inode_operations gfs2_dir_iops; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index b906ed17a83..4fafea1c9ec 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/fs.h> #include <linux/dlm.h> #include <linux/slab.h> @@ -176,7 +178,7 @@ static void gdlm_bast(void *arg, int mode) gfs2_glock_cb(gl, LM_ST_SHARED); break; default: - printk(KERN_ERR "unknown bast mode %d", mode); + pr_err("unknown bast mode %d\n", mode); BUG(); } } @@ -195,7 +197,7 @@ static int make_mode(const unsigned int lmstate) case LM_ST_SHARED: return DLM_LOCK_PR; } - printk(KERN_ERR "unknown LM state %d", lmstate); + pr_err("unknown LM state %d\n", lmstate); BUG(); return -1; } @@ -281,6 +283,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; + int lvb_needs_unlock = 0; int error; if (gl->gl_lksb.sb_lkid == 0) { @@ -294,8 +297,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_update_request_times(gl); /* don't want to skip dlm_unlock writing the lvb when lock is ex */ + + if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE)) + lvb_needs_unlock = 1; + if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && - gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { + !lvb_needs_unlock) { gfs2_glock_free(gl); return; } @@ -303,7 +310,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, NULL, gl); if (error) { - printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n", + pr_err("gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, error); return; @@ -461,29 +468,25 @@ static void gdlm_cancel(struct gfs2_glock *gl) static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); - memcpy(&gen, lvb_bits, sizeof(uint32_t)); + memcpy(&gen, lvb_bits, sizeof(__le32)); *lvb_gen = le32_to_cpu(gen); } static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, char *lvb_bits) { - uint32_t gen; + __le32 gen; memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); gen = cpu_to_le32(lvb_gen); - memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); + memcpy(ls->ls_control_lvb, &gen, sizeof(__le32)); } static int all_jid_bits_clear(char *lvb) { - int i; - for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) { - if (lvb[i]) - return 0; - } - return 1; + return !memchr_inv(lvb + JID_BITMAP_OFFSET, 0, + GDLM_LVB_SIZE - JID_BITMAP_OFFSET); } static void sync_wait_cb(void *arg) @@ -575,7 +578,6 @@ static void gfs2_control_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); struct lm_lockstruct *ls = &sdp->sd_lockstruct; - char lvb_bits[GDLM_LVB_SIZE]; uint32_t block_gen, start_gen, lvb_gen, flags; int recover_set = 0; int write_lvb = 0; @@ -629,7 +631,7 @@ static void gfs2_control_func(struct work_struct *work) return; } - control_lvb_read(ls, &lvb_gen, lvb_bits); + control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits); spin_lock(&ls->ls_recover_spin); if (block_gen != ls->ls_recover_block || @@ -659,10 +661,10 @@ static void gfs2_control_func(struct work_struct *work) ls->ls_recover_result[i] = 0; - if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) + if (!test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) continue; - __clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + __clear_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET); write_lvb = 1; } } @@ -686,7 +688,7 @@ static void gfs2_control_func(struct work_struct *work) continue; if (ls->ls_recover_submit[i] < start_gen) { ls->ls_recover_submit[i] = 0; - __set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET); + __set_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET); } } /* even if there are no bits to set, we need to write the @@ -700,7 +702,7 @@ static void gfs2_control_func(struct work_struct *work) spin_unlock(&ls->ls_recover_spin); if (write_lvb) { - control_lvb_write(ls, start_gen, lvb_bits); + control_lvb_write(ls, start_gen, ls->ls_lvb_bits); flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK; } else { flags = DLM_LKF_CONVERT; @@ -720,7 +722,7 @@ static void gfs2_control_func(struct work_struct *work) */ for (i = 0; i < recover_size; i++) { - if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) { + if (test_bit_le(i, ls->ls_lvb_bits + JID_BITMAP_OFFSET)) { fs_info(sdp, "recover generation %u jid %d\n", start_gen, i); gfs2_recover_set(sdp, i); @@ -753,7 +755,6 @@ static void gfs2_control_func(struct work_struct *work) static int control_mount(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - char lvb_bits[GDLM_LVB_SIZE]; uint32_t start_gen, block_gen, mount_gen, lvb_gen; int mounted_mode; int retries = 0; @@ -852,7 +853,7 @@ locks_done: * lvb_gen will be non-zero. */ - control_lvb_read(ls, &lvb_gen, lvb_bits); + control_lvb_read(ls, &lvb_gen, ls->ls_lvb_bits); if (lvb_gen == 0xFFFFFFFF) { /* special value to force mount attempts to fail */ @@ -882,7 +883,7 @@ locks_done: * and all lvb bits to be clear (no pending journal recoveries.) */ - if (!all_jid_bits_clear(lvb_bits)) { + if (!all_jid_bits_clear(ls->ls_lvb_bits)) { /* journals need recovery, wait until all are clear */ fs_info(sdp, "control_mount wait for journal recovery\n"); goto restart; @@ -944,7 +945,6 @@ static int dlm_recovery_wait(void *word) static int control_first_done(struct gfs2_sbd *sdp) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - char lvb_bits[GDLM_LVB_SIZE]; uint32_t start_gen, block_gen; int error; @@ -986,8 +986,8 @@ restart: memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t)); spin_unlock(&ls->ls_recover_spin); - memset(lvb_bits, 0, sizeof(lvb_bits)); - control_lvb_write(ls, start_gen, lvb_bits); + memset(ls->ls_lvb_bits, 0, GDLM_LVB_SIZE); + control_lvb_write(ls, start_gen, ls->ls_lvb_bits); error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT); if (error) @@ -1017,6 +1017,12 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, uint32_t old_size, new_size; int i, max_jid; + if (!ls->ls_lvb_bits) { + ls->ls_lvb_bits = kzalloc(GDLM_LVB_SIZE, GFP_NOFS); + if (!ls->ls_lvb_bits) + return -ENOMEM; + } + max_jid = 0; for (i = 0; i < num_slots; i++) { if (max_jid < slots[i].slot - 1) @@ -1030,8 +1036,8 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, new_size = old_size + RECOVER_SIZE_INC; - submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); - result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS); + submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); + result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS); if (!submit || !result) { kfree(submit); kfree(result); @@ -1052,6 +1058,7 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots, static void free_recover_size(struct lm_lockstruct *ls) { + kfree(ls->ls_lvb_bits); kfree(ls->ls_recover_submit); kfree(ls->ls_recover_result); ls->ls_recover_submit = NULL; @@ -1097,7 +1104,7 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) } if (ls->ls_recover_submit[jid]) { - fs_info(sdp, "recover_slot jid %d gen %u prev %u", + fs_info(sdp, "recover_slot jid %d gen %u prev %u\n", jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); } ls->ls_recover_submit[jid] = ls->ls_recover_block; @@ -1127,7 +1134,7 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY); spin_unlock(&ls->ls_recover_spin); } @@ -1200,6 +1207,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) ls->ls_recover_size = 0; ls->ls_recover_submit = NULL; ls->ls_recover_result = NULL; + ls->ls_lvb_bits = NULL; error = set_recover_size(sdp, NULL, 0); if (error) @@ -1263,7 +1271,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table) ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags); clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); return 0; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f4beeb9c81c..3966fadbceb 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -18,6 +18,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> #include <linux/bio.h> +#include <linux/blkdev.h> #include <linux/writeback.h> #include <linux/list_sort.h> @@ -73,7 +74,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, void gfs2_remove_from_ail(struct gfs2_bufdata *bd) { - bd->bd_ail = NULL; + bd->bd_tr = NULL; list_del_init(&bd->bd_ail_st_list); list_del_init(&bd->bd_ail_gl_list); atomic_dec(&bd->bd_gl->gl_ail_count); @@ -90,7 +91,7 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct writeback_control *wbc, - struct gfs2_ail *ai) + struct gfs2_trans *tr) __releases(&sdp->sd_ail_lock) __acquires(&sdp->sd_ail_lock) { @@ -99,15 +100,15 @@ __acquires(&sdp->sd_ail_lock) struct gfs2_bufdata *bd, *s; struct buffer_head *bh; - list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) { + list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; - gfs2_assert(sdp, bd->bd_ail == ai); + gfs2_assert(sdp, bd->bd_tr == tr); if (!buffer_busy(bh)) { if (!buffer_uptodate(bh)) gfs2_io_error_bh(sdp, bh); - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); + list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); continue; } @@ -116,7 +117,7 @@ __acquires(&sdp->sd_ail_lock) if (gl == bd->bd_gl) continue; gl = bd->bd_gl; - list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); + list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list); mapping = bh->b_page->mapping; if (!mapping) continue; @@ -144,18 +145,21 @@ __acquires(&sdp->sd_ail_lock) void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) { struct list_head *head = &sdp->sd_ail1_list; - struct gfs2_ail *ai; + struct gfs2_trans *tr; + struct blk_plug plug; trace_gfs2_ail_flush(sdp, wbc, 1); + blk_start_plug(&plug); spin_lock(&sdp->sd_ail_lock); restart: - list_for_each_entry_reverse(ai, head, ai_list) { + list_for_each_entry_reverse(tr, head, tr_list) { if (wbc->nr_to_write <= 0) break; - if (gfs2_ail1_start_one(sdp, wbc, ai)) + if (gfs2_ail1_start_one(sdp, wbc, tr)) goto restart; } spin_unlock(&sdp->sd_ail_lock); + blk_finish_plug(&plug); trace_gfs2_ail_flush(sdp, wbc, 0); } @@ -183,20 +187,20 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) * */ -static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { struct gfs2_bufdata *bd, *s; struct buffer_head *bh; - list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, + list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; - gfs2_assert(sdp, bd->bd_ail == ai); + gfs2_assert(sdp, bd->bd_tr == tr); if (buffer_busy(bh)) continue; if (!buffer_uptodate(bh)) gfs2_io_error_bh(sdp, bh); - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); + list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); } } @@ -210,16 +214,17 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) static int gfs2_ail1_empty(struct gfs2_sbd *sdp) { - struct gfs2_ail *ai, *s; + struct gfs2_trans *tr, *s; + int oldest_tr = 1; int ret; spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { - gfs2_ail1_empty_one(sdp, ai); - if (list_empty(&ai->ai_ail1_list)) - list_move(&ai->ai_list, &sdp->sd_ail2_list); + list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { + gfs2_ail1_empty_one(sdp, tr); + if (list_empty(&tr->tr_ail1_list) && oldest_tr) + list_move(&tr->tr_list, &sdp->sd_ail2_list); else - break; + oldest_tr = 0; } ret = list_empty(&sdp->sd_ail1_list); spin_unlock(&sdp->sd_ail_lock); @@ -229,13 +234,13 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp) static void gfs2_ail1_wait(struct gfs2_sbd *sdp) { - struct gfs2_ail *ai; + struct gfs2_trans *tr; struct gfs2_bufdata *bd; struct buffer_head *bh; spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) { - list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) { + list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { + list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) { bh = bd->bd_bh; if (!buffer_locked(bh)) continue; @@ -256,46 +261,63 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp) * */ -static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { - struct list_head *head = &ai->ai_ail2_list; + struct list_head *head = &tr->tr_ail2_list; struct gfs2_bufdata *bd; while (!list_empty(head)) { bd = list_entry(head->prev, struct gfs2_bufdata, bd_ail_st_list); - gfs2_assert(sdp, bd->bd_ail == ai); + gfs2_assert(sdp, bd->bd_tr == tr); gfs2_remove_from_ail(bd); } } static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) { - struct gfs2_ail *ai, *safe; + struct gfs2_trans *tr, *safe; unsigned int old_tail = sdp->sd_log_tail; int wrap = (new_tail < old_tail); int a, b, rm; spin_lock(&sdp->sd_ail_lock); - list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { - a = (old_tail <= ai->ai_first); - b = (ai->ai_first < new_tail); + list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) { + a = (old_tail <= tr->tr_first); + b = (tr->tr_first < new_tail); rm = (wrap) ? (a || b) : (a && b); if (!rm) continue; - gfs2_ail2_empty_one(sdp, ai); - list_del(&ai->ai_list); - gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list)); - gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list)); - kfree(ai); + gfs2_ail2_empty_one(sdp, tr); + list_del(&tr->tr_list); + gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); + gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); + kfree(tr); } spin_unlock(&sdp->sd_ail_lock); } /** + * gfs2_log_release - Release a given number of log blocks + * @sdp: The GFS2 superblock + * @blks: The number of blocks + * + */ + +void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) +{ + + atomic_add(blks, &sdp->sd_log_blks_free); + trace_gfs2_log_blocks(sdp, blks); + gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= + sdp->sd_jdesc->jd_blocks); + up_read(&sdp->sd_log_flush_lock); +} + +/** * gfs2_log_reserve - Make a log reservation * @sdp: The GFS2 superblock * @blks: The number of blocks to reserve @@ -317,7 +339,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) { - unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize); + unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); unsigned wanted = blks + reserved_blks; DEFINE_WAIT(wait); int did_wait = 0; @@ -353,7 +375,10 @@ retry: wake_up(&sdp->sd_log_waitq); down_read(&sdp->sd_log_flush_lock); - + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + gfs2_log_release(sdp, blks); + return -EROFS; + } return 0; } @@ -409,24 +434,22 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer static unsigned int calc_reserved(struct gfs2_sbd *sdp) { unsigned int reserved = 0; - unsigned int mbuf_limit, metabufhdrs_needed; - unsigned int dbuf_limit, databufhdrs_needed; - unsigned int revokes = 0; - - mbuf_limit = buf_limit(sdp); - metabufhdrs_needed = (sdp->sd_log_commited_buf + - (mbuf_limit - 1)) / mbuf_limit; - dbuf_limit = databuf_limit(sdp); - databufhdrs_needed = (sdp->sd_log_commited_databuf + - (dbuf_limit - 1)) / dbuf_limit; + unsigned int mbuf; + unsigned int dbuf; + struct gfs2_trans *tr = sdp->sd_log_tr; + + if (tr) { + mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; + dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; + reserved = mbuf + dbuf; + /* Account for header blocks */ + reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp)); + reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp)); + } if (sdp->sd_log_commited_revoke > 0) - revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, + reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, sizeof(u64)); - - reserved = sdp->sd_log_commited_buf + metabufhdrs_needed + - sdp->sd_log_commited_databuf + databufhdrs_needed + - revokes; /* One for the overall header */ if (reserved) reserved++; @@ -435,7 +458,7 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp) static unsigned int current_tail(struct gfs2_sbd *sdp) { - struct gfs2_ail *ai; + struct gfs2_trans *tr; unsigned int tail; spin_lock(&sdp->sd_ail_lock); @@ -443,8 +466,9 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) if (list_empty(&sdp->sd_ail1_list)) { tail = sdp->sd_log_head; } else { - ai = list_entry(sdp->sd_ail1_list.prev, struct gfs2_ail, ai_list); - tail = ai->ai_first; + tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans, + tr_list); + tail = tr->tr_first; } spin_unlock(&sdp->sd_ail_lock); @@ -482,70 +506,136 @@ static void log_flush_wait(struct gfs2_sbd *sdp) } } -static int bd_cmp(void *priv, struct list_head *a, struct list_head *b) +static int ip_cmp(void *priv, struct list_head *a, struct list_head *b) { - struct gfs2_bufdata *bda, *bdb; + struct gfs2_inode *ipa, *ipb; - bda = list_entry(a, struct gfs2_bufdata, bd_list); - bdb = list_entry(b, struct gfs2_bufdata, bd_list); + ipa = list_entry(a, struct gfs2_inode, i_ordered); + ipb = list_entry(b, struct gfs2_inode, i_ordered); - if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) + if (ipa->i_no_addr < ipb->i_no_addr) return -1; - if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) + if (ipa->i_no_addr > ipb->i_no_addr) return 1; return 0; } static void gfs2_ordered_write(struct gfs2_sbd *sdp) { - struct gfs2_bufdata *bd; - struct buffer_head *bh; + struct gfs2_inode *ip; LIST_HEAD(written); - gfs2_log_lock(sdp); - list_sort(NULL, &sdp->sd_log_le_ordered, &bd_cmp); + spin_lock(&sdp->sd_ordered_lock); + list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp); while (!list_empty(&sdp->sd_log_le_ordered)) { - bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_list); - list_move(&bd->bd_list, &written); - bh = bd->bd_bh; - if (!buffer_dirty(bh)) + ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered); + list_move(&ip->i_ordered, &written); + if (ip->i_inode.i_mapping->nrpages == 0) continue; - get_bh(bh); - gfs2_log_unlock(sdp); - lock_buffer(bh); - if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { - bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE_SYNC, bh); - } else { - unlock_buffer(bh); - brelse(bh); - } - gfs2_log_lock(sdp); + spin_unlock(&sdp->sd_ordered_lock); + filemap_fdatawrite(ip->i_inode.i_mapping); + spin_lock(&sdp->sd_ordered_lock); } list_splice(&written, &sdp->sd_log_le_ordered); - gfs2_log_unlock(sdp); + spin_unlock(&sdp->sd_ordered_lock); } static void gfs2_ordered_wait(struct gfs2_sbd *sdp) { - struct gfs2_bufdata *bd; - struct buffer_head *bh; + struct gfs2_inode *ip; - gfs2_log_lock(sdp); + spin_lock(&sdp->sd_ordered_lock); while (!list_empty(&sdp->sd_log_le_ordered)) { - bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_list); - bh = bd->bd_bh; - if (buffer_locked(bh)) { - get_bh(bh); - gfs2_log_unlock(sdp); - wait_on_buffer(bh); - brelse(bh); - gfs2_log_lock(sdp); + ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered); + list_del(&ip->i_ordered); + WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags)); + if (ip->i_inode.i_mapping->nrpages == 0) continue; + spin_unlock(&sdp->sd_ordered_lock); + filemap_fdatawait(ip->i_inode.i_mapping); + spin_lock(&sdp->sd_ordered_lock); + } + spin_unlock(&sdp->sd_ordered_lock); +} + +void gfs2_ordered_del_inode(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + + spin_lock(&sdp->sd_ordered_lock); + if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) + list_del(&ip->i_ordered); + spin_unlock(&sdp->sd_ordered_lock); +} + +void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) +{ + struct buffer_head *bh = bd->bd_bh; + struct gfs2_glock *gl = bd->bd_gl; + + bh->b_private = NULL; + bd->bd_blkno = bh->b_blocknr; + gfs2_remove_from_ail(bd); /* drops ref on bh */ + bd->bd_bh = NULL; + bd->bd_ops = &gfs2_revoke_lops; + sdp->sd_log_num_revoke++; + atomic_inc(&gl->gl_revokes); + set_bit(GLF_LFLUSH, &gl->gl_flags); + list_add(&bd->bd_list, &sdp->sd_log_le_revoke); +} + +void gfs2_write_revokes(struct gfs2_sbd *sdp) +{ + struct gfs2_trans *tr; + struct gfs2_bufdata *bd, *tmp; + int have_revokes = 0; + int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); + + gfs2_ail1_empty(sdp); + spin_lock(&sdp->sd_ail_lock); + list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) { + list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) { + if (list_empty(&bd->bd_list)) { + have_revokes = 1; + goto done; + } + } + } +done: + spin_unlock(&sdp->sd_ail_lock); + if (have_revokes == 0) + return; + while (sdp->sd_log_num_revoke > max_revokes) + max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); + max_revokes -= sdp->sd_log_num_revoke; + if (!sdp->sd_log_num_revoke) { + atomic_dec(&sdp->sd_log_blks_free); + /* If no blocks have been reserved, we need to also + * reserve a block for the header */ + if (!sdp->sd_log_blks_reserved) + atomic_dec(&sdp->sd_log_blks_free); + } + gfs2_log_lock(sdp); + spin_lock(&sdp->sd_ail_lock); + list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) { + list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) { + if (max_revokes == 0) + goto out_of_blocks; + if (!list_empty(&bd->bd_list)) + continue; + gfs2_add_revoke(sdp, bd); + max_revokes--; } - list_del_init(&bd->bd_list); } +out_of_blocks: + spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); + + if (!sdp->sd_log_num_revoke) { + atomic_inc(&sdp->sd_log_blks_free); + if (!sdp->sd_log_blks_reserved) + atomic_inc(&sdp->sd_log_blks_free); + } } /** @@ -565,7 +655,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) lh = page_address(page); clear_page(lh); - gfs2_ail1_empty(sdp); tail = current_tail(sdp); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); @@ -602,9 +691,10 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) * */ -void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) +void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + enum gfs2_flush_type type) { - struct gfs2_ail *ai; + struct gfs2_trans *tr; down_write(&sdp->sd_log_flush_lock); @@ -615,76 +705,129 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) } trace_gfs2_log_flush(sdp, 1); - ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); - INIT_LIST_HEAD(&ai->ai_ail1_list); - INIT_LIST_HEAD(&ai->ai_ail2_list); - - if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) { - printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf, - sdp->sd_log_commited_buf); - gfs2_assert_withdraw(sdp, 0); - } - if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) { - printk(KERN_INFO "GFS2: log databuf %u %u\n", - sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf); - gfs2_assert_withdraw(sdp, 0); + sdp->sd_log_flush_head = sdp->sd_log_head; + sdp->sd_log_flush_wrapped = 0; + tr = sdp->sd_log_tr; + if (tr) { + sdp->sd_log_tr = NULL; + INIT_LIST_HEAD(&tr->tr_ail1_list); + INIT_LIST_HEAD(&tr->tr_ail2_list); + tr->tr_first = sdp->sd_log_flush_head; } + gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); - sdp->sd_log_flush_head = sdp->sd_log_head; - sdp->sd_log_flush_wrapped = 0; - ai->ai_first = sdp->sd_log_flush_head; - gfs2_ordered_write(sdp); - lops_before_commit(sdp); + lops_before_commit(sdp, tr); gfs2_log_flush_bio(sdp, WRITE); if (sdp->sd_log_head != sdp->sd_log_flush_head) { + log_flush_wait(sdp); log_write_header(sdp, 0); } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ trace_gfs2_log_blocks(sdp, -1); log_write_header(sdp, 0); } - lops_after_commit(sdp, ai); + lops_after_commit(sdp, tr); gfs2_log_lock(sdp); sdp->sd_log_head = sdp->sd_log_flush_head; sdp->sd_log_blks_reserved = 0; - sdp->sd_log_commited_buf = 0; - sdp->sd_log_commited_databuf = 0; sdp->sd_log_commited_revoke = 0; spin_lock(&sdp->sd_ail_lock); - if (!list_empty(&ai->ai_ail1_list)) { - list_add(&ai->ai_list, &sdp->sd_ail1_list); - ai = NULL; + if (tr && !list_empty(&tr->tr_ail1_list)) { + list_add(&tr->tr_list, &sdp->sd_ail1_list); + tr = NULL; } spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); + + if (atomic_read(&sdp->sd_log_freeze)) + type = FREEZE_FLUSH; + if (type != NORMAL_FLUSH) { + if (!sdp->sd_log_idle) { + for (;;) { + gfs2_ail1_start(sdp); + gfs2_ail1_wait(sdp); + if (gfs2_ail1_empty(sdp)) + break; + } + atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ + trace_gfs2_log_blocks(sdp, -1); + sdp->sd_log_flush_wrapped = 0; + log_write_header(sdp, 0); + sdp->sd_log_head = sdp->sd_log_flush_head; + } + if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH) + gfs2_log_shutdown(sdp); + if (type == FREEZE_FLUSH) { + int error; + + atomic_set(&sdp->sd_log_freeze, 0); + wake_up(&sdp->sd_log_frozen_wait); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, + LM_ST_SHARED, 0, + &sdp->sd_thaw_gh); + if (error) { + printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error); + gfs2_assert_withdraw(sdp, 0); + } + else + gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); + } + } + trace_gfs2_log_flush(sdp, 0); up_write(&sdp->sd_log_flush_lock); - kfree(ai); + kfree(tr); +} + +/** + * gfs2_merge_trans - Merge a new transaction into a cached transaction + * @old: Original transaction to be expanded + * @new: New transaction to be merged + */ + +static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) +{ + WARN_ON_ONCE(old->tr_attached != 1); + + old->tr_num_buf_new += new->tr_num_buf_new; + old->tr_num_databuf_new += new->tr_num_databuf_new; + old->tr_num_buf_rm += new->tr_num_buf_rm; + old->tr_num_databuf_rm += new->tr_num_databuf_rm; + old->tr_num_revoke += new->tr_num_revoke; + old->tr_num_revoke_rm += new->tr_num_revoke_rm; + + list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); + list_splice_tail_init(&new->tr_buf, &old->tr_buf); } static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { unsigned int reserved; unsigned int unused; + unsigned int maxres; gfs2_log_lock(sdp); - sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm; - sdp->sd_log_commited_databuf += tr->tr_num_databuf_new - - tr->tr_num_databuf_rm; - gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) || - (((int)sdp->sd_log_commited_databuf) >= 0)); + if (sdp->sd_log_tr) { + gfs2_merge_trans(sdp->sd_log_tr, tr); + } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { + gfs2_assert_withdraw(sdp, tr->tr_alloced); + sdp->sd_log_tr = tr; + tr->tr_attached = 1; + } + sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; reserved = calc_reserved(sdp); - gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); - unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; + maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; + gfs2_assert_withdraw(sdp, maxres >= reserved); + unused = maxres - reserved; atomic_add(unused, &sdp->sd_log_blks_free); trace_gfs2_log_blocks(sdp, unused); gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= @@ -712,7 +855,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { log_refund(sdp, tr); - up_read(&sdp->sd_log_flush_lock); if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) || ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) > @@ -728,13 +870,8 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) void gfs2_log_shutdown(struct gfs2_sbd *sdp) { - down_write(&sdp->sd_log_flush_lock); - gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); sdp->sd_log_flush_head = sdp->sd_log_head; @@ -742,38 +879,16 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT); - gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail); gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list)); sdp->sd_log_head = sdp->sd_log_flush_head; sdp->sd_log_tail = sdp->sd_log_head; - - up_write(&sdp->sd_log_flush_lock); -} - - -/** - * gfs2_meta_syncfs - sync all the buffers in a filesystem - * @sdp: the filesystem - * - */ - -void gfs2_meta_syncfs(struct gfs2_sbd *sdp) -{ - gfs2_log_flush(sdp, NULL); - for (;;) { - gfs2_ail1_start(sdp); - gfs2_ail1_wait(sdp); - if (gfs2_ail1_empty(sdp)) - break; - } - gfs2_log_flush(sdp, NULL); } static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) { - return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1)); + return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze)); } static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) @@ -800,14 +915,14 @@ int gfs2_logd(void *data) if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } if (gfs2_ail_flush_reqd(sdp)) { gfs2_ail1_start(sdp); gfs2_ail1_wait(sdp); gfs2_ail1_empty(sdp); - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } if (!gfs2_ail_flush_reqd(sdp)) diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 3fd5215ea25..9499a604921 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -48,17 +48,38 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp, sdp->sd_log_head = sdp->sd_log_tail = value; } +static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + + if (!test_bit(GIF_ORDERED, &ip->i_flags)) { + spin_lock(&sdp->sd_ordered_lock); + if (!test_and_set_bit(GIF_ORDERED, &ip->i_flags)) + list_add(&ip->i_ordered, &sdp->sd_log_le_ordered); + spin_unlock(&sdp->sd_ordered_lock); + } +} +extern void gfs2_ordered_del_inode(struct gfs2_inode *ip); extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, unsigned int ssize); +extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); -extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); +enum gfs2_flush_type { + NORMAL_FLUSH = 0, + SYNC_FLUSH, + SHUTDOWN_FLUSH, + FREEZE_FLUSH +}; +extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, + enum gfs2_flush_type type); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); -extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); +extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); +extern void gfs2_write_revokes(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 9ceccb1595a..2c1ae861dc9 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -16,6 +16,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/bio.h> #include <linux/fs.h> +#include <linux/list_sort.h> #include "gfs2.h" #include "incore.h" @@ -37,7 +38,7 @@ * * The log lock must be held when calling this function */ -static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) +void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) { struct gfs2_bufdata *bd; @@ -53,8 +54,8 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) * to in-place disk block, remove it from the AIL. */ spin_lock(&sdp->sd_ail_lock); - if (bd->bd_ail) - list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); + if (bd->bd_tr) + list_move(&bd->bd_ail_st_list, &bd->bd_tr->tr_ail2_list); spin_unlock(&sdp->sd_ail_lock); get_bh(bh); atomic_inc(&sdp->sd_log_pinned); @@ -74,7 +75,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number; struct gfs2_bitmap *bi = rgd->rd_bits + index; - if (bi->bi_clone == 0) + if (bi->bi_clone == NULL) return; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL); @@ -82,6 +83,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) bd->bd_bh->b_data + bi->bi_offset, bi->bi_len); clear_bit(GBF_FULL, &bi->bi_flags); rgd->rd_free_clone = rgd->rd_free; + rgd->rd_extfail_pt = rgd->rd_free; } /** @@ -94,7 +96,7 @@ static void maybe_release_space(struct gfs2_bufdata *bd) */ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, - struct gfs2_ail *ai) + struct gfs2_trans *tr) { struct gfs2_bufdata *bd = bh->b_private; @@ -109,7 +111,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, maybe_release_space(bd); spin_lock(&sdp->sd_ail_lock); - if (bd->bd_ail) { + if (bd->bd_tr) { list_del(&bd->bd_ail_st_list); brelse(bh); } else { @@ -117,8 +119,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list); atomic_inc(&gl->gl_ail_count); } - bd->bd_ail = ai; - list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); + bd->bd_tr = tr; + list_add(&bd->bd_ail_st_list, &tr->tr_ail1_list); spin_unlock(&sdp->sd_ail_lock); clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); @@ -144,8 +146,8 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp) struct gfs2_journal_extent *je; u64 block; - list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { - if (lbn >= je->lblock && lbn < je->lblock + je->blocks) { + list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) { + if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) { block = je->dblock + lbn - je->lblock; gfs2_log_incr_head(sdp); return block; @@ -212,7 +214,7 @@ static void gfs2_end_log_write(struct bio *bio, int error) fs_err(sdp, "Error %d writing to log\n", error); } - bio_for_each_segment(bvec, bio, i) { + bio_for_each_segment_all(bvec, bio, i) { page = bvec->bv_page; if (page_has_buffers(page)) gfs2_end_log_write_bh(sdp, bvec, error); @@ -271,7 +273,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno) nrvecs = max(nrvecs/2, 1U); } - bio->bi_sector = blkno * (sb->s_blocksize >> 9); + bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9); bio->bi_bdev = sb->s_bdev; bio->bi_end_io = gfs2_end_log_write; bio->bi_private = sdp; @@ -300,7 +302,7 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno) u64 nblk; if (bio) { - nblk = bio->bi_sector + bio_sectors(bio); + nblk = bio_end_sector(bio); nblk >>= sdp->sd_fsb2bb_shift; if (blkno == nblk) return bio; @@ -388,32 +390,6 @@ static struct page *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type, return page; } -static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) -{ - struct gfs2_meta_header *mh; - struct gfs2_trans *tr; - - tr = current->journal_info; - tr->tr_touched = 1; - if (!list_empty(&bd->bd_list)) - return; - set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); - set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); - mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; - if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { - printk(KERN_ERR - "Attempting to add uninitialised block to journal (inplace block=%lld)\n", - (unsigned long long)bd->bd_bh->b_blocknr); - BUG(); - } - gfs2_pin(sdp, bd->bd_bh); - mh->__pad0 = cpu_to_be64(0); - mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); - sdp->sd_log_num_buf++; - list_add(&bd->bd_list, &sdp->sd_log_le_buf); - tr->tr_num_buf_new++; -} - static void gfs2_check_magic(struct buffer_head *bh) { void *kaddr; @@ -427,6 +403,20 @@ static void gfs2_check_magic(struct buffer_head *bh) kunmap_atomic(kaddr); } +static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct gfs2_bufdata *bda, *bdb; + + bda = list_entry(a, struct gfs2_bufdata, bd_list); + bdb = list_entry(b, struct gfs2_bufdata, bd_list); + + if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) + return -1; + if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) + return 1; + return 0; +} + static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, unsigned int total, struct list_head *blist, bool is_databuf) @@ -439,13 +429,16 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, __be64 *ptr; gfs2_log_lock(sdp); + list_sort(NULL, blist, blocknr_cmp); bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list); while(total) { num = total; if (total > limit) num = limit; gfs2_log_unlock(sdp); - page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA, num + 1, num); + page = gfs2_get_log_desc(sdp, + is_databuf ? GFS2_LOG_DESC_JDATA : + GFS2_LOG_DESC_METADATA, num + 1, num); ld = page_address(page); gfs2_log_lock(sdp); ptr = (__be64 *)(ld + 1); @@ -498,39 +491,40 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, gfs2_log_unlock(sdp); } -static void buf_lo_before_commit(struct gfs2_sbd *sdp) +static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */ - - gfs2_before_commit(sdp, limit, sdp->sd_log_num_buf, - &sdp->sd_log_le_buf, 0); + unsigned int nbuf; + if (tr == NULL) + return; + nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; + gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0); } -static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { - struct list_head *head = &sdp->sd_log_le_buf; + struct list_head *head; struct gfs2_bufdata *bd; + if (tr == NULL) + return; + + head = &tr->tr_buf; while (!list_empty(head)) { bd = list_entry(head->next, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); - sdp->sd_log_num_buf--; - - gfs2_unpin(sdp, bd->bd_bh, ai); + gfs2_unpin(sdp, bd->bd_bh, tr); } - gfs2_assert_warn(sdp, !sdp->sd_log_num_buf); } static void buf_lo_before_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, int pass) { - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - if (pass != 0) return; - sdp->sd_found_blocks = 0; - sdp->sd_replayed_blocks = 0; + jd->jd_found_blocks = 0; + jd->jd_replayed_blocks = 0; } static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, @@ -553,9 +547,9 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { blkno = be64_to_cpu(*ptr++); - sdp->sd_found_blocks++; + jd->jd_found_blocks++; - if (gfs2_revoke_check(sdp, blkno, start)) + if (gfs2_revoke_check(jd, blkno, start)) continue; error = gfs2_replay_read_block(jd, start, &bh_log); @@ -576,12 +570,34 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, if (error) break; - sdp->sd_replayed_blocks++; + jd->jd_replayed_blocks++; } return error; } +/** + * gfs2_meta_sync - Sync all buffers associated with a glock + * @gl: The glock + * + */ + +static void gfs2_meta_sync(struct gfs2_glock *gl) +{ + struct address_space *mapping = gfs2_glock2aspace(gl); + struct gfs2_sbd *sdp = gl->gl_sbd; + int error; + + if (mapping == NULL) + mapping = &sdp->sd_aspace; + + filemap_fdatawrite(mapping); + error = filemap_fdatawait(mapping); + + if (error) + gfs2_io_error(gl->gl_sbd); +} + static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); @@ -597,24 +613,10 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) gfs2_meta_sync(ip->i_gl); fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", - jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); -} - -static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) -{ - struct gfs2_glock *gl = bd->bd_gl; - struct gfs2_trans *tr; - - tr = current->journal_info; - tr->tr_touched = 1; - tr->tr_num_revoke++; - sdp->sd_log_num_revoke++; - atomic_inc(&gl->gl_revokes); - set_bit(GLF_LFLUSH, &gl->gl_flags); - list_add(&bd->bd_list, &sdp->sd_log_le_revoke); + jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); } -static void revoke_lo_before_commit(struct gfs2_sbd *sdp) +static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { struct gfs2_meta_header *mh; unsigned int offset; @@ -623,6 +625,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) struct page *page; unsigned int length; + gfs2_write_revokes(sdp); if (!sdp->sd_log_num_revoke) return; @@ -653,7 +656,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) gfs2_log_write_page(sdp, page); } -static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { struct list_head *head = &sdp->sd_log_le_revoke; struct gfs2_bufdata *bd; @@ -672,13 +675,11 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) static void revoke_lo_before_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, int pass) { - struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); - if (pass != 0) return; - sdp->sd_found_revokes = 0; - sdp->sd_replay_tail = head->lh_tail; + jd->jd_found_revokes = 0; + jd->jd_replay_tail = head->lh_tail; } static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, @@ -710,13 +711,13 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); - error = gfs2_revoke_add(sdp, blkno, start); + error = gfs2_revoke_add(jd, blkno, start); if (error < 0) { brelse(bh); return error; } else if (error) - sdp->sd_found_revokes++; + jd->jd_found_revokes++; if (!--revokes) break; @@ -736,54 +737,16 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); if (error) { - gfs2_revoke_clean(sdp); + gfs2_revoke_clean(jd); return; } if (pass != 1) return; fs_info(sdp, "jid=%u: Found %u revoke tags\n", - jd->jd_jid, sdp->sd_found_revokes); + jd->jd_jid, jd->jd_found_revokes); - gfs2_revoke_clean(sdp); -} - -/** - * databuf_lo_add - Add a databuf to the transaction. - * - * This is used in two distinct cases: - * i) In ordered write mode - * We put the data buffer on a list so that we can ensure that its - * synced to disk at the right time - * ii) In journaled data mode - * We need to journal the data block in the same way as metadata in - * the functions above. The difference is that here we have a tag - * which is two __be64's being the block number (as per meta data) - * and a flag which says whether the data block needs escaping or - * not. This means we need a new log entry for each 251 or so data - * blocks, which isn't an enormous overhead but twice as much as - * for normal metadata blocks. - */ -static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) -{ - struct gfs2_trans *tr = current->journal_info; - struct address_space *mapping = bd->bd_bh->b_page->mapping; - struct gfs2_inode *ip = GFS2_I(mapping->host); - - if (tr) - tr->tr_touched = 1; - if (!list_empty(&bd->bd_list)) - return; - set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); - set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); - if (gfs2_is_jdata(ip)) { - gfs2_pin(sdp, bd->bd_bh); - tr->tr_num_databuf_new++; - sdp->sd_log_num_databuf++; - list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf); - } else { - list_add_tail(&bd->bd_list, &sdp->sd_log_le_ordered); - } + gfs2_revoke_clean(jd); } /** @@ -791,12 +754,14 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) * */ -static void databuf_lo_before_commit(struct gfs2_sbd *sdp) +static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { - unsigned int limit = buf_limit(sdp) / 2; - - gfs2_before_commit(sdp, limit, sdp->sd_log_num_databuf, - &sdp->sd_log_le_databuf, 1); + unsigned int limit = databuf_limit(sdp); + unsigned int nbuf; + if (tr == NULL) + return; + nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; + gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1); } static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, @@ -820,9 +785,9 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, blkno = be64_to_cpu(*ptr++); esc = be64_to_cpu(*ptr++); - sdp->sd_found_blocks++; + jd->jd_found_blocks++; - if (gfs2_revoke_check(sdp, blkno, start)) + if (gfs2_revoke_check(jd, blkno, start)) continue; error = gfs2_replay_read_block(jd, start, &bh_log); @@ -842,7 +807,7 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, brelse(bh_log); brelse(bh_ip); - sdp->sd_replayed_blocks++; + jd->jd_replayed_blocks++; } return error; @@ -866,26 +831,27 @@ static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) gfs2_meta_sync(ip->i_gl); fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", - jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); + jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); } -static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) { - struct list_head *head = &sdp->sd_log_le_databuf; + struct list_head *head; struct gfs2_bufdata *bd; + if (tr == NULL) + return; + + head = &tr->tr_databuf; while (!list_empty(head)) { bd = list_entry(head->next, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); - sdp->sd_log_num_databuf--; - gfs2_unpin(sdp, bd->bd_bh, ai); + gfs2_unpin(sdp, bd->bd_bh, tr); } - gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); } const struct gfs2_log_operations gfs2_buf_lops = { - .lo_add = buf_lo_add, .lo_before_commit = buf_lo_before_commit, .lo_after_commit = buf_lo_after_commit, .lo_before_scan = buf_lo_before_scan, @@ -895,7 +861,6 @@ const struct gfs2_log_operations gfs2_buf_lops = { }; const struct gfs2_log_operations gfs2_revoke_lops = { - .lo_add = revoke_lo_add, .lo_before_commit = revoke_lo_before_commit, .lo_after_commit = revoke_lo_after_commit, .lo_before_scan = revoke_lo_before_scan, @@ -904,12 +869,7 @@ const struct gfs2_log_operations gfs2_revoke_lops = { .lo_name = "revoke", }; -const struct gfs2_log_operations gfs2_rg_lops = { - .lo_name = "rg", -}; - const struct gfs2_log_operations gfs2_databuf_lops = { - .lo_add = databuf_lo_add, .lo_before_commit = databuf_lo_before_commit, .lo_after_commit = databuf_lo_after_commit, .lo_scan_elements = databuf_lo_scan_elements, @@ -920,7 +880,6 @@ const struct gfs2_log_operations gfs2_databuf_lops = { const struct gfs2_log_operations *gfs2_log_ops[] = { &gfs2_databuf_lops, &gfs2_buf_lops, - &gfs2_rg_lops, &gfs2_revoke_lops, NULL, }; diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 954a330585f..a65a7ba32ff 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -23,12 +23,12 @@ extern const struct gfs2_log_operations gfs2_glock_lops; extern const struct gfs2_log_operations gfs2_buf_lops; extern const struct gfs2_log_operations gfs2_revoke_lops; -extern const struct gfs2_log_operations gfs2_rg_lops; extern const struct gfs2_log_operations gfs2_databuf_lops; extern const struct gfs2_log_operations *gfs2_log_ops[]; extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page); extern void gfs2_log_flush_bio(struct gfs2_sbd *sdp, int rw); +extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); static inline unsigned int buf_limit(struct gfs2_sbd *sdp) { @@ -46,33 +46,22 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp) return limit; } -static inline void lops_init_le(struct gfs2_bufdata *bd, - const struct gfs2_log_operations *lops) -{ - INIT_LIST_HEAD(&bd->bd_list); - bd->bd_ops = lops; -} - -static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) -{ - if (bd->bd_ops->lo_add) - bd->bd_ops->lo_add(sdp, bd); -} - -static inline void lops_before_commit(struct gfs2_sbd *sdp) +static inline void lops_before_commit(struct gfs2_sbd *sdp, + struct gfs2_trans *tr) { int x; for (x = 0; gfs2_log_ops[x]; x++) if (gfs2_log_ops[x]->lo_before_commit) - gfs2_log_ops[x]->lo_before_commit(sdp); + gfs2_log_ops[x]->lo_before_commit(sdp, tr); } -static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +static inline void lops_after_commit(struct gfs2_sbd *sdp, + struct gfs2_trans *tr) { int x; for (x = 0; gfs2_log_ops[x]; x++) if (gfs2_log_ops[x]->lo_after_commit) - gfs2_log_ops[x]->lo_after_commit(sdp, ai); + gfs2_log_ops[x]->lo_after_commit(sdp, tr); } static inline void lops_before_scan(struct gfs2_jdesc *jd, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index e04d0e09ee7..82b6ac82965 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> @@ -31,11 +33,6 @@ struct workqueue_struct *gfs2_control_wq; -static struct shrinker qd_shrinker = { - .shrink = gfs2_shrink_qd_memory, - .seeks = DEFAULT_SEEKS, -}; - static void gfs2_init_inode_once(void *foo) { struct gfs2_inode *ip = foo; @@ -81,11 +78,16 @@ static int __init init_gfs2_fs(void) gfs2_str2qstr(&gfs2_qdot, "."); gfs2_str2qstr(&gfs2_qdotdot, ".."); + gfs2_quota_hash_init(); error = gfs2_sys_init(); if (error) return error; + error = list_lru_init(&gfs2_qd_lru); + if (error) + goto fail_lru; + error = gfs2_glock_init(); if (error) goto fail; @@ -138,7 +140,7 @@ static int __init init_gfs2_fs(void) if (!gfs2_rsrv_cachep) goto fail; - register_shrinker(&qd_shrinker); + register_shrinker(&gfs2_qd_shrinker); error = register_filesystem(&gfs2_fs_type); if (error) @@ -155,7 +157,7 @@ static int __init init_gfs2_fs(void) goto fail_wq; gfs2_control_wq = alloc_workqueue("gfs2_control", - WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0); + WQ_UNBOUND | WQ_FREEZABLE, 0); if (!gfs2_control_wq) goto fail_recovery; @@ -165,7 +167,7 @@ static int __init init_gfs2_fs(void) gfs2_register_debugfs(); - printk("GFS2 installed\n"); + pr_info("GFS2 installed\n"); return 0; @@ -178,7 +180,9 @@ fail_wq: fail_unregister: unregister_filesystem(&gfs2_fs_type); fail: - unregister_shrinker(&qd_shrinker); + list_lru_destroy(&gfs2_qd_lru); +fail_lru: + unregister_shrinker(&gfs2_qd_shrinker); gfs2_glock_exit(); if (gfs2_rsrv_cachep) @@ -213,13 +217,14 @@ fail: static void __exit exit_gfs2_fs(void) { - unregister_shrinker(&qd_shrinker); + unregister_shrinker(&gfs2_qd_shrinker); gfs2_glock_exit(); gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); destroy_workqueue(gfs_recovery_wq); destroy_workqueue(gfs2_control_wq); + list_lru_destroy(&gfs2_qd_lru); rcu_barrier(); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 22255d96b27..b984a6e190b 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -97,23 +97,10 @@ const struct address_space_operations gfs2_meta_aops = { .releasepage = gfs2_releasepage, }; -/** - * gfs2_meta_sync - Sync all buffers associated with a glock - * @gl: The glock - * - */ - -void gfs2_meta_sync(struct gfs2_glock *gl) -{ - struct address_space *mapping = gfs2_glock2aspace(gl); - int error; - - filemap_fdatawrite(mapping); - error = filemap_fdatawait(mapping); - - if (error) - gfs2_io_error(gl->gl_sbd); -} +const struct address_space_operations gfs2_rgrp_aops = { + .writepage = gfs2_aspace_writepage, + .releasepage = gfs2_releasepage, +}; /** * gfs2_getbuf - Get a buffer with a given address space @@ -134,6 +121,9 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) unsigned long index; unsigned int bufnum; + if (mapping == NULL) + mapping = &sdp->sd_aspace; + shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; index = blkno >> shift; /* convert block to page */ bufnum = blkno - (index << shift); /* block buf index within page */ @@ -146,7 +136,8 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) yield(); } } else { - page = find_lock_page(mapping, index); + page = find_get_page_flags(mapping, index, + FGP_LOCK|FGP_ACCESSED); if (!page) return NULL; } @@ -163,7 +154,6 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) map_bh(bh, sdp->sd_vfs, blkno); unlock_page(page); - mark_page_accessed(page); page_cache_release(page); return bh; @@ -271,71 +261,32 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) return 0; } -/** - * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer - * @gl: the glock the buffer belongs to - * @bh: The buffer to be attached to - * @meta: Flag to indicate whether its metadata or not - */ - -void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, - int meta) -{ - struct gfs2_bufdata *bd; - - if (meta) - lock_page(bh->b_page); - - if (bh->b_private) { - if (meta) - unlock_page(bh->b_page); - return; - } - - bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL); - bd->bd_bh = bh; - bd->bd_gl = gl; - - if (meta) - lops_init_le(bd, &gfs2_buf_lops); - else - lops_init_le(bd, &gfs2_databuf_lops); - bh->b_private = bd; - - if (meta) - unlock_page(bh->b_page); -} - void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) { struct address_space *mapping = bh->b_page->mapping; struct gfs2_sbd *sdp = gfs2_mapping2sbd(mapping); struct gfs2_bufdata *bd = bh->b_private; + int was_pinned = 0; if (test_clear_buffer_pinned(bh)) { trace_gfs2_pin(bd, 0); atomic_dec(&sdp->sd_log_pinned); list_del_init(&bd->bd_list); - if (meta) { - gfs2_assert_warn(sdp, sdp->sd_log_num_buf); - sdp->sd_log_num_buf--; + if (meta) tr->tr_num_buf_rm++; - } else { - gfs2_assert_warn(sdp, sdp->sd_log_num_databuf); - sdp->sd_log_num_databuf--; + else tr->tr_num_databuf_rm++; - } tr->tr_touched = 1; + was_pinned = 1; brelse(bh); } if (bd) { spin_lock(&sdp->sd_ail_lock); - if (bd->bd_ail) { - gfs2_remove_from_ail(bd); - bh->b_private = NULL; - bd->bd_bh = NULL; - bd->bd_blkno = bh->b_blocknr; + if (bd->bd_tr) { gfs2_trans_add_revoke(sdp, bd); + } else if (was_pinned) { + bh->b_private = NULL; + kmem_cache_free(gfs2_bufdata_cachep, bd); } spin_unlock(&sdp->sd_ail_lock); } diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index c30973b07a7..ac5d8027d33 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -38,34 +38,30 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh, } extern const struct address_space_operations gfs2_meta_aops; +extern const struct address_space_operations gfs2_rgrp_aops; static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) { struct inode *inode = mapping->host; if (mapping->a_ops == &gfs2_meta_aops) return (((struct gfs2_glock *)mapping) - 1)->gl_sbd; + else if (mapping->a_ops == &gfs2_rgrp_aops) + return container_of(mapping, struct gfs2_sbd, sd_aspace); else return inode->i_sb->s_fs_info; } -void gfs2_meta_sync(struct gfs2_glock *gl); - -struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); -int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, - int flags, struct buffer_head **bhp); -int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); -struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); - -void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, - int meta); - -void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, - int meta); - -void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); - -int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, - struct buffer_head **bhp); +extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); +extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, + struct buffer_head **bhp); +extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); +extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, + int create); +extern void gfs2_remove_from_journal(struct buffer_head *bh, + struct gfs2_trans *tr, int meta); +extern void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, + struct buffer_head **bhp); static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, struct buffer_head **bhp) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 0e3554edb8f..bc564c0d6d1 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -20,6 +22,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/quotaops.h> #include <linux/lockdep.h> +#include <linux/module.h> #include "gfs2.h" #include "incore.h" @@ -35,6 +38,7 @@ #include "log.h" #include "quota.h" #include "dir.h" +#include "meta_io.h" #include "trace_gfs2.h" #define DO 0 @@ -50,7 +54,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) { spin_lock_init(>->gt_spin); - gt->gt_quota_simul_sync = 64; gt->gt_quota_warn_period = 10; gt->gt_quota_scale_num = 1; gt->gt_quota_scale_den = 1; @@ -62,6 +65,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt) static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; + struct address_space *mapping; sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL); if (!sdp) @@ -81,6 +85,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_waitqueue_head(&sdp->sd_glock_wait); atomic_set(&sdp->sd_glock_disposal, 0); init_completion(&sdp->sd_locking_init); + init_completion(&sdp->sd_wdack); spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); @@ -89,19 +94,32 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) INIT_LIST_HEAD(&sdp->sd_jindex_list); spin_lock_init(&sdp->sd_jindex_spin); mutex_init(&sdp->sd_jindex_mutex); + init_completion(&sdp->sd_journal_ready); INIT_LIST_HEAD(&sdp->sd_quota_list); mutex_init(&sdp->sd_quota_mutex); + mutex_init(&sdp->sd_quota_sync_mutex); init_waitqueue_head(&sdp->sd_quota_wait); INIT_LIST_HEAD(&sdp->sd_trunc_list); spin_lock_init(&sdp->sd_trunc_lock); + spin_lock_init(&sdp->sd_bitmap_lock); + + mapping = &sdp->sd_aspace; + + address_space_init_once(mapping); + mapping->a_ops = &gfs2_rgrp_aops; + mapping->host = sb->s_bdev->bd_inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_NOFS); + mapping->private_data = NULL; + mapping->backing_dev_info = sb->s_bdi; + mapping->writeback_index = 0; spin_lock_init(&sdp->sd_log_lock); atomic_set(&sdp->sd_log_pinned, 0); - INIT_LIST_HEAD(&sdp->sd_log_le_buf); INIT_LIST_HEAD(&sdp->sd_log_le_revoke); - INIT_LIST_HEAD(&sdp->sd_log_le_databuf); INIT_LIST_HEAD(&sdp->sd_log_le_ordered); + spin_lock_init(&sdp->sd_ordered_lock); init_waitqueue_head(&sdp->sd_log_waitq); init_waitqueue_head(&sdp->sd_logd_waitq); @@ -112,10 +130,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_rwsem(&sdp->sd_log_flush_lock); atomic_set(&sdp->sd_log_in_flight, 0); init_waitqueue_head(&sdp->sd_log_flush_wait); - - INIT_LIST_HEAD(&sdp->sd_revoke_list); - - mutex_init(&sdp->sd_freeze_lock); + init_waitqueue_head(&sdp->sd_log_frozen_wait); + atomic_set(&sdp->sd_log_freeze, 0); + atomic_set(&sdp->sd_frozen_root, 0); + init_waitqueue_head(&sdp->sd_frozen_root_wait); return sdp; } @@ -139,7 +157,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) if (sb->sb_magic != GFS2_MAGIC || sb->sb_type != GFS2_METATYPE_SB) { if (!silent) - printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); + pr_warn("not a GFS2 filesystem\n"); return -EINVAL; } @@ -161,7 +179,7 @@ static void end_bio_io_page(struct bio *bio, int error) if (!error) SetPageUptodate(page); else - printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); + pr_warn("error %d reading superblock\n", error); unlock_page(page); } @@ -216,14 +234,14 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) page = alloc_page(GFP_NOFS); if (unlikely(!page)) - return -ENOBUFS; + return -ENOMEM; ClearPageUptodate(page); ClearPageDirty(page); lock_page(page); bio = bio_alloc(GFP_NOFS, 1); - bio->bi_sector = sector * (sb->s_blocksize >> 9); + bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9); bio->bi_bdev = sb->s_bdev; bio_add_page(bio, page, PAGE_SIZE, 0); @@ -406,8 +424,8 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, goto fail_live; } - error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops, - CREATE, &sdp->sd_trans_gl); + error = gfs2_glock_get(sdp, GFS2_FREEZE_LOCK, &gfs2_freeze_glops, + CREATE, &sdp->sd_freeze_gl); if (error) { fs_err(sdp, "can't create transaction glock: %d\n", error); goto fail_rename; @@ -416,7 +434,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, return 0; fail_trans: - gfs2_glock_put(sdp->sd_trans_gl); + gfs2_glock_put(sdp->sd_freeze_gl); fail_rename: gfs2_glock_put(sdp->sd_rename_gl); fail_live: @@ -504,67 +522,6 @@ out: return ret; } -/** - * map_journal_extents - create a reusable "extent" mapping from all logical - * blocks to all physical blocks for the given journal. This will save - * us time when writing journal blocks. Most journals will have only one - * extent that maps all their logical blocks. That's because gfs2.mkfs - * arranges the journal blocks sequentially to maximize performance. - * So the extent would map the first block for the entire file length. - * However, gfs2_jadd can happen while file activity is happening, so - * those journals may not be sequential. Less likely is the case where - * the users created their own journals by mounting the metafs and - * laying it out. But it's still possible. These journals might have - * several extents. - * - * TODO: This should be done in bigger chunks rather than one block at a time, - * but since it's only done at mount time, I'm not worried about the - * time it takes. - */ -static int map_journal_extents(struct gfs2_sbd *sdp) -{ - struct gfs2_jdesc *jd = sdp->sd_jdesc; - unsigned int lb; - u64 db, prev_db; /* logical block, disk block, prev disk block */ - struct gfs2_inode *ip = GFS2_I(jd->jd_inode); - struct gfs2_journal_extent *jext = NULL; - struct buffer_head bh; - int rc = 0; - - prev_db = 0; - - for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) { - bh.b_state = 0; - bh.b_blocknr = 0; - bh.b_size = 1 << ip->i_inode.i_blkbits; - rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0); - db = bh.b_blocknr; - if (rc || !db) { - printk(KERN_INFO "GFS2 journal mapping error %d: lb=" - "%u db=%llu\n", rc, lb, (unsigned long long)db); - break; - } - if (!prev_db || db != prev_db + 1) { - jext = kzalloc(sizeof(struct gfs2_journal_extent), - GFP_KERNEL); - if (!jext) { - printk(KERN_INFO "GFS2 error: out of memory " - "mapping journal extents.\n"); - rc = -ENOMEM; - break; - } - jext->dblock = db; - jext->lblock = lb; - jext->blocks = 1; - list_add_tail(&jext->extent_list, &jd->extent_list); - } else { - jext->blocks++; - } - prev_db = db; - } - return rc; -} - static void gfs2_others_may_mount(struct gfs2_sbd *sdp) { char *message = "FIRSTMOUNT=Done"; @@ -623,6 +580,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) break; INIT_LIST_HEAD(&jd->extent_list); + INIT_LIST_HEAD(&jd->jd_revoke_list); + INIT_WORK(&jd->jd_work, gfs2_recover_func); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { @@ -645,6 +604,48 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) return error; } +/** + * check_journal_clean - Make sure a journal is clean for a spectator mount + * @sdp: The GFS2 superblock + * @jd: The journal descriptor + * + * Returns: 0 if the journal is clean or locked, else an error + */ +static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) +{ + int error; + struct gfs2_holder j_gh; + struct gfs2_log_header_host head; + struct gfs2_inode *ip; + + ip = GFS2_I(jd->jd_inode); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | + GL_EXACT | GL_NOCACHE, &j_gh); + if (error) { + fs_err(sdp, "Error locking journal for spectator mount.\n"); + return -EPERM; + } + error = gfs2_jdesc_check(jd); + if (error) { + fs_err(sdp, "Error checking journal for spectator mount.\n"); + goto out_unlock; + } + error = gfs2_find_jhead(jd, &head); + if (error) { + fs_err(sdp, "Error parsing journal for spectator mount.\n"); + goto out_unlock; + } + if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { + error = -EPERM; + fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter " + "must not be a spectator.\n", jd->jd_jid); + } + +out_unlock: + gfs2_glock_dq_uninit(&j_gh); + return error; +} + static int init_journal(struct gfs2_sbd *sdp, int undo) { struct inode *master = sdp->sd_master_dir->d_inode; @@ -724,15 +725,22 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5); /* Map the extents for this journal's blocks */ - map_journal_extents(sdp); + gfs2_map_journal_extents(sdp, sdp->sd_jdesc); } trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); if (sdp->sd_lockstruct.ls_first) { unsigned int x; for (x = 0; x < sdp->sd_journals; x++) { - error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x), - true); + struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x); + + if (sdp->sd_args.ar_spectator) { + error = check_journal_clean(sdp, jd); + if (error) + goto fail_jinode_gh; + continue; + } + error = gfs2_recover_journal(jd, true); if (error) { fs_err(sdp, "error recovering journal %u: %d\n", x, error); @@ -752,7 +760,15 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags); gfs2_glock_dq_uninit(&ji_gh); jindex = 0; - + if (!sdp->sd_args.ar_spectator) { + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, + &sdp->sd_thaw_gh); + if (error) { + fs_err(sdp, "can't acquire freeze glock: %d\n", error); + goto fail_jinode_gh; + } + } + gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); return 0; fail_jinode_gh: @@ -781,6 +797,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) goto fail_qinode; error = init_journal(sdp, undo); + complete_all(&sdp->sd_journal_ready); if (error) goto fail; @@ -906,40 +923,6 @@ fail: return error; } -static int init_threads(struct gfs2_sbd *sdp, int undo) -{ - struct task_struct *p; - int error = 0; - - if (undo) - goto fail_quotad; - - p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); - error = IS_ERR(p); - if (error) { - fs_err(sdp, "can't start logd thread: %d\n", error); - return error; - } - sdp->sd_logd_process = p; - - p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); - error = IS_ERR(p); - if (error) { - fs_err(sdp, "can't start quotad thread: %d\n", error); - goto fail; - } - sdp->sd_quotad_process = p; - - return 0; - - -fail_quotad: - kthread_stop(sdp->sd_quotad_process); -fail: - kthread_stop(sdp->sd_logd_process); - return error; -} - static const match_table_t nolock_tokens = { { Opt_jid, "jid=%d\n", }, { Opt_err, NULL }, @@ -978,7 +961,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) lm = &gfs2_dlm_ops; #endif } else { - printk(KERN_INFO "GFS2: can't find protocol %s\n", proto); + pr_info("can't find protocol %s\n", proto); return -ENOENT; } @@ -1085,7 +1068,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sdp = init_sbd(sb); if (!sdp) { - printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n"); + pr_warn("can't alloc struct gfs2_sbd\n"); return -ENOMEM; } sdp->sd_args = *args; @@ -1204,15 +1187,11 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent goto fail_per_node; } - error = init_threads(sdp, DO); - if (error) - goto fail_per_node; - if (!(sb->s_flags & MS_RDONLY)) { error = gfs2_make_fs_rw(sdp); if (error) { fs_err(sdp, "can't make FS RW: %d\n", error); - goto fail_threads; + goto fail_per_node; } } @@ -1220,8 +1199,6 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent gfs2_online_uevent(sdp); return 0; -fail_threads: - init_threads(sdp, UNDO); fail_per_node: init_per_node(sdp, UNDO); fail_inodes: @@ -1237,6 +1214,7 @@ fail_sb: fail_locking: init_locking(sdp, &mount_gh, UNDO); fail_lm: + complete_all(&sdp->sd_journal_ready); gfs2_gl_hash_clear(sdp); gfs2_lm_unmount(sdp); fail_debug: @@ -1316,8 +1294,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, if (IS_ERR(s)) goto error_bdev; - if (s->s_root) + if (s->s_root) { + /* + * s_umount nests inside bd_mutex during + * __invalidate_device(). blkdev_put() acquires + * bd_mutex and can't be called under s_umount. Drop + * s_umount temporarily. This is safe as we're + * holding an active reference. + */ + up_write(&s->s_umount); blkdev_put(bdev, mode); + down_write(&s->s_umount); + } memset(&args, 0, sizeof(args)); args.ar_quota = GFS2_QUOTA_DEFAULT; @@ -1329,7 +1317,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, error = gfs2_mount_args(&args, data); if (error) { - printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); + pr_warn("can't parse mount arguments\n"); goto error_super; } @@ -1379,15 +1367,15 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, error = kern_path(dev_name, LOOKUP_FOLLOW, &path); if (error) { - printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", - dev_name, error); + pr_warn("path_lookup on %s returned error %d\n", + dev_name, error); return ERR_PTR(error); } s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); if (IS_ERR(s)) { - printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); + pr_warn("gfs2 mount does not exist\n"); return ERR_CAST(s); } if ((flags ^ s->s_flags) & MS_RDONLY) { @@ -1407,7 +1395,7 @@ static void gfs2_kill_sb(struct super_block *sb) return; } - gfs2_meta_syncfs(sdp); + gfs2_log_flush(sdp, NULL, SYNC_FLUSH); dput(sdp->sd_root_dir); dput(sdp->sd_master_dir); sdp->sd_root_dir = NULL; @@ -1425,6 +1413,7 @@ struct file_system_type gfs2_fs_type = { .kill_sb = gfs2_kill_sb, .owner = THIS_MODULE, }; +MODULE_ALIAS_FS("gfs2"); struct file_system_type gfs2meta_fs_type = { .name = "gfs2meta", @@ -1432,4 +1421,4 @@ struct file_system_type gfs2meta_fs_type = { .mount = gfs2_mount_meta, .owner = THIS_MODULE, }; - +MODULE_ALIAS_FS("gfs2meta"); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ae55e248c3b..64b29f7f6b4 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -36,6 +36,8 @@ * the quota file, so it is not being constantly read. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/mm.h> @@ -50,6 +52,13 @@ #include <linux/freezer.h> #include <linux/quota.h> #include <linux/dqblk_xfs.h> +#include <linux/lockref.h> +#include <linux/list_lru.h> +#include <linux/rcupdate.h> +#include <linux/rculist_bl.h> +#include <linux/bit_spinlock.h> +#include <linux/jhash.h> +#include <linux/vmalloc.h> #include "gfs2.h" #include "incore.h" @@ -65,39 +74,63 @@ #include "inode.h" #include "util.h" -#define QUOTA_USER 1 -#define QUOTA_GROUP 0 +#define GFS2_QD_HASH_SHIFT 12 +#define GFS2_QD_HASH_SIZE (1 << GFS2_QD_HASH_SHIFT) +#define GFS2_QD_HASH_MASK (GFS2_QD_HASH_SIZE - 1) -struct gfs2_quota_change_host { - u64 qc_change; - u32 qc_flags; /* GFS2_QCF_... */ - u32 qc_id; -}; +/* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */ +/* -> sd_bitmap_lock */ +static DEFINE_SPINLOCK(qd_lock); +struct list_lru gfs2_qd_lru; -static LIST_HEAD(qd_lru_list); -static atomic_t qd_lru_count = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(qd_lru_lock); +static struct hlist_bl_head qd_hash_table[GFS2_QD_HASH_SIZE]; -int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) +static unsigned int gfs2_qd_hash(const struct gfs2_sbd *sdp, + const struct kqid qid) { - struct gfs2_quota_data *qd; - struct gfs2_sbd *sdp; - int nr_to_scan = sc->nr_to_scan; + unsigned int h; - if (nr_to_scan == 0) - goto out; + h = jhash(&sdp, sizeof(struct gfs2_sbd *), 0); + h = jhash(&qid, sizeof(struct kqid), h); - if (!(sc->gfp_mask & __GFP_FS)) - return -1; + return h & GFS2_QD_HASH_MASK; +} + +static inline void spin_lock_bucket(unsigned int hash) +{ + hlist_bl_lock(&qd_hash_table[hash]); +} + +static inline void spin_unlock_bucket(unsigned int hash) +{ + hlist_bl_unlock(&qd_hash_table[hash]); +} - spin_lock(&qd_lru_lock); - while (nr_to_scan && !list_empty(&qd_lru_list)) { - qd = list_entry(qd_lru_list.next, - struct gfs2_quota_data, qd_reclaim); +static void gfs2_qd_dealloc(struct rcu_head *rcu) +{ + struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu); + kmem_cache_free(gfs2_quotad_cachep, qd); +} + +static void gfs2_qd_dispose(struct list_head *list) +{ + struct gfs2_quota_data *qd; + struct gfs2_sbd *sdp; + + while (!list_empty(list)) { + qd = list_entry(list->next, struct gfs2_quota_data, qd_lru); sdp = qd->qd_gl->gl_sbd; + list_del(&qd->qd_lru); + /* Free from the filesystem-specific list */ + spin_lock(&qd_lock); list_del(&qd->qd_list); + spin_unlock(&qd_lock); + + spin_lock_bucket(qd->qd_hash); + hlist_bl_del_rcu(&qd->qd_hlist); + spin_unlock_bucket(qd->qd_hash); gfs2_assert_warn(sdp, !qd->qd_change); gfs2_assert_warn(sdp, !qd->qd_slot_count); @@ -107,194 +140,228 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) atomic_dec(&sdp->sd_quota_count); /* Delete it from the common reclaim list */ - list_del_init(&qd->qd_reclaim); - atomic_dec(&qd_lru_count); - spin_unlock(&qd_lru_lock); - kmem_cache_free(gfs2_quotad_cachep, qd); - spin_lock(&qd_lru_lock); - nr_to_scan--; + call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); } - spin_unlock(&qd_lru_lock); +} -out: - return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100; + +static enum lru_status gfs2_qd_isolate(struct list_head *item, spinlock_t *lock, void *arg) +{ + struct list_head *dispose = arg; + struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru); + + if (!spin_trylock(&qd->qd_lockref.lock)) + return LRU_SKIP; + + if (qd->qd_lockref.count == 0) { + lockref_mark_dead(&qd->qd_lockref); + list_move(&qd->qd_lru, dispose); + } + + spin_unlock(&qd->qd_lockref.lock); + return LRU_REMOVED; +} + +static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + LIST_HEAD(dispose); + unsigned long freed; + + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + + freed = list_lru_walk_node(&gfs2_qd_lru, sc->nid, gfs2_qd_isolate, + &dispose, &sc->nr_to_scan); + + gfs2_qd_dispose(&dispose); + + return freed; +} + +static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + return vfs_pressure_ratio(list_lru_count_node(&gfs2_qd_lru, sc->nid)); +} + +struct shrinker gfs2_qd_shrinker = { + .count_objects = gfs2_qd_shrink_count, + .scan_objects = gfs2_qd_shrink_scan, + .seeks = DEFAULT_SEEKS, + .flags = SHRINKER_NUMA_AWARE, +}; + + +static u64 qd2index(struct gfs2_quota_data *qd) +{ + struct kqid qid = qd->qd_id; + return (2 * (u64)from_kqid(&init_user_ns, qid)) + + ((qid.type == USRQUOTA) ? 0 : 1); } static u64 qd2offset(struct gfs2_quota_data *qd) { u64 offset; - offset = 2 * (u64)qd->qd_id + !test_bit(QDF_USER, &qd->qd_flags); + offset = qd2index(qd); offset *= sizeof(struct gfs2_quota); return offset; } -static int qd_alloc(struct gfs2_sbd *sdp, int user, u32 id, - struct gfs2_quota_data **qdp) +static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid) { struct gfs2_quota_data *qd; int error; qd = kmem_cache_zalloc(gfs2_quotad_cachep, GFP_NOFS); if (!qd) - return -ENOMEM; + return NULL; - atomic_set(&qd->qd_count, 1); - qd->qd_id = id; - if (user) - set_bit(QDF_USER, &qd->qd_flags); + qd->qd_sbd = sdp; + qd->qd_lockref.count = 1; + spin_lock_init(&qd->qd_lockref.lock); + qd->qd_id = qid; qd->qd_slot = -1; - INIT_LIST_HEAD(&qd->qd_reclaim); + INIT_LIST_HEAD(&qd->qd_lru); + qd->qd_hash = hash; - error = gfs2_glock_get(sdp, 2 * (u64)id + !user, + error = gfs2_glock_get(sdp, qd2index(qd), &gfs2_quota_glops, CREATE, &qd->qd_gl); if (error) goto fail; - *qdp = qd; - - return 0; + return qd; fail: kmem_cache_free(gfs2_quotad_cachep, qd); - return error; + return NULL; } -static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, - struct gfs2_quota_data **qdp) +static struct gfs2_quota_data *gfs2_qd_search_bucket(unsigned int hash, + const struct gfs2_sbd *sdp, + struct kqid qid) { - struct gfs2_quota_data *qd = NULL, *new_qd = NULL; - int error, found; - - *qdp = NULL; + struct gfs2_quota_data *qd; + struct hlist_bl_node *h; - for (;;) { - found = 0; - spin_lock(&qd_lru_lock); - list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - if (qd->qd_id == id && - !test_bit(QDF_USER, &qd->qd_flags) == !user) { - if (!atomic_read(&qd->qd_count) && - !list_empty(&qd->qd_reclaim)) { - /* Remove it from reclaim list */ - list_del_init(&qd->qd_reclaim); - atomic_dec(&qd_lru_count); - } - atomic_inc(&qd->qd_count); - found = 1; - break; - } + hlist_bl_for_each_entry_rcu(qd, h, &qd_hash_table[hash], qd_hlist) { + if (!qid_eq(qd->qd_id, qid)) + continue; + if (qd->qd_sbd != sdp) + continue; + if (lockref_get_not_dead(&qd->qd_lockref)) { + list_lru_del(&gfs2_qd_lru, &qd->qd_lru); + return qd; } + } - if (!found) - qd = NULL; + return NULL; +} - if (!qd && new_qd) { - qd = new_qd; - list_add(&qd->qd_list, &sdp->sd_quota_list); - atomic_inc(&sdp->sd_quota_count); - new_qd = NULL; - } - spin_unlock(&qd_lru_lock); +static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, + struct gfs2_quota_data **qdp) +{ + struct gfs2_quota_data *qd, *new_qd; + unsigned int hash = gfs2_qd_hash(sdp, qid); - if (qd) { - if (new_qd) { - gfs2_glock_put(new_qd->qd_gl); - kmem_cache_free(gfs2_quotad_cachep, new_qd); - } - *qdp = qd; - return 0; - } + rcu_read_lock(); + *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); + rcu_read_unlock(); - error = qd_alloc(sdp, user, id, &new_qd); - if (error) - return error; + if (qd) + return 0; + + new_qd = qd_alloc(hash, sdp, qid); + if (!new_qd) + return -ENOMEM; + + spin_lock(&qd_lock); + spin_lock_bucket(hash); + *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); + if (qd == NULL) { + *qdp = new_qd; + list_add(&new_qd->qd_list, &sdp->sd_quota_list); + hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]); + atomic_inc(&sdp->sd_quota_count); + } + spin_unlock_bucket(hash); + spin_unlock(&qd_lock); + + if (qd) { + gfs2_glock_put(new_qd->qd_gl); + kmem_cache_free(gfs2_quotad_cachep, new_qd); } + + return 0; } + static void qd_hold(struct gfs2_quota_data *qd) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - gfs2_assert(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); + gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref)); + lockref_get(&qd->qd_lockref); } static void qd_put(struct gfs2_quota_data *qd) { - if (atomic_dec_and_lock(&qd->qd_count, &qd_lru_lock)) { - /* Add to the reclaim list */ - list_add_tail(&qd->qd_reclaim, &qd_lru_list); - atomic_inc(&qd_lru_count); - spin_unlock(&qd_lru_lock); - } + if (lockref_put_or_lock(&qd->qd_lockref)) + return; + + qd->qd_lockref.count = 0; + list_lru_add(&gfs2_qd_lru, &qd->qd_lru); + spin_unlock(&qd->qd_lockref.lock); + } static int slot_get(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - unsigned int c, o = 0, b; - unsigned char byte = 0; + struct gfs2_sbd *sdp = qd->qd_sbd; + unsigned int bit; + int error = 0; - spin_lock(&qd_lru_lock); + spin_lock(&sdp->sd_bitmap_lock); + if (qd->qd_slot_count != 0) + goto out; - if (qd->qd_slot_count++) { - spin_unlock(&qd_lru_lock); - return 0; + error = -ENOSPC; + bit = find_first_zero_bit(sdp->sd_quota_bitmap, sdp->sd_quota_slots); + if (bit < sdp->sd_quota_slots) { + set_bit(bit, sdp->sd_quota_bitmap); + qd->qd_slot = bit; + error = 0; +out: + qd->qd_slot_count++; } + spin_unlock(&sdp->sd_bitmap_lock); - for (c = 0; c < sdp->sd_quota_chunks; c++) - for (o = 0; o < PAGE_SIZE; o++) { - byte = sdp->sd_quota_bitmap[c][o]; - if (byte != 0xFF) - goto found; - } - - goto fail; - -found: - for (b = 0; b < 8; b++) - if (!(byte & (1 << b))) - break; - qd->qd_slot = c * (8 * PAGE_SIZE) + o * 8 + b; - - if (qd->qd_slot >= sdp->sd_quota_slots) - goto fail; - - sdp->sd_quota_bitmap[c][o] |= 1 << b; - - spin_unlock(&qd_lru_lock); - - return 0; - -fail: - qd->qd_slot_count--; - spin_unlock(&qd_lru_lock); - return -ENOSPC; + return error; } static void slot_hold(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_sbd; - spin_lock(&qd_lru_lock); + spin_lock(&sdp->sd_bitmap_lock); gfs2_assert(sdp, qd->qd_slot_count); qd->qd_slot_count++; - spin_unlock(&qd_lru_lock); + spin_unlock(&sdp->sd_bitmap_lock); } static void slot_put(struct gfs2_quota_data *qd) { - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; + struct gfs2_sbd *sdp = qd->qd_sbd; - spin_lock(&qd_lru_lock); + spin_lock(&sdp->sd_bitmap_lock); gfs2_assert(sdp, qd->qd_slot_count); if (!--qd->qd_slot_count) { - gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, qd->qd_slot, 0); + BUG_ON(!test_and_clear_bit(qd->qd_slot, sdp->sd_quota_bitmap)); qd->qd_slot = -1; } - spin_unlock(&qd_lru_lock); + spin_unlock(&sdp->sd_bitmap_lock); } static int bh_get(struct gfs2_quota_data *qd) @@ -358,6 +425,24 @@ static void bh_put(struct gfs2_quota_data *qd) mutex_unlock(&sdp->sd_quota_mutex); } +static int qd_check_sync(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd, + u64 *sync_gen) +{ + if (test_bit(QDF_LOCKED, &qd->qd_flags) || + !test_bit(QDF_CHANGE, &qd->qd_flags) || + (sync_gen && (qd->qd_sync_gen >= *sync_gen))) + return 0; + + if (!lockref_get_not_dead(&qd->qd_lockref)) + return 0; + + list_move_tail(&qd->qd_list, &sdp->sd_quota_list); + set_bit(QDF_LOCKED, &qd->qd_flags); + qd->qd_change_sync = qd->qd_change; + slot_hold(qd); + return 1; +} + static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) { struct gfs2_quota_data *qd = NULL; @@ -369,31 +454,18 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) if (sdp->sd_vfs->s_flags & MS_RDONLY) return 0; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) { - if (test_bit(QDF_LOCKED, &qd->qd_flags) || - !test_bit(QDF_CHANGE, &qd->qd_flags) || - qd->qd_sync_gen >= sdp->sd_quota_sync_gen) - continue; - - list_move_tail(&qd->qd_list, &sdp->sd_quota_list); - - set_bit(QDF_LOCKED, &qd->qd_flags); - gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); - qd->qd_change_sync = qd->qd_change; - gfs2_assert_warn(sdp, qd->qd_slot_count); - qd->qd_slot_count++; - found = 1; - - break; + found = qd_check_sync(sdp, qd, &sdp->sd_quota_sync_gen); + if (found) + break; } if (!found) qd = NULL; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (qd) { gfs2_assert_warn(sdp, qd->qd_change_sync); @@ -411,43 +483,6 @@ static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp) return 0; } -static int qd_trylock(struct gfs2_quota_data *qd) -{ - struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - - if (sdp->sd_vfs->s_flags & MS_RDONLY) - return 0; - - spin_lock(&qd_lru_lock); - - if (test_bit(QDF_LOCKED, &qd->qd_flags) || - !test_bit(QDF_CHANGE, &qd->qd_flags)) { - spin_unlock(&qd_lru_lock); - return 0; - } - - list_move_tail(&qd->qd_list, &sdp->sd_quota_list); - - set_bit(QDF_LOCKED, &qd->qd_flags); - gfs2_assert_warn(sdp, atomic_read(&qd->qd_count)); - atomic_inc(&qd->qd_count); - qd->qd_change_sync = qd->qd_change; - gfs2_assert_warn(sdp, qd->qd_slot_count); - qd->qd_slot_count++; - - spin_unlock(&qd_lru_lock); - - gfs2_assert_warn(sdp, qd->qd_change_sync); - if (bh_get(qd)) { - clear_bit(QDF_LOCKED, &qd->qd_flags); - slot_put(qd); - qd_put(qd); - return 0; - } - - return 1; -} - static void qd_unlock(struct gfs2_quota_data *qd) { gfs2_assert_warn(qd->qd_gl->gl_sbd, @@ -458,12 +493,12 @@ static void qd_unlock(struct gfs2_quota_data *qd) qd_put(qd); } -static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, +static int qdsb_get(struct gfs2_sbd *sdp, struct kqid qid, struct gfs2_quota_data **qdp) { int error; - error = qd_get(sdp, user, id, qdp); + error = qd_get(sdp, qid, qdp); if (error) return error; @@ -491,7 +526,7 @@ static void qdsb_put(struct gfs2_quota_data *qd) qd_put(qd); } -int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) +int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data **qd; @@ -512,28 +547,30 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); + error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd); if (error) goto out; ip->i_res->rs_qa_qd_num++; qd++; - error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); + error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd); if (error) goto out; ip->i_res->rs_qa_qd_num++; qd++; - if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { - error = qdsb_get(sdp, QUOTA_USER, uid, qd); + if (!uid_eq(uid, NO_UID_QUOTA_CHANGE) && + !uid_eq(uid, ip->i_inode.i_uid)) { + error = qdsb_get(sdp, make_kqid_uid(uid), qd); if (error) goto out; ip->i_res->rs_qa_qd_num++; qd++; } - if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { - error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); + if (!gid_eq(gid, NO_GID_QUOTA_CHANGE) && + !gid_eq(gid, ip->i_inode.i_gid)) { + error = qdsb_get(sdp, make_kqid_gid(gid), qd); if (error) goto out; ip->i_res->rs_qa_qd_num++; @@ -567,18 +604,10 @@ static int sort_qd(const void *a, const void *b) const struct gfs2_quota_data *qd_a = *(const struct gfs2_quota_data **)a; const struct gfs2_quota_data *qd_b = *(const struct gfs2_quota_data **)b; - if (!test_bit(QDF_USER, &qd_a->qd_flags) != - !test_bit(QDF_USER, &qd_b->qd_flags)) { - if (test_bit(QDF_USER, &qd_a->qd_flags)) - return -1; - else - return 1; - } - if (qd_a->qd_id < qd_b->qd_id) + if (qid_lt(qd_a->qd_id, qd_b->qd_id)) return -1; - if (qd_a->qd_id > qd_b->qd_id) + if (qid_lt(qd_b->qd_id, qd_a->qd_id)) return 1; - return 0; } @@ -590,22 +619,22 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) s64 x; mutex_lock(&sdp->sd_quota_mutex); - gfs2_trans_add_bh(ip->i_gl, qd->qd_bh, 1); + gfs2_trans_add_meta(ip->i_gl, qd->qd_bh); if (!test_bit(QDF_CHANGE, &qd->qd_flags)) { qc->qc_change = 0; qc->qc_flags = 0; - if (test_bit(QDF_USER, &qd->qd_flags)) + if (qd->qd_id.type == USRQUOTA) qc->qc_flags = cpu_to_be32(GFS2_QCF_USER); - qc->qc_id = cpu_to_be32(qd->qd_id); + qc->qc_id = cpu_to_be32(from_kqid(&init_user_ns, qd->qd_id)); } x = be64_to_cpu(qc->qc_change) + change; qc->qc_change = cpu_to_be64(x); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); qd->qd_change = x; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (!x) { gfs2_assert_warn(sdp, test_bit(QDF_CHANGE, &qd->qd_flags)); @@ -649,7 +678,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct buffer_head *bh; struct page *page; void *kaddr, *ptr; - struct gfs2_quota q, *qp; + struct gfs2_quota q; int err, nbytes; u64 size; @@ -665,28 +694,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, return err; err = -EIO; - qp = &q; - qp->qu_value = be64_to_cpu(qp->qu_value); - qp->qu_value += change; - qp->qu_value = cpu_to_be64(qp->qu_value); - qd->qd_qb.qb_value = qp->qu_value; + be64_add_cpu(&q.qu_value, change); + qd->qd_qb.qb_value = q.qu_value; if (fdq) { if (fdq->d_fieldmask & FS_DQ_BSOFT) { - qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_warn = qp->qu_warn; + q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_warn = q.qu_warn; } if (fdq->d_fieldmask & FS_DQ_BHARD) { - qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_limit = qp->qu_limit; + q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_limit = q.qu_limit; } if (fdq->d_fieldmask & FS_DQ_BCOUNT) { - qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); - qd->qd_qb.qb_value = qp->qu_value; + q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); + qd->qd_qb.qb_value = q.qu_value; } } /* Write the quota into the quota file on disk */ - ptr = qp; + ptr = &q; nbytes = sizeof(struct gfs2_quota); get_a_page: page = find_or_create_page(mapping, index, GFP_NOFS); @@ -726,7 +752,7 @@ get_a_page: goto unlock_out; } - gfs2_trans_add_bh(ip->i_gl, bh, 0); + gfs2_trans_add_data(ip->i_gl, bh); kaddr = kmap_atomic(page); if (offset + sizeof(struct gfs2_quota) > PAGE_CACHE_SIZE) @@ -752,6 +778,7 @@ get_a_page: i_size_write(inode, size); inode->i_mtime = inode->i_atime = CURRENT_TIME; mark_inode_dirty(inode); + set_bit(QDF_REFRESH, &qd->qd_flags); return 0; unlock_out: @@ -764,6 +791,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) { struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); + struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned int data_blocks, ind_blocks; struct gfs2_holder *ghs, i_gh; unsigned int qx, x; @@ -816,7 +844,8 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; reserved = 1 + (nalloc * (data_blocks + ind_blocks)); - error = gfs2_inplace_reserve(ip, reserved, 0); + ap.target = reserved; + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_alloc; @@ -851,7 +880,7 @@ out: gfs2_glock_dq_uninit(&ghs[qx]); mutex_unlock(&ip->i_inode.i_mutex); kfree(ghs); - gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); + gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl, NORMAL_FLUSH); return error; } @@ -925,7 +954,7 @@ fail: return error; } -int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) +int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qd; @@ -975,9 +1004,9 @@ static int need_sync(struct gfs2_quota_data *qd) if (!qd->qd_qb.qb_limit) return 0; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); value = qd->qd_change; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); spin_lock(>->gt_spin); num = gt->gt_quota_scale_num; @@ -1002,9 +1031,11 @@ static int need_sync(struct gfs2_quota_data *qd) void gfs2_quota_unlock(struct gfs2_inode *ip) { + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qda[4]; unsigned int count = 0; unsigned int x; + int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) goto out; @@ -1017,9 +1048,25 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) sync = need_sync(qd); gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]); + if (!sync) + continue; + + spin_lock(&qd_lock); + found = qd_check_sync(sdp, qd, NULL); + spin_unlock(&qd_lock); + + if (!found) + continue; + + gfs2_assert_warn(sdp, qd->qd_change_sync); + if (bh_get(qd)) { + clear_bit(QDF_LOCKED, &qd->qd_flags); + slot_put(qd); + qd_put(qd); + continue; + } - if (sync && qd_trylock(qd)) - qda[count++] = qd; + qda[count++] = qd; } if (count) { @@ -1038,15 +1085,15 @@ static int print_message(struct gfs2_quota_data *qd, char *type) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", - sdp->sd_fsname, type, - (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", - qd->qd_id); + fs_info(sdp, "quota %s for %s %u\n", + type, + (qd->qd_id.type == USRQUOTA) ? "user" : "group", + from_kqid(&init_user_ns, qd->qd_id)); return 0; } -int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) +int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_quota_data *qd; @@ -1063,21 +1110,18 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { qd = ip->i_res->rs_qa_qd[x]; - if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || - (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) + if (!(qid_eq(qd->qd_id, make_kqid_uid(uid)) || + qid_eq(qd->qd_id, make_kqid_gid(gid)))) continue; value = (s64)be64_to_cpu(qd->qd_qb.qb_value); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); value += qd->qd_change; - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { print_message(qd, "exceeded"); - quota_send_warning(make_kqid(&init_user_ns, - test_bit(QDF_USER, &qd->qd_flags) ? - USRQUOTA : GRPQUOTA, - qd->qd_id), + quota_send_warning(qd->qd_id, sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); error = -EDQUOT; @@ -1087,10 +1131,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) time_after_eq(jiffies, qd->qd_last_warn + gfs2_tune_get(sdp, gt_quota_warn_period) * HZ)) { - quota_send_warning(make_kqid(&init_user_ns, - test_bit(QDF_USER, &qd->qd_flags) ? - USRQUOTA : GRPQUOTA, - qd->qd_id), + quota_send_warning(qd->qd_id, sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); error = print_message(qd, "warning"); qd->qd_last_warn = jiffies; @@ -1101,7 +1142,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) } void gfs2_quota_change(struct gfs2_inode *ip, s64 change, - u32 uid, u32 gid) + kuid_t uid, kgid_t gid) { struct gfs2_quota_data *qd; unsigned int x; @@ -1114,8 +1155,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) { qd = ip->i_res->rs_qa_qd[x]; - if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || - (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { + if (qid_eq(qd->qd_id, make_kqid_uid(uid)) || + qid_eq(qd->qd_id, make_kqid_gid(gid))) { do_qc(qd, change); } } @@ -1125,17 +1166,18 @@ int gfs2_quota_sync(struct super_block *sb, int type) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; - unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync); + unsigned int max_qd = PAGE_SIZE/sizeof(struct gfs2_holder); unsigned int num_qd; unsigned int x; int error = 0; - sdp->sd_quota_sync_gen++; - qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL); if (!qda) return -ENOMEM; + mutex_lock(&sdp->sd_quota_sync_mutex); + sdp->sd_quota_sync_gen++; + do { num_qd = 0; @@ -1160,23 +1202,19 @@ int gfs2_quota_sync(struct super_block *sb, int type) } } while (!error && num_qd == max_qd); + mutex_unlock(&sdp->sd_quota_sync_mutex); kfree(qda); return error; } -static int gfs2_quota_sync_timeo(struct super_block *sb, int type) -{ - return gfs2_quota_sync(sb, type); -} - -int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) +int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid) { struct gfs2_quota_data *qd; struct gfs2_holder q_gh; int error; - error = qd_get(sdp, user, id, &qd); + error = qd_get(sdp, qid, &qd); if (error) return error; @@ -1188,15 +1226,6 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) return error; } -static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf) -{ - const struct gfs2_quota_change *str = buf; - - qc->qc_change = be64_to_cpu(str->qc_change); - qc->qc_flags = be32_to_cpu(str->qc_flags); - qc->qc_id = be32_to_cpu(str->qc_id); -} - int gfs2_quota_init(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); @@ -1204,6 +1233,8 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; unsigned int x, slot = 0; unsigned int found = 0; + unsigned int hash; + unsigned int bm_size; u64 dblock; u32 extlen = 0; int error; @@ -1212,23 +1243,19 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) return -EIO; sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; - sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); - + bm_size = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * sizeof(unsigned long)); + bm_size *= sizeof(unsigned long); error = -ENOMEM; - - sdp->sd_quota_bitmap = kcalloc(sdp->sd_quota_chunks, - sizeof(unsigned char *), GFP_NOFS); + sdp->sd_quota_bitmap = kzalloc(bm_size, GFP_NOFS | __GFP_NOWARN); + if (sdp->sd_quota_bitmap == NULL) + sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS | + __GFP_ZERO, PAGE_KERNEL); if (!sdp->sd_quota_bitmap) return error; - for (x = 0; x < sdp->sd_quota_chunks; x++) { - sdp->sd_quota_bitmap[x] = kzalloc(PAGE_SIZE, GFP_NOFS); - if (!sdp->sd_quota_bitmap[x]) - goto fail; - } - for (x = 0; x < blocks; x++) { struct buffer_head *bh; + const struct gfs2_quota_change *qc; unsigned int y; if (!extlen) { @@ -1246,34 +1273,41 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) goto fail; } + qc = (const struct gfs2_quota_change *)(bh->b_data + sizeof(struct gfs2_meta_header)); for (y = 0; y < sdp->sd_qc_per_block && slot < sdp->sd_quota_slots; y++, slot++) { - struct gfs2_quota_change_host qc; struct gfs2_quota_data *qd; - - gfs2_quota_change_in(&qc, bh->b_data + - sizeof(struct gfs2_meta_header) + - y * sizeof(struct gfs2_quota_change)); - if (!qc.qc_change) + s64 qc_change = be64_to_cpu(qc->qc_change); + u32 qc_flags = be32_to_cpu(qc->qc_flags); + enum quota_type qtype = (qc_flags & GFS2_QCF_USER) ? + USRQUOTA : GRPQUOTA; + struct kqid qc_id = make_kqid(&init_user_ns, qtype, + be32_to_cpu(qc->qc_id)); + qc++; + if (!qc_change) continue; - error = qd_alloc(sdp, (qc.qc_flags & GFS2_QCF_USER), - qc.qc_id, &qd); - if (error) { + hash = gfs2_qd_hash(sdp, qc_id); + qd = qd_alloc(hash, sdp, qc_id); + if (qd == NULL) { brelse(bh); goto fail; } set_bit(QDF_CHANGE, &qd->qd_flags); - qd->qd_change = qc.qc_change; + qd->qd_change = qc_change; qd->qd_slot = slot; qd->qd_slot_count = 1; - spin_lock(&qd_lru_lock); - gfs2_icbit_munge(sdp, sdp->sd_quota_bitmap, slot, 1); + spin_lock(&qd_lock); + BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap)); list_add(&qd->qd_list, &sdp->sd_quota_list); atomic_inc(&sdp->sd_quota_count); - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); + + spin_lock_bucket(hash); + hlist_bl_add_head_rcu(&qd->qd_hlist, &qd_hash_table[hash]); + spin_unlock_bucket(hash); found++; } @@ -1297,51 +1331,41 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp) { struct list_head *head = &sdp->sd_quota_list; struct gfs2_quota_data *qd; - unsigned int x; - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); while (!list_empty(head)) { qd = list_entry(head->prev, struct gfs2_quota_data, qd_list); - if (atomic_read(&qd->qd_count) > 1 || - (atomic_read(&qd->qd_count) && - !test_bit(QDF_CHANGE, &qd->qd_flags))) { - list_move(&qd->qd_list, head); - spin_unlock(&qd_lru_lock); - schedule(); - spin_lock(&qd_lru_lock); - continue; - } - list_del(&qd->qd_list); + /* Also remove if this qd exists in the reclaim list */ - if (!list_empty(&qd->qd_reclaim)) { - list_del_init(&qd->qd_reclaim); - atomic_dec(&qd_lru_count); - } + list_lru_del(&gfs2_qd_lru, &qd->qd_lru); atomic_dec(&sdp->sd_quota_count); - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); + + spin_lock_bucket(qd->qd_hash); + hlist_bl_del_rcu(&qd->qd_hlist); + spin_unlock_bucket(qd->qd_hash); - if (!atomic_read(&qd->qd_count)) { - gfs2_assert_warn(sdp, !qd->qd_change); - gfs2_assert_warn(sdp, !qd->qd_slot_count); - } else - gfs2_assert_warn(sdp, qd->qd_slot_count == 1); + gfs2_assert_warn(sdp, !qd->qd_change); + gfs2_assert_warn(sdp, !qd->qd_slot_count); gfs2_assert_warn(sdp, !qd->qd_bh_count); gfs2_glock_put(qd->qd_gl); - kmem_cache_free(gfs2_quotad_cachep, qd); + call_rcu(&qd->qd_rcu, gfs2_qd_dealloc); - spin_lock(&qd_lru_lock); + spin_lock(&qd_lock); } - spin_unlock(&qd_lru_lock); + spin_unlock(&qd_lock); gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count)); if (sdp->sd_quota_bitmap) { - for (x = 0; x < sdp->sd_quota_chunks; x++) - kfree(sdp->sd_quota_bitmap[x]); - kfree(sdp->sd_quota_bitmap); + if (is_vmalloc_addr(sdp->sd_quota_bitmap)) + vfree(sdp->sd_quota_bitmap); + else + kfree(sdp->sd_quota_bitmap); + sdp->sd_quota_bitmap = NULL; } } @@ -1424,7 +1448,7 @@ int gfs2_quotad(void *data) &tune->gt_statfs_quantum); /* Update quota file */ - quotad_check_timeo(sdp, "sync", gfs2_quota_sync_timeo, t, + quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, "ad_timeo, &tune->gt_quota_quantum); /* Check for & recover partially truncated inodes */ @@ -1473,7 +1497,7 @@ static int gfs2_quota_get_xstate(struct super_block *sb, } fqs->qs_uquota.qfs_nextents = 1; /* unsupported */ fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */ - fqs->qs_incoredqs = atomic_read(&qd_lru_count); + fqs->qs_incoredqs = list_lru_count(&gfs2_qd_lru); return 0; } @@ -1485,21 +1509,17 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, struct gfs2_quota_data *qd; struct gfs2_holder q_gh; int error; - int type; memset(fdq, 0, sizeof(struct fs_disk_quota)); if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return -ESRCH; /* Crazy XFS error code */ - if (qid.type == USRQUOTA) - type = QUOTA_USER; - else if (qid.type == GRPQUOTA) - type = QUOTA_GROUP; - else + if ((qid.type != USRQUOTA) && + (qid.type != GRPQUOTA)) return -EINVAL; - error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); + error = qd_get(sdp, qid, &qd); if (error) return error; error = do_glock(qd, FORCE, &q_gh); @@ -1508,8 +1528,8 @@ static int gfs2_get_dqblk(struct super_block *sb, struct kqid qid, qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lksb.sb_lvbptr; fdq->d_version = FS_DQUOT_VERSION; - fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; - fdq->d_id = from_kqid(&init_user_ns, qid); + fdq->d_flags = (qid.type == USRQUOTA) ? FS_USER_QUOTA : FS_GROUP_QUOTA; + fdq->d_id = from_kqid_munged(current_user_ns(), qid); fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift; fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift; fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift; @@ -1535,32 +1555,18 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, int alloc_required; loff_t offset; int error; - int type; if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return -ESRCH; /* Crazy XFS error code */ - switch(qid.type) { - case USRQUOTA: - type = QUOTA_USER; - if (fdq->d_flags != FS_USER_QUOTA) - return -EINVAL; - break; - case GRPQUOTA: - type = QUOTA_GROUP; - if (fdq->d_flags != FS_GROUP_QUOTA) - return -EINVAL; - break; - default: + if ((qid.type != USRQUOTA) && + (qid.type != GRPQUOTA)) return -EINVAL; - } if (fdq->d_fieldmask & ~GFS2_FIELDMASK) return -EINVAL; - if (fdq->d_id != from_kqid(&init_user_ns, qid)) - return -EINVAL; - error = qd_get(sdp, type, from_kqid(&init_user_ns, qid), &qd); + error = qd_get(sdp, qid, &qd); if (error) return error; @@ -1602,10 +1608,12 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid, if (gfs2_is_stuffed(ip)) alloc_required = 1; if (alloc_required) { + struct gfs2_alloc_parms ap = { .aflags = 0, }; gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), &data_blocks, &ind_blocks); blocks = 1 + data_blocks + ind_blocks; - error = gfs2_inplace_reserve(ip, blocks, 0); + ap.target = blocks; + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_i; blocks += gfs2_rg_blocks(ip, blocks); @@ -1641,3 +1649,11 @@ const struct quotactl_ops gfs2_quotactl_ops = { .get_dqblk = gfs2_get_dqblk, .set_dqblk = gfs2_set_dqblk, }; + +void __init gfs2_quota_hash_init(void) +{ + unsigned i; + + for(i = 0; i < GFS2_QD_HASH_SIZE; i++) + INIT_HLIST_BL_HEAD(&qd_hash_table[i]); +} diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index f25d98b8790..55d506eb3c4 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -10,24 +10,26 @@ #ifndef __QUOTA_DOT_H__ #define __QUOTA_DOT_H__ +#include <linux/list_lru.h> + struct gfs2_inode; struct gfs2_sbd; -struct shrink_control; -#define NO_QUOTA_CHANGE ((u32)-1) +#define NO_UID_QUOTA_CHANGE INVALID_UID +#define NO_GID_QUOTA_CHANGE INVALID_GID -extern int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid); +extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_unhold(struct gfs2_inode *ip); -extern int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid); +extern int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_unlock(struct gfs2_inode *ip); -extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); +extern int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, - u32 uid, u32 gid); + kuid_t uid, kgid_t gid); extern int gfs2_quota_sync(struct super_block *sb, int type); -extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); +extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid); extern int gfs2_quota_init(struct gfs2_sbd *sdp); extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); @@ -41,7 +43,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) int ret; if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - ret = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) return ret; if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) @@ -52,8 +54,9 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) return ret; } -extern int gfs2_shrink_qd_memory(struct shrinker *shrink, - struct shrink_control *sc); extern const struct quotactl_ops gfs2_quotactl_ops; +extern struct shrinker gfs2_qd_shrinker; +extern struct list_lru gfs2_qd_lru; +extern void __init gfs2_quota_hash_init(void); #endif /* __QUOTA_DOT_H__ */ diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 963b2d75200..94555d4c569 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -52,9 +52,9 @@ int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, return error; } -int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) +int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) { - struct list_head *head = &sdp->sd_revoke_list; + struct list_head *head = &jd->jd_revoke_list; struct gfs2_revoke_replay *rr; int found = 0; @@ -81,13 +81,13 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) return 1; } -int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) +int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) { struct gfs2_revoke_replay *rr; int wrap, a, b, revoke; int found = 0; - list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) { + list_for_each_entry(rr, &jd->jd_revoke_list, rr_list) { if (rr->rr_blkno == blkno) { found = 1; break; @@ -97,17 +97,17 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) if (!found) return 0; - wrap = (rr->rr_where < sdp->sd_replay_tail); - a = (sdp->sd_replay_tail < where); + wrap = (rr->rr_where < jd->jd_replay_tail); + a = (jd->jd_replay_tail < where); b = (where < rr->rr_where); revoke = (wrap) ? (a || b) : (a && b); return revoke; } -void gfs2_revoke_clean(struct gfs2_sbd *sdp) +void gfs2_revoke_clean(struct gfs2_jdesc *jd) { - struct list_head *head = &sdp->sd_revoke_list; + struct list_head *head = &jd->jd_revoke_list; struct gfs2_revoke_replay *rr; while (!list_empty(head)) { @@ -454,7 +454,7 @@ void gfs2_recover_func(struct work_struct *work) struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host head; - struct gfs2_holder j_gh, ji_gh, t_gh; + struct gfs2_holder j_gh, ji_gh, thaw_gh; unsigned long t; int ro = 0; unsigned int pass; @@ -508,11 +508,11 @@ void gfs2_recover_func(struct work_struct *work) t = jiffies; - /* Acquire a shared hold on the transaction lock */ + /* Acquire a shared hold on the freeze lock */ - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | LM_FLAG_PRIORITY | - GL_NOCACHE, &t_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, + LM_FLAG_NOEXP | LM_FLAG_PRIORITY, + &thaw_gh); if (error) goto fail_gunlock_ji; @@ -538,7 +538,7 @@ void gfs2_recover_func(struct work_struct *work) fs_warn(sdp, "jid=%u: Can't replay: read-only block " "device\n", jd->jd_jid); error = -EROFS; - goto fail_gunlock_tr; + goto fail_gunlock_thaw; } fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid); @@ -549,14 +549,14 @@ void gfs2_recover_func(struct work_struct *work) head.lh_blkno, pass); lops_after_scan(jd, error, pass); if (error) - goto fail_gunlock_tr; + goto fail_gunlock_thaw; } error = clean_journal(jd, &head); if (error) - goto fail_gunlock_tr; + goto fail_gunlock_thaw; - gfs2_glock_dq_uninit(&t_gh); + gfs2_glock_dq_uninit(&thaw_gh); t = DIV_ROUND_UP(jiffies - t, HZ); fs_info(sdp, "jid=%u: Journal replayed in %lus\n", jd->jd_jid, t); @@ -572,8 +572,8 @@ void gfs2_recover_func(struct work_struct *work) fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); goto done; -fail_gunlock_tr: - gfs2_glock_dq_uninit(&t_gh); +fail_gunlock_thaw: + gfs2_glock_dq_uninit(&thaw_gh); fail_gunlock_ji: if (jlocked) { gfs2_glock_dq_uninit(&ji_gh); @@ -587,7 +587,7 @@ fail: gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); done: clear_bit(JDF_RECOVERY, &jd->jd_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&jd->jd_flags, JDF_RECOVERY); } diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 2226136c764..6142836cce9 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -23,9 +23,9 @@ static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, struct buffer_head **bh); -extern int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); -extern int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); -extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); +extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); +extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b7eff078fe9..f4cb9c0d6bb 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/completion.h> @@ -57,6 +59,11 @@ * 3 = Used (metadata) */ +struct gfs2_extent { + struct gfs2_rbm rbm; + u32 len; +}; + static const char valid_change[16] = { /* current */ /* n */ 0, 1, 1, 1, @@ -65,8 +72,9 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, - const struct gfs2_inode *ip, bool nowrap); +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, + const struct gfs2_inode *ip, bool nowrap, + const struct gfs2_alloc_parms *ap); /** @@ -81,32 +89,32 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - unsigned int buflen = rbm->bi->bi_len; + struct gfs2_bitmap *bi = rbm_bi(rbm); + unsigned int buflen = bi->bi_len; const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); - end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; + byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = bi->bi_bh->b_data + bi->bi_offset + buflen; BUG_ON(byte1 >= end); cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { - printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " - "new_state=%d\n", rbm->offset, cur_state, new_state); - printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", - (unsigned long long)rbm->rgd->rd_addr, - rbm->bi->bi_start); - printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", - rbm->bi->bi_offset, rbm->bi->bi_len); + pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n", + rbm->offset, cur_state, new_state); + pr_warn("rgrp=0x%llx bi_start=0x%x\n", + (unsigned long long)rbm->rgd->rd_addr, bi->bi_start); + pr_warn("bi_offset=0x%x bi_len=0x%x\n", + bi->bi_offset, bi->bi_len); dump_stack(); gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (do_clone && rbm->bi->bi_clone) { - byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + if (do_clone && bi->bi_clone) { + byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -121,7 +129,8 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; + struct gfs2_bitmap *bi = rbm_bi(rbm); + const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; const u8 *byte; unsigned int bit; @@ -252,29 +261,53 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) { u64 rblock = block - rbm->rgd->rd_data0; - u32 x; if (WARN_ON_ONCE(rblock > UINT_MAX)) return -EINVAL; if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) return -E2BIG; - rbm->bi = rbm->rgd->rd_bits; + rbm->bii = 0; rbm->offset = (u32)(rblock); /* Check if the block is within the first block */ - if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) + if (rbm->offset < rbm_bi(rbm)->bi_blocks) return 0; /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ rbm->offset += (sizeof(struct gfs2_rgrp) - sizeof(struct gfs2_meta_header)) * GFS2_NBBY; - x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; - rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; - rbm->bi += x; + rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; + rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; return 0; } /** + * gfs2_rbm_incr - increment an rbm structure + * @rbm: The rbm with rgd already set correctly + * + * This function takes an existing rbm structure and increments it to the next + * viable block offset. + * + * Returns: If incrementing the offset would cause the rbm to go past the + * end of the rgrp, true is returned, otherwise false. + * + */ + +static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) +{ + if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ + rbm->offset++; + return false; + } + if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ + return true; + + rbm->offset = 0; + rbm->bii++; + return false; +} + +/** * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned * @rbm: Position to search (value/result) * @n_unaligned: Number of unaligned blocks to check @@ -285,7 +318,6 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) { - u64 block; u32 n; u8 res; @@ -296,8 +328,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le (*len)--; if (*len == 0) return true; - block = gfs2_rbm_to_block(rbm); - if (gfs2_rbm_from_block(rbm, block + 1)) + if (gfs2_rbm_incr(rbm)) return true; } @@ -306,7 +337,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le /** * gfs2_free_extlen - Return extent length of free blocks - * @rbm: Starting position + * @rrbm: Starting position * @len: Max length to check * * Starting at the block specified by the rbm, see how many free blocks @@ -328,6 +359,7 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) u32 chunk_size; u8 *ptr, *start, *end; u64 block; + struct gfs2_bitmap *bi; if (n_unaligned && gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) @@ -336,11 +368,12 @@ static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) n_unaligned = len & 3; /* Start is now byte aligned */ while (len > 3) { - start = rbm.bi->bi_bh->b_data; - if (rbm.bi->bi_clone) - start = rbm.bi->bi_clone; - end = start + rbm.bi->bi_bh->b_size; - start += rbm.bi->bi_offset; + bi = rbm_bi(&rbm); + start = bi->bi_bh->b_data; + if (bi->bi_clone) + start = bi->bi_clone; + end = start + bi->bi_bh->b_size; + start += bi->bi_offset; BUG_ON(rbm.offset & 3); start += (rbm.offset / GFS2_NBBY); bytes = min_t(u32, len / GFS2_NBBY, (end - start)); @@ -576,7 +609,7 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) RB_CLEAR_NODE(&ip->i_res->rs_node); out: up_write(&ip->i_rw_mutex); - return 0; + return error; } static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) @@ -592,7 +625,7 @@ static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) +static void __rs_deltree(struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; @@ -605,12 +638,18 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { - /* return reserved blocks to the rgrp and the ip */ + struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm); + + /* return reserved blocks to the rgrp */ BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; + /* The rgrp extent failure point is likely not to increase; + it will only do so if the freed blocks are somehow + contiguous with a span of free blocks that follows. Still, + it will force the number to be recalculated later. */ + rgd->rd_extfail_pt += rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); - smp_mb__after_clear_bit(); + clear_bit(GBF_FULL, &bi->bi_flags); } } @@ -619,14 +658,14 @@ static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) +void gfs2_rs_deltree(struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; rgd = rs->rs_rbm.rgd; if (rgd) { spin_lock(&rgd->rd_rsspin); - __rs_deltree(ip, rs); + __rs_deltree(rs); spin_unlock(&rgd->rd_rsspin); } } @@ -634,13 +673,14 @@ void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation + * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip) +void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) { down_write(&ip->i_rw_mutex); - if (ip->i_res) { - gfs2_rs_deltree(ip, ip->i_res); + if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { + gfs2_rs_deltree(ip->i_res); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; @@ -664,7 +704,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd) spin_lock(&rgd->rd_rsspin); while ((n = rb_first(&rgd->rd_rstree))) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - __rs_deltree(NULL, rs); + __rs_deltree(rs); } spin_unlock(&rgd->rd_rsspin); } @@ -698,11 +738,11 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) { - printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); - printk(KERN_INFO " ri_length = %u\n", rgd->rd_length); - printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); - printk(KERN_INFO " ri_data = %u\n", rgd->rd_data); - printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes); + pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); + pr_info("ri_length = %u\n", rgd->rd_length); + pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); + pr_info("ri_data = %u\n", rgd->rd_data); + pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes); } /** @@ -741,18 +781,21 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi->bi_offset = sizeof(struct gfs2_rgrp); bi->bi_start = 0; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; /* header block */ } else if (x == 0) { bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); bi->bi_offset = sizeof(struct gfs2_rgrp); bi->bi_start = 0; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; /* last block */ } else if (x + 1 == length) { bytes = bytes_left; bi->bi_offset = sizeof(struct gfs2_meta_header); bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; /* other blocks */ } else { bytes = sdp->sd_sb.sb_bsize - @@ -760,6 +803,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) bi->bi_offset = sizeof(struct gfs2_meta_header); bi->bi_start = rgd->rd_bitbytes - bytes_left; bi->bi_len = bytes; + bi->bi_blocks = bytes * GFS2_NBBY; } bytes_left -= bytes; @@ -844,6 +888,7 @@ static int rgd_insert(struct gfs2_rgrpd *rgd) static int read_rindex_entry(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + const unsigned bsize = sdp->sd_sb.sb_bsize; loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); struct gfs2_rindex buf; int error; @@ -881,6 +926,8 @@ static int read_rindex_entry(struct gfs2_inode *ip) goto fail; rgd->rd_gl->gl_object = rgd; + rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; + rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; rgd->rd_flags &= ~GFS2_RDF_UPTODATE; if (rgd->rd_data > sdp->sd_max_rg_data) @@ -1057,7 +1104,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) * Returns: errno */ -int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) +static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) { struct gfs2_sbd *sdp = rgd->rd_sbd; struct gfs2_glock *gl = rgd->rd_gl; @@ -1094,8 +1141,10 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); rgd->rd_free_clone = rgd->rd_free; + /* max out the rgrp allocation failure point */ + rgd->rd_extfail_pt = rgd->rd_free; } - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); @@ -1122,14 +1171,14 @@ fail: return error; } -int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) { u32 rl_flags; if (rgd->rd_flags & GFS2_RDF_UPTODATE) return 0; - if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) + if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) return gfs2_rgrp_bh_get(rgd); rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); @@ -1152,7 +1201,7 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh) if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) return 0; - return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object); + return gfs2_rgrp_bh_get(rgd); } /** @@ -1181,12 +1230,9 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed) { struct super_block *sb = sdp->sd_vfs; - struct block_device *bdev = sb->s_bdev; - const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / - bdev_logical_block_size(sb->s_bdev); u64 blk; sector_t start = 0; - sector_t nr_sects = 0; + sector_t nr_blks = 0; int rv; unsigned int x; u32 trimmed = 0; @@ -1206,35 +1252,34 @@ int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, if (diff == 0) continue; blk = offset + ((bi->bi_start + x) * GFS2_NBBY); - blk *= sects_per_blk; /* convert to sectors */ while(diff) { if (diff & 1) { - if (nr_sects == 0) + if (nr_blks == 0) goto start_new_extent; - if ((start + nr_sects) != blk) { - if (nr_sects >= minlen) { - rv = blkdev_issue_discard(bdev, - start, nr_sects, + if ((start + nr_blks) != blk) { + if (nr_blks >= minlen) { + rv = sb_issue_discard(sb, + start, nr_blks, GFP_NOFS, 0); if (rv) goto fail; - trimmed += nr_sects; + trimmed += nr_blks; } - nr_sects = 0; + nr_blks = 0; start_new_extent: start = blk; } - nr_sects += sects_per_blk; + nr_blks++; } diff >>= 2; - blk += sects_per_blk; + blk++; } } - if (nr_sects >= minlen) { - rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); + if (nr_blks >= minlen) { + rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0); if (rv) goto fail; - trimmed += nr_sects; + trimmed += nr_blks; } if (ptrimmed) *ptrimmed = trimmed; @@ -1257,7 +1302,7 @@ fail: int gfs2_fitrim(struct file *filp, void __user *argp) { - struct inode *inode = filp->f_dentry->d_inode; + struct inode *inode = file_inode(filp); struct gfs2_sbd *sdp = GFS2_SB(inode); struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); struct buffer_head *bh; @@ -1290,13 +1335,15 @@ int gfs2_fitrim(struct file *filp, void __user *argp) minlen = max_t(u64, r.minlen, q->limits.discard_granularity) >> bs_shift; + if (end <= start || minlen > sdp->sd_max_rg_data) + return -EINVAL; + rgd = gfs2_blk2rgrpd(sdp, start, 0); - rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0); + rgd_end = gfs2_blk2rgrpd(sdp, end, 0); - if (end <= start || - minlen > sdp->sd_max_rg_data || - start > rgd_end->rd_data0 + rgd_end->rd_data) - return -EINVAL; + if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end)) + && (start > rgd_end->rd_data0 + rgd_end->rd_data)) + return -EINVAL; /* start is beyond the end of the fs */ while (1) { @@ -1323,7 +1370,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) if (ret == 0) { bh = rgd->rd_bits[0].bi_bh; rgd->rd_flags |= GFS2_RGF_TRIMMED; - gfs2_trans_add_bh(rgd->rd_gl, bh, 1); + gfs2_trans_add_meta(rgd->rd_gl, bh); gfs2_rgrp_out(rgd, bh->b_data); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); gfs2_trans_end(sdp); @@ -1338,7 +1385,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) } out: - r.len = trimmed << 9; + r.len = trimmed << bs_shift; if (copy_to_user(argp, &r, sizeof(r))) return -EFAULT; @@ -1392,12 +1439,12 @@ static void rs_insert(struct gfs2_inode *ip) * rg_mblk_search - find a group of multiple free blocks to form a reservation * @rgd: the resource group descriptor * @ip: pointer to the inode for which we're reserving blocks - * @requested: number of blocks required for this allocation + * @ap: the allocation parameters * */ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, - unsigned requested) + const struct gfs2_alloc_parms *ap) { struct gfs2_rbm rbm = { .rgd = rgd, }; u64 goal; @@ -1405,9 +1452,14 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, u32 extlen; u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; int ret; + struct inode *inode = &ip->i_inode; - extlen = max_t(u32, atomic_read(&rs->rs_sizehint), requested); - extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); + if (S_ISDIR(inode->i_mode)) + extlen = 1; + else { + extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target); + extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks); + } if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) return; @@ -1420,7 +1472,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) return; - ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, extlen, ip, true); + ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap); if (ret == 0) { rs->rs_rbm = rbm; rs->rs_free = extlen; @@ -1485,6 +1537,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, * @rbm: The current position in the resource group * @ip: The inode for which we are searching for blocks * @minext: The minimum extent length + * @maxext: A pointer to the maximum extent structure * * This checks the current position in the rgrp to see whether there is * a reservation covering this block. If not then this function is a @@ -1497,7 +1550,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, const struct gfs2_inode *ip, - u32 minext) + u32 minext, + struct gfs2_extent *maxext) { u64 block = gfs2_rbm_to_block(rbm); u32 extlen = 1; @@ -1510,8 +1564,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, */ if (minext) { extlen = gfs2_free_extlen(rbm, minext); - nblock = block + extlen; - if (extlen < minext) + if (extlen <= maxext->len) goto fail; } @@ -1520,9 +1573,17 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, * and skip if parts of it are already reserved */ nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); - if (nblock == block) - return 0; + if (nblock == block) { + if (!minext || extlen >= minext) + return 0; + + if (extlen > maxext->len) { + maxext->len = extlen; + maxext->rbm = *rbm; + } fail: + nblock = block + extlen; + } ret = gfs2_rbm_from_block(rbm, nblock); if (ret < 0) return ret; @@ -1533,30 +1594,38 @@ fail: * gfs2_rbm_find - Look for blocks of a particular state * @rbm: Value/result starting position and final position * @state: The state which we want to find - * @minext: The requested extent length (0 for a single block) + * @minext: Pointer to the requested extent length (NULL for a single block) + * This is updated to be the actual reservation size. * @ip: If set, check for reservations * @nowrap: Stop looking at the end of the rgrp, rather than wrapping * around until we've reached the starting point. + * @ap: the allocation parameters * * Side effects: * - If looking for free blocks, we set GBF_FULL on each bitmap which * has no free blocks in it. + * - If looking for free blocks, we set rd_extfail_pt on each rgrp which + * has come up short on a free block search. * * Returns: 0 on success, -ENOSPC if there is no block of the requested state */ -static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, - const struct gfs2_inode *ip, bool nowrap) +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, + const struct gfs2_inode *ip, bool nowrap, + const struct gfs2_alloc_parms *ap) { struct buffer_head *bh; - struct gfs2_bitmap *initial_bi; + int initial_bii; u32 initial_offset; + int first_bii = rbm->bii; + u32 first_offset = rbm->offset; u32 offset; u8 *buffer; - int index; int n = 0; int iters = rbm->rgd->rd_length; int ret; + struct gfs2_bitmap *bi; + struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, }; /* If we are not starting at the beginning of a bitmap, then we * need to add one to the bitmap count to ensure that we search @@ -1566,52 +1635,55 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, iters++; while(1) { - if (test_bit(GBF_FULL, &rbm->bi->bi_flags) && + bi = rbm_bi(rbm); + if (test_bit(GBF_FULL, &bi->bi_flags) && (state == GFS2_BLKST_FREE)) goto next_bitmap; - bh = rbm->bi->bi_bh; - buffer = bh->b_data + rbm->bi->bi_offset; + bh = bi->bi_bh; + buffer = bh->b_data + bi->bi_offset; WARN_ON(!buffer_uptodate(bh)); - if (state != GFS2_BLKST_UNLINKED && rbm->bi->bi_clone) - buffer = rbm->bi->bi_clone + rbm->bi->bi_offset; + if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) + buffer = bi->bi_clone + bi->bi_offset; initial_offset = rbm->offset; - offset = gfs2_bitfit(buffer, rbm->bi->bi_len, rbm->offset, state); + offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state); if (offset == BFITNOENT) goto bitmap_full; rbm->offset = offset; if (ip == NULL) return 0; - initial_bi = rbm->bi; - ret = gfs2_reservation_check_and_update(rbm, ip, minext); + initial_bii = rbm->bii; + ret = gfs2_reservation_check_and_update(rbm, ip, + minext ? *minext : 0, + &maxext); if (ret == 0) return 0; if (ret > 0) { - n += (rbm->bi - initial_bi); + n += (rbm->bii - initial_bii); goto next_iter; } if (ret == -E2BIG) { - index = 0; + rbm->bii = 0; rbm->offset = 0; - n += (rbm->bi - initial_bi); + n += (rbm->bii - initial_bii); goto res_covered_end_of_rgrp; } return ret; bitmap_full: /* Mark bitmap as full and fall through */ - if ((state == GFS2_BLKST_FREE) && initial_offset == 0) - set_bit(GBF_FULL, &rbm->bi->bi_flags); + if ((state == GFS2_BLKST_FREE) && initial_offset == 0) { + struct gfs2_bitmap *bi = rbm_bi(rbm); + set_bit(GBF_FULL, &bi->bi_flags); + } next_bitmap: /* Find next bitmap in the rgrp */ rbm->offset = 0; - index = rbm->bi - rbm->rgd->rd_bits; - index++; - if (index == rbm->rgd->rd_length) - index = 0; + rbm->bii++; + if (rbm->bii == rbm->rgd->rd_length) + rbm->bii = 0; res_covered_end_of_rgrp: - rbm->bi = &rbm->rgd->rd_bits[index]; - if ((index == 0) && nowrap) + if ((rbm->bii == 0) && nowrap) break; n++; next_iter: @@ -1619,6 +1691,24 @@ next_iter: break; } + if (minext == NULL || state != GFS2_BLKST_FREE) + return -ENOSPC; + + /* If the extent was too small, and it's smaller than the smallest + to have failed before, remember for future reference that it's + useless to search this rgrp again for this amount or more. */ + if ((first_offset == 0) && (first_bii == 0) && + (*minext < rbm->rgd->rd_extfail_pt)) + rbm->rgd->rd_extfail_pt = *minext; + + /* If the maximum extent we found is big enough to fulfill the + minimum requirements, use it anyway. */ + if (maxext.len) { + *rbm = maxext.rbm; + *minext = maxext.len; + return 0; + } + return -ENOSPC; } @@ -1640,11 +1730,12 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip struct gfs2_inode *ip; int error; int found = 0; - struct gfs2_rbm rbm = { .rgd = rgd, .bi = rgd->rd_bits, .offset = 0 }; + struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; while (1) { down_write(&sdp->sd_log_flush_lock); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, 0, NULL, true); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, + true, NULL); up_write(&sdp->sd_log_flush_lock); if (error == -ENOSPC) break; @@ -1795,12 +1886,12 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b /** * gfs2_inplace_reserve - Reserve space in the filesystem * @ip: the inode to reserve space for - * @requested: the number of blocks to be reserved + * @ap: the allocation parameters * * Returns: errno */ -int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) +int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct gfs2_rgrpd *begin = NULL; @@ -1812,17 +1903,16 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) if (sdp->sd_args.ar_rgrplvb) flags |= GL_SKIP; - if (gfs2_assert_warn(sdp, requested)) + if (gfs2_assert_warn(sdp, ap->target)) return -EINVAL; if (gfs2_rs_active(rs)) { begin = rs->rs_rbm.rgd; - flags = 0; /* Yoda: Do or do not. There is no try */ } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) { rs->rs_rbm.rgd = begin = ip->i_rgd; } else { rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); } - if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) + if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV)) skip = gfs2_orlov_skip(ip); if (rs->rs_rbm.rgd == NULL) return -EBADSLT; @@ -1856,7 +1946,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) } /* Skip unuseable resource groups */ - if (rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) + if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC | + GFS2_RDF_ERROR)) || + (ap->target > rs->rs_rbm.rgd->rd_extfail_pt)) goto skip_rgrp; if (sdp->sd_args.ar_rgrplvb) @@ -1864,27 +1956,28 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) /* Get a reservation if we don't already have one */ if (!gfs2_rs_active(rs)) - rg_mblk_search(rs->rs_rbm.rgd, ip, requested); + rg_mblk_search(rs->rs_rbm.rgd, ip, ap); /* Skip rgrps when we can't get a reservation on first pass */ if (!gfs2_rs_active(rs) && (loops < 1)) goto check_rgrp; /* If rgrp has enough free space, use it */ - if (rs->rs_rbm.rgd->rd_free_clone >= requested) { + if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) { ip->i_rgd = rs->rs_rbm.rgd; return 0; } - /* Drop reservation, if we couldn't use reserved rgrp */ - if (gfs2_rs_active(rs)) - gfs2_rs_deltree(ip, rs); check_rgrp: /* Check for unlinked inodes which can be reclaimed */ if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK) try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked, ip->i_no_addr); skip_rgrp: + /* Drop reservation, if we couldn't use reserved rgrp */ + if (gfs2_rs_active(rs)) + gfs2_rs_deltree(rs); + /* Unlock rgrp if required */ if (!rg_locked) gfs2_glock_dq_uninit(&rs->rs_rgd_gh); @@ -1908,7 +2001,7 @@ next_rgrp: } /* Flushing the log may release space */ if (loops == 2) - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); } return -ENOSPC; @@ -1968,14 +2061,14 @@ static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode, *n = 1; block = gfs2_rbm_to_block(rbm); - gfs2_trans_add_bh(rbm->rgd->rd_gl, rbm->bi->bi_bh, 1); + gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh); gfs2_setbit(rbm, true, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED); block++; while (*n < elen) { ret = gfs2_rbm_from_block(&pos, block); if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE) break; - gfs2_trans_add_bh(pos.rgd->rd_gl, pos.bi->bi_bh, 1); + gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh); gfs2_setbit(&pos, true, GFS2_BLKST_USED); (*n)++; block++; @@ -1996,6 +2089,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, u32 blen, unsigned char new_state) { struct gfs2_rbm rbm; + struct gfs2_bitmap *bi; rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1); if (!rbm.rgd) { @@ -2006,15 +2100,15 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, while (blen--) { gfs2_rbm_from_block(&rbm, bstart); + bi = rbm_bi(&rbm); bstart++; - if (!rbm.bi->bi_clone) { - rbm.bi->bi_clone = kmalloc(rbm.bi->bi_bh->b_size, - GFP_NOFS | __GFP_NOFAIL); - memcpy(rbm.bi->bi_clone + rbm.bi->bi_offset, - rbm.bi->bi_bh->b_data + rbm.bi->bi_offset, - rbm.bi->bi_len); + if (!bi->bi_clone) { + bi->bi_clone = kmalloc(bi->bi_bh->b_size, + GFP_NOFS | __GFP_NOFAIL); + memcpy(bi->bi_clone + bi->bi_offset, + bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); } - gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.bi->bi_bh, 1); + gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh); gfs2_setbit(&rbm, false, new_state); } @@ -2028,25 +2122,24 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, * */ -int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) +void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) { struct gfs2_rgrpd *rgd = gl->gl_object; struct gfs2_blkreserv *trs; const struct rb_node *n; if (rgd == NULL) - return 0; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n", + return; + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n", (unsigned long long)rgd->rd_addr, rgd->rd_flags, rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes, - rgd->rd_reserved); + rgd->rd_reserved, rgd->rd_extfail_pt); spin_lock(&rgd->rd_rsspin); for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) { trs = rb_entry(n, struct gfs2_blkreserv, rs_node); dump_rs(seq, trs); } spin_unlock(&rgd->rd_rsspin); - return 0; } static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd) @@ -2091,13 +2184,42 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip, if (rs->rs_free && !ret) goto out; } - __rs_deltree(ip, rs); + __rs_deltree(rs); } out: spin_unlock(&rgd->rd_rsspin); } /** + * gfs2_set_alloc_start - Set starting point for block allocation + * @rbm: The rbm which will be set to the required location + * @ip: The gfs2 inode + * @dinode: Flag to say if allocation includes a new inode + * + * This sets the starting point from the reservation if one is active + * otherwise it falls back to guessing a start point based on the + * inode's goal block or the last allocation point in the rgrp. + */ + +static void gfs2_set_alloc_start(struct gfs2_rbm *rbm, + const struct gfs2_inode *ip, bool dinode) +{ + u64 goal; + + if (gfs2_rs_active(ip->i_res)) { + *rbm = ip->i_res->rs_rbm; + return; + } + + if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal)) + goal = ip->i_goal; + else + goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0; + + gfs2_rbm_from_block(rbm, goal); +} + +/** * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode * @ip: the inode to allocate the block for * @bn: Used to return the starting block number @@ -2115,30 +2237,24 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, struct buffer_head *dibh; struct gfs2_rbm rbm = { .rgd = ip->i_rgd, }; unsigned int ndata; - u64 goal; u64 block; /* block, within the file system scope */ int error; - if (gfs2_rs_active(ip->i_res)) - goal = gfs2_rbm_to_block(&ip->i_res->rs_rbm); - else if (!dinode && rgrp_contains_block(rbm.rgd, ip->i_goal)) - goal = ip->i_goal; - else - goal = rbm.rgd->rd_last_alloc + rbm.rgd->rd_data0; - - gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, ip, false); + gfs2_set_alloc_start(&rbm, ip, dinode); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL); if (error == -ENOSPC) { - gfs2_rbm_from_block(&rbm, goal); - error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, 0, NULL, false); + gfs2_set_alloc_start(&rbm, ip, dinode); + error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false, + NULL); } /* Since all blocks are reserved in advance, this shouldn't happen */ if (error) { - fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d\n", + fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n", (unsigned long long)ip->i_no_addr, error, *nblocks, - test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags)); + test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags), + rbm.rgd->rd_extfail_pt); goto rgrp_error; } @@ -2157,14 +2273,14 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, if (error == 0) { struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); brelse(dibh); } } if (rbm.rgd->rd_free < *nblocks) { - printk(KERN_WARNING "nblocks=%u\n", *nblocks); + pr_warn("nblocks=%u\n", *nblocks); goto rgrp_error; } @@ -2176,21 +2292,15 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, *generation = rbm.rgd->rd_igeneration++; } - gfs2_trans_add_bh(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh, 1); + gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); if (dinode) - gfs2_trans_add_unrevoke(sdp, block, 1); + gfs2_trans_add_unrevoke(sdp, block, *nblocks); - /* - * This needs reviewing to see why we cannot do the quota change - * at this point in the dinode case. - */ - if (ndata) - gfs2_quota_change(ip, ndata, ip->i_inode.i_uid, - ip->i_inode.i_gid); + gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); rbm.rgd->rd_free_clone -= *nblocks; trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, @@ -2223,7 +2333,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta) trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE); rgd->rd_free += blen; rgd->rd_flags &= ~GFS2_RGF_TRIMMED; - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); + gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); @@ -2260,7 +2370,7 @@ void gfs2_unlink_di(struct inode *inode) if (!rgd) return; trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); + gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); update_rgrp_lvb_unlinked(rgd, 1); @@ -2281,7 +2391,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) rgd->rd_dinodes--; rgd->rd_free++; - gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); + gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh); gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); update_rgrp_lvb_unlinked(rgd, -1); @@ -2412,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state) /** * gfs2_rlist_free - free a resource group list - * @list: the list of resource groups + * @rlist: the list of resource groups * */ diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 842185853f6..463ab2e95d1 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -40,15 +40,15 @@ extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); #define GFS2_AF_ORLOV 1 -extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 flags); +extern int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap); extern void gfs2_inplace_release(struct gfs2_inode *ip); extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern int gfs2_rs_alloc(struct gfs2_inode *ip); -extern void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip); +extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); +extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); @@ -68,7 +68,7 @@ extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist, extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); -extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); +extern void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, struct buffer_head *bh, const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index d6488674d91..1319b5c4ec6 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/bio.h> #include <linux/sched.h> #include <linux/slab.h> @@ -175,8 +177,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) break; case Opt_debug: if (args->ar_errors == GFS2_ERRORS_PANIC) { - printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " - "are mutually exclusive.\n"); + pr_warn("-o debug and -o errors=panic are mutually exclusive\n"); return -EINVAL; } args->ar_debug = 1; @@ -228,21 +229,21 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) case Opt_commit: rv = match_int(&tmp[0], &args->ar_commit); if (rv || args->ar_commit <= 0) { - printk(KERN_WARNING "GFS2: commit mount option requires a positive numeric argument\n"); + pr_warn("commit mount option requires a positive numeric argument\n"); return rv ? rv : -EINVAL; } break; case Opt_statfs_quantum: rv = match_int(&tmp[0], &args->ar_statfs_quantum); if (rv || args->ar_statfs_quantum < 0) { - printk(KERN_WARNING "GFS2: statfs_quantum mount option requires a non-negative numeric argument\n"); + pr_warn("statfs_quantum mount option requires a non-negative numeric argument\n"); return rv ? rv : -EINVAL; } break; case Opt_quota_quantum: rv = match_int(&tmp[0], &args->ar_quota_quantum); if (rv || args->ar_quota_quantum <= 0) { - printk(KERN_WARNING "GFS2: quota_quantum mount option requires a positive numeric argument\n"); + pr_warn("quota_quantum mount option requires a positive numeric argument\n"); return rv ? rv : -EINVAL; } break; @@ -250,7 +251,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) rv = match_int(&tmp[0], &args->ar_statfs_percent); if (rv || args->ar_statfs_percent < 0 || args->ar_statfs_percent > 100) { - printk(KERN_WARNING "statfs_percent mount option requires a numeric argument between 0 and 100\n"); + pr_warn("statfs_percent mount option requires a numeric argument between 0 and 100\n"); return rv ? rv : -EINVAL; } break; @@ -259,8 +260,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) break; case Opt_err_panic: if (args->ar_debug) { - printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " - "are mutually exclusive.\n"); + pr_warn("-o debug and -o errors=panic are mutually exclusive\n"); return -EINVAL; } args->ar_errors = GFS2_ERRORS_PANIC; @@ -279,7 +279,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) break; case Opt_error: default: - printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); + pr_warn("invalid mount option: %s\n", o); return -EINVAL; } } @@ -295,9 +295,8 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) void gfs2_jindex_free(struct gfs2_sbd *sdp) { - struct list_head list, *head; + struct list_head list; struct gfs2_jdesc *jd; - struct gfs2_journal_extent *jext; spin_lock(&sdp->sd_jindex_spin); list_add(&list, &sdp->sd_jindex_list); @@ -307,14 +306,7 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) while (!list_empty(&list)) { jd = list_entry(list.next, struct gfs2_jdesc, jd_list); - head = &jd->extent_list; - while (!list_empty(head)) { - jext = list_entry(head->next, - struct gfs2_journal_extent, - extent_list); - list_del(&jext->extent_list); - kfree(jext); - } + gfs2_free_journal_extents(jd); list_del(&jd->jd_list); iput(jd->jd_inode); kfree(jd); @@ -369,6 +361,33 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) return 0; } +static int init_threads(struct gfs2_sbd *sdp) +{ + struct task_struct *p; + int error = 0; + + p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); + if (IS_ERR(p)) { + error = PTR_ERR(p); + fs_err(sdp, "can't start logd thread: %d\n", error); + return error; + } + sdp->sd_logd_process = p; + + p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); + if (IS_ERR(p)) { + error = PTR_ERR(p); + fs_err(sdp, "can't start quotad thread: %d\n", error); + goto fail; + } + sdp->sd_quotad_process = p; + return 0; + +fail: + kthread_stop(sdp->sd_logd_process); + return error; +} + /** * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one * @sdp: the filesystem @@ -380,14 +399,19 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_holder t_gh; + struct gfs2_holder thaw_gh; struct gfs2_log_header_host head; int error; - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh); + error = init_threads(sdp); if (error) return error; + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0, + &thaw_gh); + if (error) + goto fail_threads; + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); @@ -410,14 +434,16 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_glock_dq_uninit(&t_gh); + gfs2_glock_dq_uninit(&thaw_gh); return 0; fail: - t_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq_uninit(&t_gh); - + thaw_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&thaw_gh); +fail_threads: + kthread_stop(sdp->sd_quotad_process); + kthread_stop(sdp->sd_logd_process); return error; } @@ -500,7 +526,7 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, if (error) return; - gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); + gfs2_trans_add_meta(l_ip->i_gl, l_bh); spin_lock(&sdp->sd_statfs_spin); l_sc->sc_total += total; @@ -528,7 +554,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; - gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1); + gfs2_trans_add_meta(l_ip->i_gl, l_bh); spin_lock(&sdp->sd_statfs_spin); m_sc->sc_total += l_sc->sc_total; @@ -539,7 +565,7 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, 0, sizeof(struct gfs2_statfs_change)); spin_unlock(&sdp->sd_statfs_spin); - gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1); + gfs2_trans_add_meta(m_ip->i_gl, m_bh); gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); } @@ -610,15 +636,21 @@ struct lfcc { */ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, - struct gfs2_holder *t_gh) + struct gfs2_holder *freeze_gh) { struct gfs2_inode *ip; struct gfs2_jdesc *jd; struct lfcc *lfcc; LIST_HEAD(list); struct gfs2_log_header_host lh; + struct gfs2_inode *dip = GFS2_I(sdp->sd_root_dir->d_inode); int error; + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, + &sdp->sd_freeze_root_gh); + if (error) + return error; + atomic_set(&sdp->sd_frozen_root, 1); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); if (!lfcc) { @@ -634,8 +666,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, list_add(&lfcc->list, &list); } - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, - GL_NOCACHE, t_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, freeze_gh); list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { error = gfs2_jdesc_check(jd); @@ -651,7 +683,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, } if (error) - gfs2_glock_dq_uninit(t_gh); + gfs2_glock_dq_uninit(freeze_gh); out: while (!list_empty(&list)) { @@ -660,57 +692,14 @@ out: gfs2_glock_dq_uninit(&lfcc->gh); kfree(lfcc); } - return error; -} - -/** - * gfs2_freeze_fs - freezes the file system - * @sdp: the file system - * - * This function flushes data and meta data for all machines by - * acquiring the transaction log exclusively. All journals are - * ensured to be in a clean state as well. - * - * Returns: errno - */ - -int gfs2_freeze_fs(struct gfs2_sbd *sdp) -{ - int error = 0; - - mutex_lock(&sdp->sd_freeze_lock); - - if (!sdp->sd_freeze_count++) { - error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh); - if (error) - sdp->sd_freeze_count--; + if (error) { + atomic_dec(&sdp->sd_frozen_root); + wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); + gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); } - - mutex_unlock(&sdp->sd_freeze_lock); - return error; } -/** - * gfs2_unfreeze_fs - unfreezes the file system - * @sdp: the file system - * - * This function allows the file system to proceed by unlocking - * the exclusively held transaction lock. Other GFS2 nodes are - * now free to acquire the lock shared and go on with their lives. - * - */ - -void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) -{ - mutex_lock(&sdp->sd_freeze_lock); - - if (sdp->sd_freeze_count && !--sdp->sd_freeze_count) - gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); - - mutex_unlock(&sdp->sd_freeze_lock); -} - void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) { struct gfs2_dinode *str = buf; @@ -721,8 +710,8 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); str->di_mode = cpu_to_be32(ip->i_inode.i_mode); - str->di_uid = cpu_to_be32(ip->i_inode.i_uid); - str->di_gid = cpu_to_be32(ip->i_inode.i_gid); + str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode)); + str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode)); str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); @@ -765,7 +754,7 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) int ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) - gfs2_log_flush(GFS2_SB(inode), ip->i_gl); + gfs2_log_flush(GFS2_SB(inode), ip->i_gl, NORMAL_FLUSH); if (bdi->dirty_exceeded) gfs2_ail1_flush(sdp, wbc); else @@ -824,7 +813,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) ret = gfs2_meta_inode_buffer(ip, &bh); if (ret == 0) { - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); gfs2_dinode_out(ip, bh->b_data); brelse(bh); } @@ -845,25 +834,30 @@ out: static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) { - struct gfs2_holder t_gh; + struct gfs2_holder thaw_gh; int error; - flush_workqueue(gfs2_delete_workqueue); - gfs2_quota_sync(sdp->sd_vfs, 0); - gfs2_statfs_sync(sdp->sd_vfs, 0); - - error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, - &t_gh); + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE, + &thaw_gh); if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return error; - gfs2_meta_syncfs(sdp); - gfs2_log_shutdown(sdp); - + down_write(&sdp->sd_log_flush_lock); clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + up_write(&sdp->sd_log_flush_lock); + + kthread_stop(sdp->sd_quotad_process); + kthread_stop(sdp->sd_logd_process); + + flush_workqueue(gfs2_delete_workqueue); + gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_statfs_sync(sdp->sd_vfs, 0); - if (t_gh.gh_gl) - gfs2_glock_dq_uninit(&t_gh); + gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH); + gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks); + + if (thaw_gh.gh_gl) + gfs2_glock_dq_uninit(&thaw_gh); gfs2_quota_cleanup(sdp); @@ -888,13 +882,6 @@ static void gfs2_put_super(struct super_block *sb) int error; struct gfs2_jdesc *jd; - /* Unfreeze the filesystem, if we need to */ - - mutex_lock(&sdp->sd_freeze_lock); - if (sdp->sd_freeze_count) - gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); - mutex_unlock(&sdp->sd_freeze_lock); - /* No more recovery requests */ set_bit(SDF_NORECOVERY, &sdp->sd_flags); smp_mb(); @@ -912,9 +899,6 @@ restart: } spin_unlock(&sdp->sd_jindex_spin); - kthread_stop(sdp->sd_quotad_process); - kthread_stop(sdp->sd_logd_process); - if (!(sb->s_flags & MS_RDONLY)) { error = gfs2_make_fs_ro(sdp); if (error) @@ -930,7 +914,7 @@ restart: iput(sdp->sd_quota_inode); gfs2_glock_put(sdp->sd_rename_gl); - gfs2_glock_put(sdp->sd_trans_gl); + gfs2_glock_put(sdp->sd_freeze_gl); if (!sdp->sd_args.ar_spectator) { gfs2_glock_dq_uninit(&sdp->sd_journal_gh); @@ -965,8 +949,8 @@ static int gfs2_sync_fs(struct super_block *sb, int wait) struct gfs2_sbd *sdp = sb->s_fs_info; gfs2_quota_sync(sb, -1); - if (wait && sdp) - gfs2_log_flush(sdp, NULL); + if (wait && sdp && !atomic_read(&sdp->sd_log_freeze)) + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); return 0; } @@ -985,7 +969,7 @@ static int gfs2_freeze(struct super_block *sb) return -EINVAL; for (;;) { - error = gfs2_freeze_fs(sdp); + error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh); if (!error) break; @@ -1013,7 +997,12 @@ static int gfs2_freeze(struct super_block *sb) static int gfs2_unfreeze(struct super_block *sb) { - gfs2_unfreeze_fs(sb->s_fs_info); + struct gfs2_sbd *sdp = sb->s_fs_info; + + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + atomic_dec(&sdp->sd_frozen_root); + wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0); + gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh); return 0; } @@ -1195,6 +1184,8 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) struct gfs2_tune *gt = &sdp->sd_tune; int error; + sync_filesystem(sb); + spin_lock(>->gt_spin); args.ar_commit = gt->gt_logd_secs; args.ar_quota_quantum = gt->gt_quota_quantum; @@ -1276,7 +1267,7 @@ static int gfs2_drop_inode(struct inode *inode) { struct gfs2_inode *ip = GFS2_I(inode); - if (inode->i_nlink) { + if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && inode->i_nlink) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) clear_nlink(inode); @@ -1429,7 +1420,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip) if (error) return error; - error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) return error; @@ -1491,12 +1482,18 @@ static void gfs2_evict_inode(struct inode *inode) struct gfs2_holder gh; int error; + if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) { + clear_inode(inode); + return; + } + if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) goto out; /* Must not read inode block until block type has been verified */ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (unlikely(error)) { + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&ip->i_iopen_gh); goto out; } @@ -1545,7 +1542,7 @@ static void gfs2_evict_inode(struct inode *inode) goto out_unlock; out_truncate: - gfs2_log_flush(sdp, ip->i_gl); + gfs2_log_flush(sdp, ip->i_gl, NORMAL_FLUSH); if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl); filemap_fdatawrite(metamapping); @@ -1565,18 +1562,21 @@ out_truncate: out_unlock: /* Error path for case 1 */ if (gfs2_rs_active(ip->i_res)) - gfs2_rs_deltree(ip, ip->i_res); + gfs2_rs_deltree(ip->i_res); - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) + if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq(&ip->i_iopen_gh); + } gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); if (error && error != GLR_TRYFAILED && error != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", error); out: /* Case 3 starts here */ - truncate_inode_pages(&inode->i_data, 0); - gfs2_rs_delete(ip); + truncate_inode_pages_final(&inode->i_data); + gfs2_rs_delete(ip, NULL); + gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); ip->i_gl->gl_object = NULL; @@ -1586,6 +1586,7 @@ out: ip->i_gl = NULL; if (ip->i_iopen_gh.gh_gl) { ip->i_iopen_gh.gh_gl->gl_object = NULL; + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&ip->i_iopen_gh); } } diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index a0464680af0..90e3322ffa1 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -46,9 +46,6 @@ extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, struct buffer_head *l_bh); extern int gfs2_statfs_sync(struct super_block *sb, int type); -extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); -extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); - extern struct file_system_type gfs2_fs_type; extern struct file_system_type gfs2meta_fs_type; extern const struct export_operations gfs2_export_ops; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 8056b7b7238..3ab566ba569 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/completion.h> @@ -91,39 +93,37 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) { - unsigned int count; - - mutex_lock(&sdp->sd_freeze_lock); - count = sdp->sd_freeze_count; - mutex_unlock(&sdp->sd_freeze_lock); + struct super_block *sb = sdp->sd_vfs; + int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1; - return snprintf(buf, PAGE_SIZE, "%u\n", count); + return snprintf(buf, PAGE_SIZE, "%u\n", frozen); } static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { - ssize_t ret = len; - int error = 0; + int error; int n = simple_strtol(buf, NULL, 0); if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; switch (n) { case 0: - gfs2_unfreeze_fs(sdp); + error = thaw_super(sdp->sd_vfs); break; case 1: - error = gfs2_freeze_fs(sdp); + error = freeze_super(sdp->sd_vfs); break; default: - ret = -EINVAL; + return -EINVAL; } - if (error) + if (error) { fs_warn(sdp, "freeze %d error %d", n, error); + return error; + } - return ret; + return len; } static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) @@ -135,14 +135,13 @@ static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: withdrawing from cluster at user's request\n", - sdp->sd_fsname); + gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n"); + return len; } @@ -150,7 +149,7 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; @@ -163,7 +162,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; @@ -175,30 +174,40 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + struct kqid qid; int error; u32 id; if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; id = simple_strtoul(buf, NULL, 0); - error = gfs2_quota_refresh(sdp, 1, id); + qid = make_kqid(current_user_ns(), USRQUOTA, id); + if (!qid_valid(qid)) + return -EINVAL; + + error = gfs2_quota_refresh(sdp, qid); return error ? error : len; } static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + struct kqid qid; int error; u32 id; if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; id = simple_strtoul(buf, NULL, 0); - error = gfs2_quota_refresh(sdp, 0, id); + qid = make_kqid(current_user_ns(), GRPQUOTA, id); + if (!qid_valid(qid)) + return -EINVAL; + + error = gfs2_quota_refresh(sdp, qid); return error ? error : len; } @@ -213,7 +222,7 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len int rv; if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; rv = sscanf(buf, "%u:%llu %15s", &gltype, &glnum, mode); @@ -231,8 +240,8 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len if (gltype > LM_TYPE_JOURNAL) return -EINVAL; - if (gltype == LM_TYPE_NONDISK && glnum == GFS2_TRANS_LOCK) - glops = &gfs2_trans_glops; + if (gltype == LM_TYPE_NONDISK && glnum == GFS2_FREEZE_LOCK) + glops = &gfs2_freeze_glops; else glops = gfs2_glops_list[gltype]; if (glops == NULL) @@ -324,7 +333,7 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); else if (val == 0) { clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); gfs2_glock_thaw(sdp); } else { ret = -EINVAL; @@ -332,6 +341,28 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) return ret; } +static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf) +{ + int val = completion_done(&sdp->sd_wdack) ? 1 : 0; + + return sprintf(buf, "%d\n", val); +} + +static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +{ + ssize_t ret = len; + int val; + + val = simple_strtol(buf, NULL, 0); + + if ((val == 1) && + !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) + complete(&sdp->sd_wdack); + else + ret = -EINVAL; + return ret; +} + static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; @@ -376,6 +407,9 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) struct gfs2_jdesc *jd; int rv; + /* Wait for our primary journal to be initialized */ + wait_for_completion(&sdp->sd_journal_ready); + spin_lock(&sdp->sd_jindex_spin); rv = -EBUSY; if (sdp->sd_jdesc->jd_jid == jid) @@ -451,7 +485,7 @@ static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) rv = jid = -EINVAL; sdp->sd_lockstruct.ls_jid = jid; clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); out: spin_unlock(&sdp->sd_jindex_spin); @@ -463,7 +497,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); -GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); +GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store); GDLM_ATTR(jid, 0644, jid_show, jid_store); GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); GDLM_ATTR(first_done, 0444, first_done_show, NULL); @@ -502,7 +536,7 @@ static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf, unsigned int x, y; if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; if (sscanf(buf, "%u %u", &x, &y) != 2 || !y) return -EINVAL; @@ -521,7 +555,7 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field, unsigned int x; if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + return -EPERM; x = simple_strtoul(buf, NULL, 0); @@ -557,7 +591,6 @@ TUNE_ATTR(max_readahead, 0); TUNE_ATTR(complain_secs, 0); TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); -TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); @@ -567,7 +600,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_max_readahead.attr, &tune_attr_complain_secs.attr, &tune_attr_statfs_slow.attr, - &tune_attr_quota_simul_sync.attr, &tune_attr_statfs_quantum.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 2ee13e841e9..20c007d747a 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -159,9 +159,9 @@ TRACE_EVENT(gfs2_glock_put, /* Callback (local or remote) requesting lock demotion */ TRACE_EVENT(gfs2_demote_rq, - TP_PROTO(const struct gfs2_glock *gl), + TP_PROTO(const struct gfs2_glock *gl, bool remote), - TP_ARGS(gl), + TP_ARGS(gl, remote), TP_STRUCT__entry( __field( dev_t, dev ) @@ -170,6 +170,7 @@ TRACE_EVENT(gfs2_demote_rq, __field( u8, cur_state ) __field( u8, dmt_state ) __field( unsigned long, flags ) + __field( bool, remote ) ), TP_fast_assign( @@ -179,14 +180,16 @@ TRACE_EVENT(gfs2_demote_rq, __entry->cur_state = glock_trace_state(gl->gl_state); __entry->dmt_state = glock_trace_state(gl->gl_demote_state); __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0); + __entry->remote = remote; ), - TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", + TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype, (unsigned long long)__entry->glnum, glock_trace_name(__entry->cur_state), glock_trace_name(__entry->dmt_state), - show_glock_flags(__entry->flags)) + show_glock_flags(__entry->flags), + __entry->remote ? "remote" : "local") ); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 413627072f3..0546ab4e28e 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -18,6 +20,7 @@ #include "gfs2.h" #include "incore.h" #include "glock.h" +#include "inode.h" #include "log.h" #include "lops.h" #include "meta_io.h" @@ -45,63 +48,41 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, tr->tr_blocks = blocks; tr->tr_revokes = revokes; tr->tr_reserved = 1; + tr->tr_alloced = 1; if (blocks) tr->tr_reserved += 6 + blocks; if (revokes) tr->tr_reserved += gfs2_struct2blk(sdp, revokes, sizeof(u64)); - sb_start_intwrite(sdp->sd_vfs); - gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); + INIT_LIST_HEAD(&tr->tr_databuf); + INIT_LIST_HEAD(&tr->tr_buf); - error = gfs2_glock_nq(&tr->tr_t_gh); - if (error) - goto fail_holder_uninit; + sb_start_intwrite(sdp->sd_vfs); error = gfs2_log_reserve(sdp, tr->tr_reserved); if (error) - goto fail_gunlock; + goto fail; current->journal_info = tr; return 0; -fail_gunlock: - gfs2_glock_dq(&tr->tr_t_gh); - -fail_holder_uninit: +fail: sb_end_intwrite(sdp->sd_vfs); - gfs2_holder_uninit(&tr->tr_t_gh); kfree(tr); return error; } -/** - * gfs2_log_release - Release a given number of log blocks - * @sdp: The GFS2 superblock - * @blks: The number of blocks - * - */ - -static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) -{ - - atomic_add(blks, &sdp->sd_log_blks_free); - trace_gfs2_log_blocks(sdp, blks); - gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= - sdp->sd_jdesc->jd_blocks); - up_read(&sdp->sd_log_flush_lock); -} - static void gfs2_print_trans(const struct gfs2_trans *tr) { - print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip); - printk(KERN_WARNING "GFS2: blocks=%u revokes=%u reserved=%u touched=%d\n", - tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); - printk(KERN_WARNING "GFS2: Buf %u/%u Databuf %u/%u Revoke %u/%u\n", - tr->tr_num_buf_new, tr->tr_num_buf_rm, - tr->tr_num_databuf_new, tr->tr_num_databuf_rm, - tr->tr_num_revoke, tr->tr_num_revoke_rm); + pr_warn("Transaction created at: %pSR\n", (void *)tr->tr_ip); + pr_warn("blocks=%u revokes=%u reserved=%u touched=%u\n", + tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); + pr_warn("Buf %u/%u Databuf %u/%u Revoke %u/%u\n", + tr->tr_num_buf_new, tr->tr_num_buf_rm, + tr->tr_num_databuf_new, tr->tr_num_databuf_rm, + tr->tr_num_revoke, tr->tr_num_revoke_rm); } void gfs2_trans_end(struct gfs2_sbd *sdp) @@ -113,11 +94,8 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) if (!tr->tr_touched) { gfs2_log_release(sdp, tr->tr_reserved); - if (tr->tr_t_gh.gh_gl) { - gfs2_glock_dq(&tr->tr_t_gh); - gfs2_holder_uninit(&tr->tr_t_gh); + if (tr->tr_alloced) kfree(tr); - } sb_end_intwrite(sdp->sd_vfs); return; } @@ -131,55 +109,142 @@ void gfs2_trans_end(struct gfs2_sbd *sdp) gfs2_print_trans(tr); gfs2_log_commit(sdp, tr); - if (tr->tr_t_gh.gh_gl) { - gfs2_glock_dq(&tr->tr_t_gh); - gfs2_holder_uninit(&tr->tr_t_gh); - kfree(tr); - } + if (tr->tr_alloced && !tr->tr_attached) + kfree(tr); + up_read(&sdp->sd_log_flush_lock); if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) - gfs2_log_flush(sdp, NULL); + gfs2_log_flush(sdp, NULL, NORMAL_FLUSH); sb_end_intwrite(sdp->sd_vfs); } +static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, + struct buffer_head *bh, + const struct gfs2_log_operations *lops) +{ + struct gfs2_bufdata *bd; + + bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL); + bd->bd_bh = bh; + bd->bd_gl = gl; + bd->bd_ops = lops; + INIT_LIST_HEAD(&bd->bd_list); + bh->b_private = bd; + return bd; +} + /** - * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction - * @gl: the glock the buffer belongs to + * gfs2_trans_add_data - Add a databuf to the transaction. + * @gl: The inode glock associated with the buffer * @bh: The buffer to add - * @meta: True in the case of adding metadata * + * This is used in two distinct cases: + * i) In ordered write mode + * We put the data buffer on a list so that we can ensure that its + * synced to disk at the right time + * ii) In journaled data mode + * We need to journal the data block in the same way as metadata in + * the functions above. The difference is that here we have a tag + * which is two __be64's being the block number (as per meta data) + * and a flag which says whether the data block needs escaping or + * not. This means we need a new log entry for each 251 or so data + * blocks, which isn't an enormous overhead but twice as much as + * for normal metadata blocks. */ +void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) +{ + struct gfs2_trans *tr = current->journal_info; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct address_space *mapping = bh->b_page->mapping; + struct gfs2_inode *ip = GFS2_I(mapping->host); + struct gfs2_bufdata *bd; -void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) + if (!gfs2_is_jdata(ip)) { + gfs2_ordered_add_inode(ip); + return; + } + + lock_buffer(bh); + gfs2_log_lock(sdp); + bd = bh->b_private; + if (bd == NULL) { + gfs2_log_unlock(sdp); + unlock_buffer(bh); + if (bh->b_private == NULL) + bd = gfs2_alloc_bufdata(gl, bh, &gfs2_databuf_lops); + lock_buffer(bh); + gfs2_log_lock(sdp); + } + gfs2_assert(sdp, bd->bd_gl == gl); + tr->tr_touched = 1; + if (list_empty(&bd->bd_list)) { + set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); + gfs2_pin(sdp, bd->bd_bh); + tr->tr_num_databuf_new++; + list_add_tail(&bd->bd_list, &tr->tr_databuf); + } + gfs2_log_unlock(sdp); + unlock_buffer(bh); +} + +static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { + struct gfs2_meta_header *mh; + struct gfs2_trans *tr; + + tr = current->journal_info; + tr->tr_touched = 1; + if (!list_empty(&bd->bd_list)) + return; + set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); + set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); + mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; + if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { + pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n", + (unsigned long long)bd->bd_bh->b_blocknr); + BUG(); + } + gfs2_pin(sdp, bd->bd_bh); + mh->__pad0 = cpu_to_be64(0); + mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); + list_add(&bd->bd_list, &tr->tr_buf); + tr->tr_num_buf_new++; +} + +void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; struct gfs2_bufdata *bd; lock_buffer(bh); gfs2_log_lock(sdp); bd = bh->b_private; - if (bd) - gfs2_assert(sdp, bd->bd_gl == gl); - else { + if (bd == NULL) { gfs2_log_unlock(sdp); unlock_buffer(bh); - gfs2_attach_bufdata(gl, bh, meta); - bd = bh->b_private; + lock_page(bh->b_page); + if (bh->b_private == NULL) + bd = gfs2_alloc_bufdata(gl, bh, &gfs2_buf_lops); + unlock_page(bh->b_page); lock_buffer(bh); gfs2_log_lock(sdp); } - lops_add(sdp, bd); + gfs2_assert(sdp, bd->bd_gl == gl); + meta_lo_add(sdp, bd); gfs2_log_unlock(sdp); unlock_buffer(bh); } void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { + struct gfs2_trans *tr = current->journal_info; + BUG_ON(!list_empty(&bd->bd_list)); - BUG_ON(!list_empty(&bd->bd_ail_st_list)); - BUG_ON(!list_empty(&bd->bd_ail_gl_list)); - lops_init_le(bd, &gfs2_revoke_lops); - lops_add(sdp, bd); + gfs2_add_revoke(sdp, bd); + tr->tr_touched = 1; + tr->tr_num_revoke++; } void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index bf2ae9aeee7..1e6e7da25a1 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -39,7 +39,8 @@ extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, unsigned int revokes); extern void gfs2_trans_end(struct gfs2_sbd *sdp); -extern void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); +extern void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh); +extern void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh); extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index f00d7c5744f..86d2035ac66 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -7,6 +7,8 @@ * of the GNU General Public License version 2. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/spinlock.h> #include <linux/completion.h> #include <linux/buffer_head.h> @@ -30,22 +32,27 @@ mempool_t *gfs2_page_pool __read_mostly; void gfs2_assert_i(struct gfs2_sbd *sdp) { - printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n", - sdp->sd_fsname); + fs_emerg(sdp, "fatal assertion failed\n"); } -int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) +int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; const struct lm_lockops *lm = ls->ls_ops; va_list args; + struct va_format vaf; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return 0; va_start(args, fmt); - vprintk(fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + fs_err(sdp, "%pV", &vaf); + va_end(args); if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { @@ -54,6 +61,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); + if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) + wait_for_completion(&sdp->sd_wdack); + if (lm->lm_unmount) { fs_err(sdp, "telling LM to unmount\n"); lm->lm_unmount(sdp); @@ -63,7 +73,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) } if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) - panic("GFS2: fsid=%s: panic requested.\n", sdp->sd_fsname); + panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); return -1; } @@ -79,10 +89,9 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, { int me; me = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, assertion, - sdp->sd_fsname, function, file, line); + "fatal: assertion \"%s\" failed\n" + " function = %s, file = %s, line = %u\n", + assertion, function, file, line); dump_stack(); return (me) ? -1 : -2; } @@ -102,11 +111,8 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, return -2; if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) - printk(KERN_WARNING - "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, assertion, - sdp->sd_fsname, function, file, line); + fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", + assertion, function, file, line); if (sdp->sd_args.ar_debug) BUG(); @@ -135,10 +141,8 @@ int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, { int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: filesystem consistency error\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, function, file, line); + "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", + function, file, line); return rv; } @@ -154,13 +158,12 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: filesystem consistency error\n" - "GFS2: fsid=%s: inode = %llu %llu\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino, - (unsigned long long)ip->i_no_addr, - sdp->sd_fsname, function, file, line); + "fatal: filesystem consistency error\n" + " inode = %llu %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)ip->i_no_formal_ino, + (unsigned long long)ip->i_no_addr, + function, file, line); return rv; } @@ -176,12 +179,11 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, struct gfs2_sbd *sdp = rgd->rd_sbd; int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: filesystem consistency error\n" - "GFS2: fsid=%s: RG = %llu\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)rgd->rd_addr, - sdp->sd_fsname, function, file, line); + "fatal: filesystem consistency error\n" + " RG = %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)rgd->rd_addr, + function, file, line); return rv; } @@ -197,12 +199,11 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, { int me; me = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: invalid metadata block\n" - "GFS2: fsid=%s: bh = %llu (%s)\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, - sdp->sd_fsname, function, file, line); + "fatal: invalid metadata block\n" + " bh = %llu (%s)\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, type, + function, file, line); return (me) ? -1 : -2; } @@ -218,12 +219,11 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, { int me; me = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: invalid metadata block\n" - "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, t, - sdp->sd_fsname, function, file, line); + "fatal: invalid metadata block\n" + " bh = %llu (type: exp=%u, found=%u)\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, type, t, + function, file, line); return (me) ? -1 : -2; } @@ -238,10 +238,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, { int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: I/O error\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, function, file, line); + "fatal: I/O error\n" + " function = %s, file = %s, line = %u\n", + function, file, line); return rv; } @@ -256,32 +255,11 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, { int rv; rv = gfs2_lm_withdraw(sdp, - "GFS2: fsid=%s: fatal: I/O error\n" - "GFS2: fsid=%s: block = %llu\n" - "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", - sdp->sd_fsname, - sdp->sd_fsname, (unsigned long long)bh->b_blocknr, - sdp->sd_fsname, function, file, line); + "fatal: I/O error\n" + " block = %llu\n" + " function = %s, file = %s, line = %u\n", + (unsigned long long)bh->b_blocknr, + function, file, line); return rv; } -void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, - unsigned int bit, int new_value) -{ - unsigned int c, o, b = bit; - int old_value; - - c = b / (8 * PAGE_SIZE); - b %= 8 * PAGE_SIZE; - o = b / 8; - b %= 8; - - old_value = (bitmap[c][o] & (1 << b)); - gfs2_assert_withdraw(sdp, !old_value != !new_value); - - if (new_value) - bitmap[c][o] |= 1 << b; - else - bitmap[c][o] &= ~(1 << b); -} - diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 80535739ac7..cbdcbdf3961 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -10,22 +10,23 @@ #ifndef __UTIL_DOT_H__ #define __UTIL_DOT_H__ +#ifdef pr_fmt +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#endif + #include <linux/mempool.h> #include "incore.h" -#define fs_printk(level, fs, fmt, arg...) \ - printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg) - -#define fs_info(fs, fmt, arg...) \ - fs_printk(KERN_INFO , fs , fmt , ## arg) - -#define fs_warn(fs, fmt, arg...) \ - fs_printk(KERN_WARNING , fs , fmt , ## arg) - -#define fs_err(fs, fmt, arg...) \ - fs_printk(KERN_ERR, fs , fmt , ## arg) - +#define fs_emerg(fs, fmt, ...) \ + pr_emerg("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) +#define fs_warn(fs, fmt, ...) \ + pr_warn("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) +#define fs_err(fs, fmt, ...) \ + pr_err("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) +#define fs_info(fs, fmt, ...) \ + pr_info("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) void gfs2_assert_i(struct gfs2_sbd *sdp); @@ -85,7 +86,7 @@ static inline int gfs2_meta_check(struct gfs2_sbd *sdp, struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; u32 magic = be32_to_cpu(mh->mh_magic); if (unlikely(magic != GFS2_MAGIC)) { - printk(KERN_ERR "GFS2: Magic number missing at %llu\n", + pr_err("Magic number missing at %llu\n", (unsigned long long)bh->b_blocknr); return -EIO; } @@ -164,9 +165,7 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, #define gfs2_tune_get(sdp, field) \ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) -void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, - unsigned int bit, int new_value); -int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...); +__printf(2, 3) +int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...); #endif /* __UTIL_DOT_H__ */ - diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 76c144b3c9b..0b81f783f78 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -13,6 +13,7 @@ #include <linux/buffer_head.h> #include <linux/xattr.h> #include <linux/gfs2_ondisk.h> +#include <linux/posix_acl_xattr.h> #include <asm/uaccess.h> #include "gfs2.h" @@ -270,7 +271,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, if (error) goto out_gunlock; - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); dataptrs = GFS2_EA2DATAPTRS(ea); for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { @@ -309,7 +310,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { ip->i_inode.i_ctime = CURRENT_TIME; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -331,7 +332,7 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, if (error) return error; - error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) goto out_alloc; @@ -509,7 +510,7 @@ static int gfs2_iter_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, } if (din) { - gfs2_trans_add_bh(ip->i_gl, bh[x], 1); + gfs2_trans_add_meta(ip->i_gl, bh[x]); memcpy(pos, din, cp_size); din += sdp->sd_jbsize; } @@ -629,7 +630,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) return error; gfs2_trans_add_unrevoke(sdp, block, 1); *bhp = gfs2_meta_new(ip->i_gl, block); - gfs2_trans_add_bh(ip->i_gl, *bhp, 1); + gfs2_trans_add_meta(ip->i_gl, *bhp); gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header)); @@ -691,7 +692,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, return error; gfs2_trans_add_unrevoke(sdp, block, 1); bh = gfs2_meta_new(ip->i_gl, block); - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); gfs2_add_inode_blocks(&ip->i_inode, 1); @@ -723,6 +724,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, unsigned int blks, ea_skeleton_call_t skeleton_call, void *private) { + struct gfs2_alloc_parms ap = { .target = blks }; struct buffer_head *dibh; int error; @@ -734,7 +736,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, if (error) return error; - error = gfs2_inplace_reserve(ip, blks, 0); + error = gfs2_inplace_reserve(ip, &ap); if (error) goto out_gunlock_q; @@ -751,7 +753,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { ip->i_inode.i_ctime = CURRENT_TIME; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -834,7 +836,7 @@ static void ea_set_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_header *prev = el->el_prev; u32 len; - gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); + gfs2_trans_add_meta(ip->i_gl, el->el_bh); if (!prev || !GFS2_EA_IS_STUFFED(ea)) { ea->ea_type = GFS2_EATYPE_UNUSED; @@ -872,7 +874,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, if (error) return error; - gfs2_trans_add_bh(ip->i_gl, bh, 1); + gfs2_trans_add_meta(ip->i_gl, bh); if (es->ea_split) ea = ea_split_ea(ea); @@ -886,7 +888,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, if (error) goto out; ip->i_inode.i_ctime = CURRENT_TIME; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); out: @@ -901,7 +903,7 @@ static int ea_set_simple_alloc(struct gfs2_inode *ip, struct gfs2_ea_header *ea = es->es_ea; int error; - gfs2_trans_add_bh(ip->i_gl, es->es_bh, 1); + gfs2_trans_add_meta(ip->i_gl, es->es_bh); if (es->ea_split) ea = ea_split_ea(ea); @@ -997,7 +999,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, goto out; } - gfs2_trans_add_bh(ip->i_gl, indbh, 1); + gfs2_trans_add_meta(ip->i_gl, indbh); } else { u64 blk; unsigned int n = 1; @@ -1006,7 +1008,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, return error; gfs2_trans_add_unrevoke(sdp, blk, 1); indbh = gfs2_meta_new(ip->i_gl, blk); - gfs2_trans_add_bh(ip->i_gl, indbh, 1); + gfs2_trans_add_meta(ip->i_gl, indbh); gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); gfs2_buffer_clear_tail(indbh, mh_size); @@ -1092,7 +1094,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) if (error) return error; - gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); + gfs2_trans_add_meta(ip->i_gl, el->el_bh); if (prev) { u32 len; @@ -1109,7 +1111,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { ip->i_inode.i_ctime = CURRENT_TIME; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1265,7 +1267,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) if (GFS2_EA_IS_STUFFED(el.el_ea)) { error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0); if (error == 0) { - gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); + gfs2_trans_add_meta(ip->i_gl, el.el_bh); memcpy(GFS2_EA2DATA(el.el_ea), data, GFS2_EA_DATA_LEN(el.el_ea)); } @@ -1352,7 +1354,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) if (error) goto out_gunlock; - gfs2_trans_add_bh(ip->i_gl, indbh, 1); + gfs2_trans_add_meta(ip->i_gl, indbh); eablk = (__be64 *)(indbh->b_data + sizeof(struct gfs2_meta_header)); bstart = 0; @@ -1384,7 +1386,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1434,7 +1436,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip) error = gfs2_meta_inode_buffer(ip, &dibh); if (!error) { - gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_trans_add_meta(ip->i_gl, dibh); gfs2_dinode_out(ip, dibh->b_data); brelse(dibh); } @@ -1461,7 +1463,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip) if (error) return error; - error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (error) return error; @@ -1499,7 +1501,8 @@ static const struct xattr_handler gfs2_xattr_security_handler = { const struct xattr_handler *gfs2_xattr_handlers[] = { &gfs2_xattr_user_handler, &gfs2_xattr_security_handler, - &gfs2_xattr_system_handler, + &posix_acl_access_xattr_handler, + &posix_acl_default_xattr_handler, NULL, }; |
