diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-22 17:32:27 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-10-22 17:32:27 -0700 |
commit | 5fe3a5ae5c09d53b2b3c7a971e1d87ab3a747055 (patch) | |
tree | 1e0d3e10c83e456a1678c4e01acb5ff624129202 | |
parent | 0fc0531e0a2174377a86fd6953ecaa00287d8f70 (diff) | |
parent | 39dc948c6921169e13224a97fa53188922acfde8 (diff) |
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (36 commits)
xfs: semaphore cleanup
xfs: Extend project quotas to support 32bit project ids
xfs: remove xfs_buf wrappers
xfs: remove xfs_cred.h
xfs: remove xfs_globals.h
xfs: remove xfs_version.h
xfs: remove xfs_refcache.h
xfs: fix the xfs_trans_committed
xfs: remove unused t_callback field in struct xfs_trans
xfs: fix bogus m_maxagi check in xfs_iget
xfs: do not use xfs_mod_incore_sb_batch for per-cpu counters
xfs: do not use xfs_mod_incore_sb for per-cpu counters
xfs: remove XFS_MOUNT_NO_PERCPU_SB
xfs: pack xfs_buf structure more tightly
xfs: convert buffer cache hash to rbtree
xfs: serialise inode reclaim within an AG
xfs: batch inode reclaim lookup
xfs: implement batched inode lookups for AG walking
xfs: split out inode walk inode grabbing
xfs: split inode AG walking into separate code for reclaim
...
60 files changed, 1185 insertions, 1375 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index f3ccaec5760..ba5312802aa 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -188,8 +188,8 @@ _xfs_buf_initialize( atomic_set(&bp->b_hold, 1); init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_list); - INIT_LIST_HEAD(&bp->b_hash_list); - init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ + RB_CLEAR_NODE(&bp->b_rbnode); + sema_init(&bp->b_sema, 0); /* held, no waiters */ XB_SET_OWNER(bp); bp->b_target = target; bp->b_file_offset = range_base; @@ -262,8 +262,6 @@ xfs_buf_free( { trace_xfs_buf_free(bp, _RET_IP_); - ASSERT(list_empty(&bp->b_hash_list)); - if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { uint i; @@ -422,8 +420,10 @@ _xfs_buf_find( { xfs_off_t range_base; size_t range_length; - xfs_bufhash_t *hash; - xfs_buf_t *bp, *n; + struct xfs_perag *pag; + struct rb_node **rbp; + struct rb_node *parent; + xfs_buf_t *bp; range_base = (ioff << BBSHIFT); range_length = (isize << BBSHIFT); @@ -432,14 +432,37 @@ _xfs_buf_find( ASSERT(!(range_length < (1 << btp->bt_sshift))); ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); - hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; - - spin_lock(&hash->bh_lock); - - list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { - ASSERT(btp == bp->b_target); - if (bp->b_file_offset == range_base && - bp->b_buffer_length == range_length) { + /* get tree root */ + pag = xfs_perag_get(btp->bt_mount, + xfs_daddr_to_agno(btp->bt_mount, ioff)); + + /* walk tree */ + spin_lock(&pag->pag_buf_lock); + rbp = &pag->pag_buf_tree.rb_node; + parent = NULL; + bp = NULL; + while (*rbp) { + parent = *rbp; + bp = rb_entry(parent, struct xfs_buf, b_rbnode); + + if (range_base < bp->b_file_offset) + rbp = &(*rbp)->rb_left; + else if (range_base > bp->b_file_offset) + rbp = &(*rbp)->rb_right; + else { + /* + * found a block offset match. If the range doesn't + * match, the only way this is allowed is if the buffer + * in the cache is stale and the transaction that made + * it stale has not yet committed. i.e. we are + * reallocating a busy extent. Skip this buffer and + * continue searching to the right for an exact match. + */ + if (bp->b_buffer_length != range_length) { + ASSERT(bp->b_flags & XBF_STALE); + rbp = &(*rbp)->rb_right; + continue; + } atomic_inc(&bp->b_hold); goto found; } @@ -449,17 +472,21 @@ _xfs_buf_find( if (new_bp) { _xfs_buf_initialize(new_bp, btp, range_base, range_length, flags); - new_bp->b_hash = hash; - list_add(&new_bp->b_hash_list, &hash->bh_list); + rb_link_node(&new_bp->b_rbnode, parent, rbp); + rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); + /* the buffer keeps the perag reference until it is freed */ + new_bp->b_pag = pag; + spin_unlock(&pag->pag_buf_lock); } else { XFS_STATS_INC(xb_miss_locked); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); } - - spin_unlock(&hash->bh_lock); return new_bp; found: - spin_unlock(&hash->bh_lock); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); /* Attempt to get the semaphore without sleeping, * if this does not work then we need to drop the @@ -625,8 +652,7 @@ void xfs_buf_readahead( xfs_buftarg_t *target, xfs_off_t ioff, - size_t isize, - xfs_buf_flags_t flags) + size_t isize) { struct backing_dev_info *bdi; @@ -634,8 +660,42 @@ xfs_buf_readahead( if (bdi_read_congested(bdi)) return; - flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); - xfs_buf_read(target, ioff, isize, flags); + xfs_buf_read(target, ioff, isize, + XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); +} + +/* + * Read an uncached buffer from disk. Allocates and returns a locked + * buffer containing the disk contents or nothing. + */ +struct xfs_buf * +xfs_buf_read_uncached( + struct xfs_mount *mp, + struct xfs_buftarg *target, + xfs_daddr_t daddr, + size_t length, + int flags) +{ + xfs_buf_t *bp; + int error; + + bp = xfs_buf_get_uncached(target, length, flags); + if (!bp) + return NULL; + + /* set up the buffer for a read IO */ + xfs_buf_lock(bp); + XFS_BUF_SET_ADDR(bp, daddr); + XFS_BUF_READ(bp); + XFS_BUF_BUSY(bp); + + xfsbdstrat(mp, bp); + error = xfs_buf_iowait(bp); + if (error || bp->b_error) { + xfs_buf_relse(bp); + return NULL; + } + return bp; } xfs_buf_t * @@ -707,9 +767,10 @@ xfs_buf_associate_memory( } xfs_buf_t * -xfs_buf_get_noaddr( +xfs_buf_get_uncached( + struct xfs_buftarg *target, size_t len, - xfs_buftarg_t *target) + int flags) { unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; int error, i; @@ -725,7 +786,7 @@ xfs_buf_get_noaddr( goto fail_free_buf; for (i = 0; i < page_count; i++) { - bp->b_pages[i] = alloc_page(GFP_KERNEL); + bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); if (!bp->b_pages[i]) goto fail_free_mem; } @@ -740,7 +801,7 @@ xfs_buf_get_noaddr( xfs_buf_unlock(bp); - trace_xfs_buf_get_noaddr(bp, _RET_IP_); + trace_xfs_buf_get_uncached(bp, _RET_IP_); return bp; fail_free_mem: @@ -774,29 +835,30 @@ void xfs_buf_rele( xfs_buf_t *bp) { - xfs_bufhash_t *hash = bp->b_hash; + struct xfs_perag *pag = bp->b_pag; trace_xfs_buf_rele(bp, _RET_IP_); - if (unlikely(!hash)) { + if (!pag) { ASSERT(!bp->b_relse); + ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); if (atomic_dec_and_test(&bp->b_hold)) xfs_buf_free(bp); return; } + ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); ASSERT(atomic_read(&bp->b_hold) > 0); - if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { + if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { if (bp->b_relse) { atomic_inc(&bp->b_hold); - spin_unlock(&hash->bh_lock); - (*(bp->b_relse)) (bp); - } else if (bp->b_flags & XBF_FS_MANAGED) { - spin_unlock(&hash->bh_lock); + spin_unlock(&pag->pag_buf_lock); + bp->b_relse(bp); } else { ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); - list_del_init(&bp->b_hash_list); - spin_unlock(&hash->bh_lock); + rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); xfs_buf_free(bp); } } @@ -859,7 +921,7 @@ xfs_buf_lock( trace_xfs_buf_lock(bp, _RET_IP_); if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) - xfs_log_force(bp->b_mount, 0); + xfs_log_force(bp->b_target->bt_mount, 0); if (atomic_read(&bp->b_io_remaining)) blk_run_address_space(bp->b_target->bt_mapping); down(&bp->b_sema); @@ -970,7 +1032,6 @@ xfs_bwrite( { int error; - bp->b_mount = mp; bp->b_flags |= XBF_WRITE; bp->b_flags &= ~(XBF_ASYNC | XBF_READ); @@ -991,8 +1052,6 @@ xfs_bdwrite( { trace_xfs_buf_bdwrite(bp, _RET_IP_); - bp->b_mount = mp; - bp->b_flags &= ~XBF_READ; bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); @@ -1001,7 +1060,7 @@ xfs_bdwrite( /* * Called when we want to stop a buffer from getting written or read. - * We attach the EIO error, muck with its flags, and call biodone + * We attach the EIO error, muck with its flags, and call xfs_buf_ioend * so that the proper iodone callbacks get called. */ STATIC int @@ -1018,21 +1077,21 @@ xfs_bioerror( XFS_BUF_ERROR(bp, EIO); /* - * We're calling biodone, so delete XBF_DONE flag. + * We're calling xfs_buf_ioend, so delete XBF_DONE flag. */ XFS_BUF_UNREAD(bp); XFS_BUF_UNDELAYWRITE(bp); XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); return EIO; } /* * Same as xfs_bioerror, except that we are releasing the buffer - * here ourselves, and avoiding the biodone call. + * here ourselves, and avoiding the xfs_buf_ioend call. * This is meant for userdata errors; metadata bufs come with * iodone functions attached, so that we can track down errors. */ @@ -1081,7 +1140,7 @@ int xfs_bdstrat_cb( struct xfs_buf *bp) { - if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { + if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { trace_xfs_bdstrat_shut(bp, _RET_IP_); /* * Metadata write that didn't get logged but @@ -1387,62 +1446,24 @@ xfs_buf_iomove( */ void xfs_wait_buftarg( - xfs_buftarg_t *btp) -{ - xfs_buf_t *bp, *n; - xfs_bufhash_t *hash; - uint i; - - for (i = 0; i < (1 << btp->bt_hashshift); i++) { - hash = &btp->bt_hash[i]; -again: - spin_lock(&hash->bh_lock); - list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { - ASSERT(btp == bp->b_target); - if (!(bp->b_flags & XBF_FS_MANAGED)) { - spin_unlock(&hash->bh_lock); - /* - * Catch superblock reference count leaks - * immediately - */ - BUG_ON(bp->b_bn == 0); - delay(100); - goto again; - } - } - spin_unlock(&hash->bh_lock); - } -} - -/* - * Allocate buffer hash table for a given target. - * For devices containing metadata (i.e. not the log/realtime devices) - * we need to allocate a much larger hash table. - */ -STATIC void -xfs_alloc_bufhash( - xfs_buftarg_t *btp, - int external) + struct xfs_buftarg *btp) { - unsigned int i; + struct xfs_perag *pag; + uint i; - btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */ - btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * - sizeof(xfs_bufhash_t)); - for (i = 0; i < (1 << btp->bt_hashshift); i++) { - spin_lock_init(&btp->bt_hash[i].bh_lock); - INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); + for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { + pag = xfs_perag_get(btp->bt_mount, i); + spin_lock(&pag->pag_buf_lock); + while (rb_first(&pag->pag_buf_tree)) { + spin_unlock(&pag->pag_buf_lock); + delay(100); + spin_lock(&pag->pag_buf_lock); + } + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); } } -STATIC void -xfs_free_bufhash( - xfs_buftarg_t *btp) -{ - kmem_free_large(btp->bt_hash); - btp->bt_hash = NULL; -} - /* * buftarg list for delwrite queue processing */ @@ -1475,7 +1496,6 @@ xfs_free_buftarg( xfs_flush_buftarg(btp, 1); if (mp->m_flags & XFS_MOUNT_BARRIER) xfs_blkdev_issue_flush(btp); - xfs_free_bufhash(btp); iput(btp->bt_mapping->host); /* Unregister the buftarg first so that we don't get a @@ -1597,6 +1617,7 @@ out_error: xfs_buftarg_t * xfs_alloc_buftarg( + struct xfs_mount *mp, struct block_device *bdev, int external, const char *fsname) @@ -1605,6 +1626,7 @@ xfs_alloc_buftarg( btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); + btp->bt_mount = mp; btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; if (xfs_setsize_buftarg_early(btp, bdev)) @@ -1613,7 +1635,6 @@ xfs_alloc_buftarg( goto error; if (xfs_alloc_delwrite_queue(btp, fsname)) goto error; - xfs_alloc_bufhash(btp, external); return btp; error: @@ -1904,7 +1925,7 @@ xfs_flush_buftarg( bp = list_first_entry(&wait_list, struct xfs_buf, b_list); list_del_init(&bp->b_list); - xfs_iowait(bp); + xfs_buf_iowait(bp); xfs_buf_relse(bp); } } diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 9d021c73ea5..383a3f37cf9 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -51,7 +51,6 @@ typedef enum { #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ -#define XBF_FS_MANAGED (1 << 8) /* filesystem controls freeing memory */ #define XBF_ORDERED (1 << 11)/* use ordered writes */ #define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ #define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ @@ -96,7 +95,6 @@ typedef unsigned int xfs_buf_flags_t; { XBF_DONE, "DONE" }, \ { XBF_DELWRI, "DELWRI" }, \ { XBF_STALE, "STALE" }, \ - { XBF_FS_MANAGED, "FS_MANAGED" }, \ { XBF_ORDERED, "ORDERED" }, \ { XBF_READ_AHEAD, "READ_AHEAD" }, \ { XBF_LOCK, "LOCK" }, /* should never be set */\ @@ -123,14 +121,11 @@ typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; struct address_space *bt_mapping; + struct xfs_mount *bt_mount; unsigned int bt_bsize; unsigned int bt_sshift; size_t bt_smask; - /* per device buffer hash table */ - uint bt_hashshift; - xfs_bufhash_t *bt_hash; - /* per device delwri queue */ struct task_struct *bt_task; struct list_head bt_list; @@ -158,34 +153,41 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); #define XB_PAGES 2 typedef struct xfs_buf { + /* + * first cacheline holds all the fields needed for an uncontended cache + * hit to be fully processed. The semaphore straddles the cacheline + * boundary, but the counter and lock sits on the first cacheline, + * which is the only bit that is touched if we hit the semaphore + * fast-path on locking. + */ + struct rb_node b_rbnode; /* rbtree node */ + xfs_off_t b_file_offset; /* offset in file */ + size_t b_buffer_length;/* size of buffer in bytes */ + atomic_t b_hold; /* reference count */ + xfs_buf_flags_t b_flags; /* status flags */ struct semaphore b_sema; /* semaphore for lockables */ - unsigned long b_queuetime; /* time buffer was queued */ - atomic_t b_pin_count; /* pin count */ + wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; - xfs_buf_flags_t b_flags; /* status flags */ - struct list_head b_hash_list; /* hash table list */ - xfs_bufhash_t *b_hash; /* hash table list start */ + struct xfs_perag *b_pag; /* contains rbtree root */ xfs_buftarg_t *b_target; /* buffer target (device) */ - atomic_t b_hold; /* reference count */ xfs_daddr_t b_bn; /* block number for I/O */ - xfs_off_t b_file_offset; /* offset in file */ - size_t b_buffer_length;/* size of buffer in bytes */ size_t b_count_desired;/* desired transfer size */ void *b_addr; /* virtual address of buffer */ struct work_struct b_iodone_work; - atomic_t b_io_remaining; /* #outstanding I/O requests */ xfs_buf_iodone_t b_iodone; /* I/O completion function */ xfs_buf_relse_t b_relse; /* releasing function */ struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; void *b_fspriv2; - struct xfs_mount *b_mount; - unsigned short b_error; /* error code on I/O */ - unsigned int b_page_count; /* size of page array */ - unsigned int b_offset; /* page offset in first page */ struct page **b_pages; /* array of page pointers */ struct page *b_page_array[XB_PAGES]; /* inline pages */ + unsigned long b_queuetime; /* time buffer was queued */ + atomic_t b_pin_count; /* pin count */ + atomic_t b_io_remaining; /* #outstanding I/O requests */ + unsigned int b_page_count; /* size of page array */ + unsigned int b_offset; /* page offset in first page */ + unsigned short b_error; /* error code on I/O */ #ifdef XFS_BUF_LOCK_TRACKING int b_last_holder; #endif @@ -204,11 +206,13 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, xfs_buf_flags_t); extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); -extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); +extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); extern void xfs_buf_hold(xfs_buf_t *); -extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, - xfs_buf_flags_t); +extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); +struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, + struct xfs_buftarg *target, + xfs_daddr_t daddr, size_t length, int flags); /* Releasing Buffers */ extern void xfs_buf_free(xfs_buf_t *); @@ -233,6 +237,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *); extern int xfs_buf_iowait(xfs_buf_t *); extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, xfs_buf_rw_t); +#define xfs_buf_zero(bp, off, len) \ + xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) static inline int xfs_buf_geterror(xfs_buf_t *bp) { @@ -267,8 +273,6 @@ extern void xfs_buf_terminate(void); XFS_BUF_DONE(bp); \ } while (0) -#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) - #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) @@ -347,25 +351,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) xfs_buf_rele(bp); } -#define xfs_biodone(bp) xfs_buf_ioend(bp, 0) - -#define xfs_biomove(bp, off, len, data, rw) \ - xfs_buf_iomove((bp), (off), (len), (data), \ - ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ) - -#define xfs_biozero(bp, off, len) \ - xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) - -#define xfs_iowait(bp) xfs_buf_iowait(bp) - -#define xfs_baread(target, rablkno, ralen) \ - xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK) - - /* * Handling of buftargs. */ -extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *); +extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, + struct block_device *, int, const char *); extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h deleted file mode 100644 index 55bddf3b609..00000000000 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_CRED_H__ -#define __XFS_CRED_H__ - -#include <linux/capability.h> - -/* - * Credentials - */ -typedef const struct cred cred_t; - -#endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 1f279b012f9..ed88ed16811 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -32,10 +32,9 @@ xfs_tosspages( xfs_off_t last, int fiopt) { - struct address_space *mapping = VFS_I(ip)->i_mapping; - - if (mapping->nrpages) - truncate_inode_pages(mapping, first); + /* can't toss partial tail pages, so mask them out */ + last &= ~(PAGE_SIZE - 1); + truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); } int @@ -50,12 +49,11 @@ xfs_flushinval_pages( trace_xfs_pagecache_inval(ip, first, last); - if (mapping->nrpages) { - xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = filemap_write_and_wait(mapping); - if (!ret) - truncate_inode_pages(mapping, first); - } + xfs_iflags_clear(ip, XFS_ITRUNCATED); + ret = filemap_write_and_wait_range(mapping, first, + last == -1 ? LLONG_MAX : last); + if (!ret) + truncate_inode_pages_range(mapping, first, last); return -ret; } @@ -71,10 +69,9 @@ xfs_flush_pages( int ret = 0; int ret2; - if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { - xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = -filemap_fdatawrite(mapping); - } + xfs_iflags_clear(ip, XFS_ITRUNCATED); + ret = -filemap_fdatawrite_range(mapping, first, + last == -1 ? LLONG_MAX : last); if (flags & XBF_ASYNC) return ret; ret2 = xfs_wait_on_pages(ip, first, last); @@ -91,7 +88,9 @@ xfs_wait_on_pages( { struct address_space *mapping = VFS_I(ip)->i_mapping; - if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) - return -filemap_fdatawait(mapping); + if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { + return -filemap_fdatawait_range(mapping, first, + last == -1 ? ip->i_size - 1 : last); + } return 0; } diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 2ae8b1ccb02..76e81cff70b 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -16,7 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xfs.h" -#include "xfs_cred.h" #include "xfs_sysctl.h" /* diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h deleted file mode 100644 index 69f71caf061..00000000000 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_GLOBALS_H__ -#define __XFS_GLOBALS_H__ - -extern uint64_t xfs_panic_mask; /* set to cause more panics */ - -#endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 3b9e626f7cd..2ea238f6d38 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -790,7 +790,7 @@ xfs_ioc_fsgetxattr( xfs_ilock(ip, XFS_ILOCK_SHARED); fa.fsx_xflags = xfs_ip2xflags(ip); fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; - fa.fsx_projid = ip->i_d.di_projid; + fa.fsx_projid = xfs_get_projid(ip); if (attr) { if (ip->i_afp) { @@ -909,10 +909,10 @@ xfs_ioctl_setattr( return XFS_ERROR(EIO); /* - * Disallow 32bit project ids because on-disk structure - * is 16bit only. + * Disallow 32bit project ids when projid32bit feature is not enabled. */ - if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1)) + if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && + !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) return XFS_ERROR(EINVAL); /* @@ -961,7 +961,7 @@ xfs_ioctl_setattr( if (mask & FSX_PROJID) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && - ip->i_d.di_projid != fa->fsx_projid) { + xfs_get_projid(ip) != fa->fsx_projid) { ASSERT(tp); code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? @@ -1063,12 +1063,12 @@ xfs_ioctl_setattr( * Change the ownerships and register quota modifications * in the transaction. */ - if (ip->i_d.di_projid != fa->fsx_projid) { + if (xfs_get_projid(ip) != fa->fsx_projid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } - ip->i_d.di_projid = fa->fsx_projid; + xfs_set_projid(ip, fa->fsx_projid); /* * We may have to rev the inode as well as @@ -1088,8 +1088,8 @@ xfs_ioctl_setattr( xfs_diflags_to_linux(ip); } + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_ichgtime(ip, XFS_ICHGTIME_CHG); XFS_STATS_INC(xs_ig_attrchg); @@ -1301,7 +1301,8 @@ xfs_file_ioctl( case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: { + case XFS_IOC_UNRESVSP64: + case XFS_IOC_ZERO_RANGE: { xfs_flock64_t bf; if (copy_from_user(&bf, arg, sizeof(bf))) diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 6c83f7f62dc..b3486dfa552 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -164,7 +164,8 @@ xfs_ioctl32_bstat_copyin( get_user(bstat->bs_extsize, &bstat32->bs_extsize) || get_user(bstat->bs_extents, &bstat32->bs_extents) || get_user(bstat->bs_gen, &bstat32->bs_gen) || - get_user(bstat->bs_projid, &bstat32->bs_projid) || + get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || + get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || get_user(bstat->bs_aextents, &bstat32->bs_aextents)) @@ -218,6 +219,7 @@ xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_extents, &p32->bs_extents) || put_user(buffer->bs_gen, &p32->bs_gen) || put_user(buffer->bs_projid, &p32->bs_projid) || + put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) @@ -574,6 +576,7 @@ xfs_file_compat_ioctl( case XFS_IOC_FSGEOMETRY_V1: case XFS_IOC_FSGROWFSDATA: case XFS_IOC_FSGROWFSRT: + case XFS_IOC_ZERO_RANGE: return xfs_file_ioctl(filp, cmd, p); #else case XFS_IOC_ALLOCSP_32: diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 1024c4f8ba0..08b605792a9 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat { __s32 bs_extsize; /* extent size */ __s32 bs_extents; /* number of extents */ __u32 bs_gen; /* generation count */ - __u16 bs_projid; /* project id */ - unsigned char bs_pad[14]; /* pad space, unused */ + __u16 bs_projid_lo; /* lower part of project id */ +#define bs_projid bs_projid_lo /* (previously just bs_projid) */ + __u16 bs_projid_hi; /* high part of project id */ + unsigned char bs_pad[12]; /* pad space, unused */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index b1fc2a6bfe8..ec858e09d54 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -95,41 +95,6 @@ xfs_mark_inode_dirty( } /* - * Change the requested timestamp in the given inode. - * We don't lock across timestamp updates, and we don't log them but - * we do record the fact that there is dirty information in core. - */ -void -xfs_ichgtime( - xfs_inode_t *ip, - int flags) -{ - struct inode *inode = VFS_I(ip); - timespec_t tv; - int sync_it = 0; - - tv = current_fs_time(inode->i_sb); - - if ((flags & XFS_ICHGTIME_MOD) && - !timespec_equal(&inode->i_mtime, &tv)) { - inode->i_mtime = tv; - sync_it = 1; - } - if ((flags & XFS_ICHGTIME_CHG) && - !timespec_equal(&inode->i_ctime, &tv)) { - inode->i_ctime = tv; - sync_it = 1; - } - - /* - * Update complete - now make sure everyone knows that the inode - * is dirty. - */ - if (sync_it) - xfs_mark_inode_dirty_sync(ip); -} - -/* * Hook in SELinux. This is not quite correct yet, what we really need * here (as we do for default ACLs) is a mechanism by which creation of * these attrs can be journalled at inode creation time (along with the @@ -224,7 +189,7 @@ xfs_vn_mknod( } xfs_dentry_to_name(&name, dentry); - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); + error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); if (unlikely(error)) goto out_free_acl; @@ -397,7 +362,7 @@ xfs_vn_symlink( (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); xfs_dentry_to_name(&name, dentry); - error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); + error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) goto out; diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 2fa0bd9ebc7..214ddd71ff7 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -71,6 +71,7 @@ #include <linux/random.h> #include <linux/ctype.h> #include <linux/writeback.h> +#include <linux/capability.h> #include <asm/page.h> #include <asm/div64.h> @@ -79,14 +80,12 @@ #include <asm/byteorder.h> #include <asm/unaligned.h> -#include <xfs_cred.h> #include <xfs_vnode.h> #include <xfs_stats.h> #include <xfs_sysctl.h> #include <xfs_iops.h> #include <xfs_aops.h> #include <xfs_super.h> -#include <xfs_globals.h> #include <xfs_buf.h> /* @@ -144,7 +143,7 @@ #define SYNCHRONIZE() barrier() #define __return_address __builtin_return_address(0) -#define dfltprid 0 +#define XFS_PROJID_DEFAULT 0 #define MAXPATHLEN 1024 #define MIN(a,b) (min(a,b)) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 08fd3102128..ab31ce5aeaf 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -44,7 +44,6 @@ #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_vnodeops.h" -#include "xfs_version.h" #include "xfs_log_priv.h" #include "xfs_trans_priv.h" #include "xfs_filestream.h" @@ -645,7 +644,7 @@ xfs_barrier_test( XFS_BUF_ORDERED(sbp); xfsbdstrat(mp, sbp); - error = xfs_iowait(sbp); + error = xfs_buf_iowait(sbp); /* * Clear all the flags we set and possible error state in the @@ -757,18 +756,20 @@ xfs_open_devices( * Setup xfs_mount buffer target pointers */ error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname); + mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); if (!mp->m_ddev_targp) goto out_close_rtdev; if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname); + mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, + mp->m_fsname); if (!mp->m_rtdev_targp) goto out_free_ddev_targ; } if (logdev && logdev != ddev) { - mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname); + mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, + mp->m_fsname); if (!mp->m_logdev_targp) goto out_free_rtdev_targ; } else { @@ -971,12 +972,7 @@ xfs_fs_inode_init_once( /* * Dirty the XFS inode when mark_inode_dirty_sync() is called so that - * we catch unlogged VFS level updates to the inode. Care must be taken - * here - the transaction code calls mark_inode_dirty_sync() to mark the - * VFS inode dirty in a transaction and clears the i_update_core field; - * it must clear the field after calling mark_inode_dirty_sync() to - * correctly indicate that the dirty state has been propagated into the - * inode log item. + * we catch unlogged VFS level updates to the inode. * * We need the barrier() to maintain correct ordering between unlogged * updates and the transaction commit code that clears the i_update_core @@ -1520,8 +1516,9 @@ xfs_fs_fill_super( if (error) goto out_free_fsname; - if (xfs_icsb_init_counters(mp)) - mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; + error = xfs_icsb_init_counters(mp); + if (error) + goto out_close_devices; error = xfs_readsb(mp, flags); if (error) @@ -1582,6 +1579,7 @@ xfs_fs_fill_super( xfs_freesb(mp); out_destroy_counters: xfs_icsb_destroy_counters(mp); + out_close_devices: xfs_close_devices(mp); out_free_fsname: xfs_free_fsname(mp); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 1ef4a4d2d99..50a3266c999 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -62,6 +62,7 @@ extern void xfs_qm_exit(void); # define XFS_DBG_STRING "no debug" #endif +#define XFS_VERSION_STRING "SGI XFS" #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 81976ffed7d..37d33254981 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -39,42 +39,39 @@ #include <linux/kthread.h> #include <linux/freezer.h> +/* + * The inode lookup is done in batches to keep the amount of lock traffic and + * radix tree lookups to a minimum. The batch size is a trade off between + * lookup reduction and stack usage. This is in the reclaim path, so we can't + * be too greedy. + */ +#define XFS_LOOKUP_BATCH 32 -STATIC xfs_inode_t * -xfs_inode_ag_lookup( - struct xfs_mount *mp, - struct xfs_perag *pag, - uint32_t *first_index, - int tag) +STATIC int +xfs_inode_ag_walk_grab( + struct xfs_inode *ip) { - int nr_found; - struct xfs_inode *ip; + struct inode *inode = VFS_I(ip); - /* - * use a gang lookup to find the next inode in the tree - * as the tree is sparse and a gang lookup walks to find - * the number of objects requested. - */ - if (tag == XFS_ICI_NO_TAG) { - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void **)&ip, *first_index, 1); - } else { - nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, - (void **)&ip, *first_index, 1, tag); + /* nothing to sync during shutdown */ + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return EFSCORRUPTED; + + /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ + if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + return ENOENT; + + /* If we can't grab the inode, it must on it's way to reclaim. */ + if (!igrab(inode)) + return ENOENT; + + if (is_bad_inode(inode)) { + IRELE(ip); + return ENOENT; } - if (!nr_found) - return NULL; - /* - * Update the index for the next lookup. Catch overflows - * into the next AG range which can occur if we have inodes - * in the last block of the AG and we are currently - * pointing to the last inode. - */ - *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - return NULL; - return ip; + /* inode is valid */ + return 0; } STATIC int @@ -83,49 +80,75 @@ xfs_inode_ag_walk( struct xfs_perag *pag, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, - int tag, - int exclusive, - int *nr_to_scan) + int flags) { uint32_t first_index; int last_error = 0; int skipped; + int done; + int nr_found; restart: + done = 0; skipped = 0; first_index = 0; + nr_found = 0; do { + struct xfs_inode *batch[XFS_LOOKUP_BATCH]; int error = 0; - xfs_inode_t *ip; + int i; - if (exclusive) - write_lock(&pag->pag_ici_lock); - else - read_lock(&pag->pag_ici_lock); - ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); - if (!ip) { - if (exclusive) - write_unlock(&pag->pag_ici_lock); - else - read_unlock(&pag->pag_ici_lock); + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void **)batch, first_index, + XFS_LOOKUP_BATCH); + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); break; } - /* execute releases pag->pag_ici_lock */ - error = execute(ip, pag, flags); - if (error == EAGAIN) { - skipped++; - continue; + /* + * Grab the inodes before we drop the lock. if we found + * nothing, nr == 0 and the loop will be skipped. + */ + for (i = 0; i < nr_found; i++) { + struct xfs_inode *ip = batch[i]; + + if (done || xfs_inode_ag_walk_grab(ip)) + batch[i] = NULL; + + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + done = 1; + } + + /* unlock now we've grabbed the inodes. */ + read_unlock(&pag->pag_ici_lock); + + for (i = 0; i < nr_found; i++) { + if (!batch[i]) + continue; + error = execute(batch[i], pag, flags); + IRELE(batch[i]); + if (error == EAGAIN) { + skipped++; + continue; + } + if (error && last_error != EFSCORRUPTED) + last_error = error; } - if (error) - last_error = error; /* bail out if the filesystem is corrupted. */ if (error == EFSCORRUPTED) break; - } while ((*nr_to_scan)--); + } while (nr_found && !done); if (skipped) { delay(1); @@ -134,110 +157,32 @@ restart: return last_error; } -/* - * Select the next per-ag structure to iterate during the walk. The reclaim - * walk is optimised only to walk AGs with reclaimable inodes in them. - */ -static struct xfs_perag * -xfs_inode_ag_iter_next_pag( - struct xfs_mount *mp, - xfs_agnumber_t *first, - int tag) -{ - struct xfs_perag *pag = NULL; - - if (tag == XFS_ICI_RECLAIM_TAG) { - int found; - int ref; - - spin_lock(&mp->m_perag_lock); - found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, - (void **)&pag, *first, 1, tag); - if (found <= 0) { - spin_unlock(&mp->m_perag_lock); - return NULL; - } - *first = pag->pag_agno + 1; - /* open coded pag reference increment */ - ref = atomic_inc_return(&pag->pag_ref); - spin_unlock(&mp->m_perag_lock); - trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_); - } else { - pag = xfs_perag_get(mp, *first); - (*first)++; - } - return pag; -} - int xfs_inode_ag_iterator( struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, - int tag, - int exclusive, - int *nr_to_scan) + int flags) { struct xfs_perag *pag; int error = 0; int last_error = 0; xfs_agnumber_t ag; - int nr; - nr = nr_to_scan ? *nr_to_scan : INT_MAX; ag = 0; - while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { - error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, - exclusive, &nr); + while ((pag = xfs_perag_get(mp, ag))) { + ag = pag->pag_agno + 1; + error = xfs_inode_ag_walk(mp, pag, execute, flags); xfs_perag_put(pag); if (error) { last_error = error; if (error == EFSCORRUPTED) break; } - if (nr <= 0) - break; } - if (nr_to_scan) - *nr_to_scan = nr; return XFS_ERROR(last_error); } -/* must be called with pag_ici_lock held and releases it */ -int -xfs_sync_inode_valid( - struct xfs_inode *ip, - struct xfs_perag *pag) -{ - struct inode *inode = VFS_I(ip); - int error = EFSCORRUPTED; - - /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - goto out_unlock; - - /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - error = ENOENT; - if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) - goto out_unlock; - - /* If we can't grab the inode, it must on it's way to reclaim. */ - if (!igrab(inode)) - goto out_unlock; - - if (is_bad_inode(inode)) { - IRELE(ip); - goto out_unlock; - } - - /* inode is valid */ - error = 0; -out_unlock: - read_unlock(&pag->pag_ici_lock); - return error; -} - STATIC int xfs_sync_inode_data( struct xfs_inode *ip, @@ -248,10 +193,6 @@ xfs_sync_inode_data( struct address_space *mapping = inode->i_mapping; int error = 0; - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; - if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) goto out_wait; @@ -268,7 +209,6 @@ xfs_sync_inode_data( out_wait: if (flags & SYNC_WAIT) xfs_ioend_wait(ip); - IRELE(ip); return error; } @@ -280,10 +220,6 @@ xfs_sync_inode_attr( { int error = 0; - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; - xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_inode_clean(ip)) goto out_unlock; @@ -302,7 +238,6 @@ xfs_sync_inode_attr( out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); - IRELE(ip); return error; } @@ -318,8 +253,7 @@ xfs_sync_data( ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); - error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, - XFS_ICI_NO_TAG, 0, NULL); + error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); if (error) return XFS_ERROR(error); @@ -337,8 +271,7 @@ xfs_sync_attr( { ASSERT((flags & ~SYNC_WAIT) == 0); - return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, - XFS_ICI_NO_TAG, 0, NULL); + return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); } STATIC int @@ -698,6 +631,43 @@ __xfs_inode_clear_reclaim_tag( } /* + * Grab the inode for reclaim exclusively. + * Return 0 if we grabbed it, non-zero otherwise. + */ +STATIC int +xfs_reclaim_inode_grab( + struct xfs_inode *ip, + int flags) +{ + + /* + * do some unlocked checks first to avoid unnecceary lock traffic. + * The first is a flush lock check, the second is a already in reclaim + * check. Only do these checks if we are not going to block on locks. + */ + if ((flags & SYNC_TRYLOCK) && + (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { + return 1; + } + + /* + * The radix tree lock here protects a thread in xfs_iget from racing + * with us starting reclaim on the inode. Once we have the + * XFS_IRECLAIM flag set it will not touch us. + */ + spin_lock(&ip->i_flags_lock); + ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { + /* ignore as it is already under reclaim */ + spin_unlock(&ip->i_flags_lock); + return 1; + } + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + return 0; +} + +/* * Inodes in different states need to be treated differently, and the return * value of xfs_iflush is not sufficient to get this right. The following table * lists the inode states and the reclaim actions necessary for non-blocking @@ -755,23 +725,6 @@ xfs_reclaim_inode( { int error = 0; - /* - * The radix tree lock here protects a thread in xfs_iget from racing - * with us starting reclaim on the inode. Once we have the - * XFS_IRECLAIM flag set it will not touch us. - */ - spin_lock(&ip->i_flags_lock); - ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { - /* ignore as it is already under reclaim */ - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - return 0; - } - __xfs_iflags_set(ip, XFS_IRECLAIM); - spin_unlock(&ip->i_flags_lock); - write_unlock(&pag->pag_ici_lock); - xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iflock_nowait(ip)) { if (!(sync_mode & SYNC_WAIT)) @@ -868,13 +821,126 @@ reclaim: } +/* + * Walk the AGs and reclaim the inodes in them. Even if the filesystem is + * corrupted, we still want to try to reclaim all the inodes. If we don't, + * then a shut down during filesystem unmount reclaim walk leak all the + * unreclaimed inodes. + */ +int +xfs_reclaim_inodes_ag( + struct xfs_mount *mp, + int flags, + int *nr_to_scan) +{ + struct xfs_perag *pag; + int error = 0; + int last_error = 0; + xfs_agnumber_t ag; + int trylock = flags & SYNC_TRYLOCK; + int skipped; + +restart: + ag = 0; + skipped = 0; + while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { + unsigned long first_index = 0; + int done = 0; + int nr_found = 0; + + ag = pag->pag_agno + 1; + + if (trylock) { + if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { + skipped++; + continue; + } + first_index = pag->pag_ici_reclaim_cursor; + } else + mutex_lock(&pag->pag_ici_reclaim_lock); + + do { + struct xfs_inode *batch[XFS_LOOKUP_BATCH]; + int i; + + write_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup_tag( + &pag->pag_ici_root, + (void **)batch, first_index, + XFS_LOOKUP_BATCH, + XFS_ICI_RECLAIM_TAG); + if (!nr_found) { + write_unlock(&pag->pag_ici_lock); + break; + } + + /* + * Grab the inodes before we drop the lock. if we found + * nothing, nr == 0 and the loop will be skipped. + */ + for (i = 0; i < nr_found; i++) { + struct xfs_inode *ip = batch[i]; + + if (done || xfs_reclaim_inode_grab(ip, flags)) + batch[i] = NULL; + + /* + * Update the index for the next lookup. Catch + * overflows into the next AG range which can + * occur if we have inodes in the last block of + * the AG and we are currently pointing to the + * last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + done = 1; + } + + /* unlock now we've grabbed the inodes. */ + write_unlock(&pag->pag_ici_lock); + + for (i = 0; i < nr_found; i++) { + if (!batch[i]) + continue; + error = xfs_reclaim_inode(batch[i], pag, flags); + if (error && last_error != EFSCORRUPTED) + last_error = error; + } + + *nr_to_scan -= XFS_LOOKUP_BATCH; + + } while (nr_found && !done && *nr_to_scan > 0); + + if (trylock && !done) + pag->pag_ici_reclaim_cursor = first_index; + else + pag->pag_ici_reclaim_cursor = 0; + mutex_unlock(&pag->pag_ici_reclaim_lock); + xfs_perag_put(pag); + } + + /* + * if we skipped any AG, and we still have scan count remaining, do + * another pass this time using blocking reclaim semantics (i.e + * waiting on the reclaim locks and ignoring the reclaim cursors). This + * ensure that when we get more reclaimers than AGs we block rather + * than spin trying to execute reclaim. + */ + if (trylock && skipped && *nr_to_scan > 0) { + trylock = 0; + goto restart; + } + return XFS_ERROR(last_error); +} + int xfs_reclaim_inodes( xfs_mount_t *mp, int mode) { - return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, - XFS_ICI_RECLAIM_TAG, 1, NULL); + int nr_to_scan = INT_MAX; + + return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); } /* @@ -896,17 +962,16 @@ xfs_reclaim_inode_shrink( if (!(gfp_mask & __GFP_FS)) return -1; - xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, - XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); - /* if we don't exhaust the scan, don't bother coming back */ + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); + /* terminate if we don't exhaust the scan */ if (nr_to_scan > 0) return -1; } reclaimable = 0; ag = 0; - while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, - XFS_ICI_RECLAIM_TAG))) { + while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { + ag = pag->pag_agno + 1; reclaimable += pag->pag_ici_reclaimable; xfs_perag_put(pag); } diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index fe78726196f..32ba6628290 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -47,10 +47,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, struct xfs_inode *ip); -int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); +int xfs_sync_inode_grab(struct xfs_inode *ip); int xfs_inode_ag_iterator(struct xfs_mount *mp, int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags, int tag, int write_lock, int *nr_to_scan); + int flags); void xfs_inode_shrinker_register(struct xfs_mount *mp); void xfs_inode_shrinker_unregister(struct xfs_mount *mp); diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 8fe311a456e..acef2e98c59 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \ unsigned long caller_ip), \ TP_ARGS(mp, agno, refcount, caller_ip)) DEFINE_PERAG_REF_EVENT(xfs_perag_get); -DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); +DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_put); DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); @@ -330,7 +330,7 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done); DEFINE_BUF_EVENT(xfs_buf_delwri_queue); DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); DEFINE_BUF_EVENT(xfs_buf_delwri_split); -DEFINE_BUF_EVENT(xfs_buf_get_noaddr); +DEFINE_BUF_EVENT(xfs_buf_get_uncached); DEFINE_BUF_EVENT(xfs_bdstrat_shut); DEFINE_BUF_EVENT(xfs_buf_item_relse); DEFINE_BUF_EVENT(xfs_buf_item_iodone); diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h deleted file mode 100644 index f8d279d7563..00000000000 --- a/fs/xfs/linux-2.6/xfs_version.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_VERSION_H__ -#define __XFS_VERSION_H__ - -/* - * Dummy file that can contain a timestamp to put into the - * XFS init string, to help users keep track of what they're - * running - */ - -#define XFS_VERSION_STRING "SGI XFS" - -#endif /* __XFS_VERSION_H__ */ diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index e1a2f6800e0..faf8e1a83a1 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -463,87 +463,68 @@ xfs_qm_dqtobp( uint flags) { xfs_bmbt_irec_t map; - int nmaps, error; + int nmaps = 1, error; xfs_buf_t *bp; - xfs_inode_t *quotip; - xfs_mount_t *mp; + xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); + xfs_mount_t *mp = dqp->q_mount; xfs_disk_dquot_t *ddq; - xfs_dqid_t id; - boolean_t newdquot; + xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); xfs_trans_t *tp = (tpp ? *tpp : NULL); - mp = dqp->q_mount; - id = be32_to_cpu(dqp->q_core.d_id); - nmaps = 1; - newdquot = B_FALSE; + dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; - /* - * If we don't know where the dquot lives, find out. - */ - if (dqp->q_blkno == (xfs_daddr_t) 0) { - /* We use the id as an index */ - dqp->q_fileoffset = (xfs_fileoff_t)id / - mp->m_quotainfo->qi_dqperchunk; - nmaps = 1; - quotip = XFS_DQ_TO_QIP(dqp); - xfs_ilock(quotip, XFS_ILOCK_SHARED); + xfs_ilock(quotip, XFS_ILOCK_SHARED); + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { /* - * Return if this type of quotas is turned off while we didn't - * have an inode lock + * Return if this type of quotas is turned off while we + * didn't have the quota inode lock. */ - if (XFS_IS_THIS_QUOTA_OFF(dqp)) { - xfs_iunlock(quotip, XFS_ILOCK_SHARED); - return (ESRCH); - } + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + return ESRCH; + } + + /* + * Find the block map; no allocations yet + */ + error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, + XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, + NULL, 0, &map, &nmaps, NULL); + + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + if (error) + return error; + + ASSERT(nmaps == 1); + ASSERT(map.br_blockcount == 1); + + /* + * Offset of dquot in the (fixed sized) dquot chunk. + */ + dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * + sizeof(xfs_dqblk_t); + + ASSERT(map.br_startblock != DELAYSTARTBLOCK); + if (map.br_startblock == HOLESTARTBLOCK) { /* - * Find the block map; no allocations yet + * We don't allocate unless we're asked to */ - error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, - XFS_DQUOT_CLUSTER_SIZE_FSB, - XFS_BMAPI_METADATA, - NULL, 0, &map, &nmaps, NULL); + if (!(flags & XFS_QMOPT_DQALLOC)) + return ENOENT; - xfs_iunlock(quotip, XFS_ILOCK_SHARED); + ASSERT(tp); + error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, + dqp->q_fileoffset, &bp); if (error) - return (error); - ASSERT(nmaps == 1); - ASSERT(map.br_blockcount == 1); + return error; + tp = *tpp; + } else { + trace_xfs_dqtobp_read(dqp); /* - * offset of dquot in the (fixed sized) dquot chunk. + * store the blkno etc so that we don't have to do the + * mapping all the time */ - dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * - sizeof(xfs_dqblk_t); - if (map.br_startblock == HOLESTARTBLOCK) { - /* - * We don't allocate unless we're asked to - */ - if (!(flags & XFS_QMOPT_DQALLOC)) - return (ENOENT); - - ASSERT(tp); - if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, - dqp->q_fileoffset, &bp))) - return (error); - tp = *tpp; - newdquot = B_TRUE; - } else { - /* - * store the blkno etc so that we don't have to do the - * mapping all the time - */ - dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); - } - } - ASSERT(dqp->q_blkno != DELAYSTARTBLOCK); - ASSERT(dqp->q_blkno != HOLESTARTBLOCK); - - /* - * Read in the buffer, unless we've just done the allocation - * (in which case we already have the buf). - */ - if (!newdquot) { - trace_xfs_dqtobp_read(dqp); + dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, dqp->q_blkno, @@ -552,13 +533,14 @@ xfs_qm_dqtobp( if (error || !bp) return XFS_ERROR(error); } + ASSERT(XFS_BUF_ISBUSY(bp)); ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); /* * calculate the location of the dquot inside the buffer. */ - ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset); + ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); /* * A simple sanity check in case we got a corrupted dquot... @@ -1176,18 +1158,18 @@ xfs_qm_dqflush( xfs_dquot_t *dqp, uint flags) { - xfs_mount_t *mp; - xfs_buf_t *bp; - xfs_disk_dquot_t *ddqp; + struct xfs_mount *mp = dqp->q_mount; + struct xfs_buf *bp; + struct xfs_disk_dquot *ddqp; int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); + trace_xfs_dqflush(dqp); /* - * If not dirty, or it's pinned and we are not supposed to - * block, nada. + * If not dirty, or it's pinned and we are not supposed to block, nada. */ if (!XFS_DQ_IS_DIRTY(dqp) || (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { @@ -1201,40 +1183,46 @@ xfs_qm_dqflush( * down forcibly. If that's the case we must not write this dquot * to disk, because the log record didn't make it to disk! */ - if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) { - dqp->dq_flags &= ~(XFS_DQ_DIRTY); + if (XFS_FORCED_SHUTDOWN(mp)) { + dqp->dq_flags &= ~XFS_DQ_DIRTY; xfs_dqfunlock(dqp); return XFS_ERROR(EIO); } /* * Get the buffer containing the on-disk dquot - * We don't need a transaction envelope because we know that the - * the ondisk-dquot has already been allocated for. */ - if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, 0, &bp); + if (error) { ASSERT(error != ENOENT); - /* - * Quotas could have gotten turned off (ESRCH) - */ xfs_dqfunlock(dqp); - return (error); + return error; } - if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), - 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { - xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE); + /* + * Calculate the location of the dquot inside the buffer. + */ + ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); + + /* + * A simple sanity check in case we got a corrupted dquot.. + */ + if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0, + XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { + xfs_buf_relse(bp); + xfs_dqfunlock(dqp); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return XFS_ERROR(EIO); } /* This is the only portion of data that needs to persist */ - memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t)); + memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); /* * Clear the dirty field and remember the flush lsn for later use. */ - dqp->dq_flags &= ~(XFS_DQ_DIRTY); - mp = dqp->q_mount; + dqp->dq_flags &= ~XFS_DQ_DIRTY; xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, &dqp->q_logitem.qli_item.li_lsn); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 9a92407109a..f8e854b4fde 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -55,8 +55,6 @@ uint ndquot; kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; -static cred_t xfs_zerocr; - STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); @@ -837,7 +835,7 @@ xfs_qm_dqattach_locked( xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, flags & XFS_QMOPT_DQALLOC, ip->i_udquot, &ip->i_gdquot) : - xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, + xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, flags & XFS_QMOPT_DQALLOC, ip->i_udquot, &ip->i_gdquot); /* @@ -1199,87 +1197,6 @@ xfs_qm_list_destroy( mutex_destroy(&(list->qh_lock)); } - -/* - * Stripped down version of dqattach. This doesn't attach, or even look at the - * dquots attached to the inode. The rationale is that there won't be any - * attached at the time this is called from quotacheck. - */ -STATIC int -xfs_qm_dqget_noattach( - xfs_inode_t *ip, - xfs_dquot_t **O_udqpp, - xfs_dquot_t **O_gdqpp) -{ - int error; - xfs_mount_t *mp; - xfs_dquot_t *udqp, *gdqp; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - mp = ip->i_mount; - udqp = NULL; - gdqp = NULL; - - if (XFS_IS_UQUOTA_ON(mp)) { - ASSERT(ip->i_udquot == NULL); - /* - * We want the dquot allocated if it doesn't exist. - */ - if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER, - XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, - &udqp))) { - /* - * Shouldn't be able to turn off quotas here. - */ - ASSERT(error != ESRCH); - ASSERT(error != ENOENT); - return error; - } - ASSERT(udqp); - } - - if (XFS_IS_OQUOTA_ON(mp)) { - ASSERT(ip->i_gdquot == NULL); - if (udqp) - xfs_dqunlock(udqp); - error = XFS_IS_GQUOTA_ON(mp) ? - xfs_qm_dqget(mp, ip, - ip->i_d.di_gid, XFS_DQ_GROUP, - XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, - &gdqp) : - xfs_qm_dqget(mp, ip, - ip->i_d.di_projid, XFS_DQ_PROJ, - XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, - &gdqp); - if (error) { - if (udqp) - xfs_qm_dqrele(udqp); - ASSERT(error != ESRCH); - ASSERT(error != ENOENT); - return error; - } - ASSERT(gdqp); - - /* Reacquire the locks in the right order */ - if (udqp) { - if (! xfs_qm_dqlock_nowait(udqp)) { - xfs_dqunlock(gdqp); - xfs_dqlock(udqp); - xfs_dqlock(gdqp); - } - } - } - - *O_udqpp = udqp; - *O_gdqpp = gdqp; - -#ifdef QUOTADEBUG - if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp)); - if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp)); -#endif - return 0; -} - /* * Create an inode and return with a reference already taken, but unlocked * This is how we create quota inodes @@ -1305,8 +1222,8 @@ xfs_qm_qino_alloc( return error; } - if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, - &xfs_zerocr, 0, 1, ip, &committed))) { + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); + if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); return error; @@ -1516,7 +1433,7 @@ xfs_qm_dqiterate( rablkcnt = map[i+1].br_blockcount; rablkno = map[i+1].br_startblock; while (rablkcnt--) { - xfs_baread(mp->m_ddev_targp, + xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, rablkno), mp->m_quotainfo->qi_dqchunklen); rablkno++; @@ -1546,18 +1463,34 @@ xfs_qm_dqiterate( /* * Called by dqusage_adjust in doing a quotacheck. - * Given the inode, and a dquot (either USR or GRP, doesn't matter), - * this updates its incore copy as well as the buffer copy. This is - * so that once the quotacheck is done, we can just log all the buffers, - * as opposed to logging numerous updates to individual dquots. + * + * Given the inode, and a dquot id this updates both the incore dqout as well + * as the buffer copy. This is so that once the quotacheck is done, we can + * just log all the buffers, as opposed to logging numerous updates to + * individual dquots. */ -STATIC void +STATIC int xfs_qm_quotacheck_dqadjust( - xfs_dquot_t *dqp, + struct xfs_inode *ip, + xfs_dqid_t id, + uint type, xfs_qcnt_t nblks, xfs_qcnt_t rtblks) { - ASSERT(XFS_DQ_IS_LOCKED(dqp)); + struct xfs_mount *mp = ip->i_mount; + struct xfs_dquot *dqp; + int error; + + error = xfs_qm_dqget(mp, ip, id, type, + XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); + if (error) { + /* + * Shouldn't be able to turn off quotas here. + */ + ASSERT(error != ESRCH); + ASSERT(error != ENOENT); + return error; + } trace_xfs_dqadjust(dqp); @@ -1582,11 +1515,13 @@ xfs_qm_quotacheck_dqadjust( * There are no timers for the default values set in the root dquot. */ if (dqp->q_core.d_id) { - xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); - xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); + xfs_qm_adjust_dqlimits(mp, &dqp->q_core); + xfs_qm_adjust_dqtimers(mp, &dqp->q_core); } dqp->dq_flags |= XFS_DQ_DIRTY; + xfs_qm_dqput(dqp); + return 0; } STATIC int @@ -1629,8 +1564,7 @@ xfs_qm_dqusage_adjust( int *res) /* result code value */ { xfs_inode_t *ip; - xfs_dquot_t *udqp, *gdqp; - xfs_qcnt_t nblks, rtblks; + xfs_qcnt_t nblks, rtblks = 0; int error; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); @@ -1650,51 +1584,24 @@ xfs_qm_dqusage_adjust( * the case in all other instances. It's OK that we do this because * quotacheck is done only at mount time. */ - if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) { + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); + if (error) { *res = BULKSTAT_RV_NOTHING; return error; } - /* - * Obtain the locked dquots. In case of an error (eg. allocation - * fails for ENOSPC), we return the negative of the error number - * to bulkstat, so that it can get propagated to quotacheck() and - * making us disable quotas for the file system. - */ - if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - IRELE(ip); - *res = BULKSTAT_RV_GIVEUP; - return error; - } + ASSERT(ip->i_delayed_blks == 0); - rtblks = 0; - if (! XFS_IS_REALTIME_INODE(ip)) { - nblks = (xfs_qcnt_t)ip->i_d.di_nblocks; - } else { + if (XFS_IS_REALTIME_INODE(ip)) { /* * Walk thru the extent list and count the realtime blocks. */ - if ((error = xfs_qm_get_rtblks(ip, &rtblks))) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - IRELE(ip); - if (udqp) - xfs_qm_dqput(udqp); - if (gdqp) - xfs_qm_dqput(gdqp); - *res = BULKSTAT_RV_GIVEUP; - return error; - } - nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; + error = xfs_qm_get_rtblks(ip, &rtblks); + if (error) + goto error0; } - ASSERT(ip->i_delayed_blks == 0); - /* - * We can't release the inode while holding its dquot locks. - * The inode can go into inactive and might try to acquire the dquotlocks. - * So, just unlock here and do a vn_rele at the end. - */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); + nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; /* * Add the (disk blocks and inode) resources occupied by this @@ -1709,26 +1616,36 @@ xfs_qm_dqusage_adjust( * and quotaoffs don't race. (Quotachecks happen at mount time only). */ if (XFS_IS_UQUOTA_ON(mp)) { - ASSERT(udqp); - xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks); - xfs_qm_dqput(udqp); + error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, + XFS_DQ_USER, nblks, rtblks); + if (error) + goto error0; } - if (XFS_IS_OQUOTA_ON(mp)) { - ASSERT(gdqp); - xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks); - xfs_qm_dqput(gdqp); + + if (XFS_IS_GQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, + XFS_DQ_GROUP, nblks, rtblks); + if (error) + goto error0; } - /* - * Now release the inode. This will send it to 'inactive', and - * possibly even free blocks. - */ - IRELE(ip); - /* - * Goto next inode. - */ + if (XFS_IS_PQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), + XFS_DQ_PROJ, nblks, rtblks); + if (error) + goto error0; + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); *res = BULKSTAT_RV_DIDONE; return 0; + +error0: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); + *res = BULKSTAT_RV_GIVEUP; + return error; } /* @@ -2224,7 +2141,7 @@ xfs_qm_write_sb_changes( /* - * Given an inode, a uid and gid (from cred_t) make sure that we have + * Given an inode, a uid, gid and prid make sure that we have * allocated relevant dquot(s) on disk, and that we won't exceed inode * quotas by creating this file. * This also attaches dquot(s) to the given inode after locking it, @@ -2332,7 +2249,7 @@ xfs_qm_vop_dqalloc( xfs_dqunlock(gq); } } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { - if (ip->i_d.di_projid != prid) { + if (xfs_get_projid(ip) != prid) { xfs_iunlock(ip, lockflags); if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, XFS_DQ_PROJ, @@ -2454,7 +2371,7 @@ xfs_qm_vop_chown_reserve( } if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { if (XFS_IS_PQUOTA_ON(ip->i_mount) && - ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id)) + xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) prjflags = XFS_QMOPT_ENOSPC; if (prjflags || @@ -2558,7 +2475,7 @@ xfs_qm_vop_create_dqattach( ip->i_gdquot = gdqp; ASSERT(XFS_IS_OQUOTA_ON(mp)); ASSERT((XFS_IS_GQUOTA_ON(mp) ? - ip->i_d.di_gid : ip->i_d.di_projid) == + ip->i_d.di_gid : xfs_get_projid(ip)) == be32_to_cpu(gdqp->q_core.d_id)); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); } diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index bea02d786c5..45b5cb1788a 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -81,7 +81,7 @@ xfs_qm_statvfs( xfs_mount_t *mp = ip->i_mount; xfs_dquot_t *dqp; - if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { + if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); xfs_qm_dqput(dqp); } diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 45e5849df23..bdebc183223 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -276,7 +276,7 @@ xfs_qm_scall_trunc_qfile( goto out_unlock; } - xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); out_unlock: @@ -875,21 +875,14 @@ xfs_dqrele_inode( struct xfs_perag *pag, int flags) { - int error; - /* skip quota inodes */ if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || ip == ip->i_mount->m_quotainfo->qi_gquotaip) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); - read_unlock(&pag->pag_ici_lock); return 0; } - error = xfs_sync_inode_valid(ip, pag); - if (error) - return error; - xfs_ilock(ip, XFS_ILOCK_EXCL); if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { xfs_qm_dqrele(ip->i_udquot); @@ -900,8 +893,6 @@ xfs_dqrele_inode( ip->i_gdquot = NULL; } xfs_iunlock(ip, XFS_ILOCK_EXCL); - - IRELE(ip); return 0; } @@ -918,8 +909,7 @@ xfs_qm_dqrele_all_inodes( uint flags) { ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, - XFS_ICI_NO_TAG, 0, NULL); + xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); } /*------------------------------------------------------------------------*/ @@ -1175,7 +1165,7 @@ xfs_qm_internalqcheck_adjust( } xfs_qm_internalqcheck_get_dquots(mp, (xfs_dqid_t) ip->i_d.di_uid, - (xfs_dqid_t) ip->i_d.di_projid, + (xfs_dqid_t) xfs_get_projid(ip), (xfs_dqid_t) ip->i_d.di_gid, &ud, &gd); if (XFS_IS_UQUOTA_ON(mp)) { diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 4917d4eed4e..63c7a1a6c02 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -230,6 +230,15 @@ typedef struct xfs_perag { rwlock_t pag_ici_lock; /* incore inode lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ int pag_ici_reclaimable; /* reclaimable inodes */ + struct mutex pag_ici_reclaim_lock; /* serialisation point */ + unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ + + /* buffer cache index */ + spinlock_t pag_buf_lock; /* lock for pag_buf_tree */ + struct rb_root pag_buf_tree; /* ordered tree of active buffers */ + + /* for rcu-safe freeing */ + struct rcu_head rcu_head; #endif int pagb_count; /* pagb slots in use */ } xfs_perag_t; diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index af168faccc7..112abc439ca 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -675,7 +675,7 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t gtbnoa; /* aligned ... */ xfs_extlen_t gtdiff; /* difference to right side entry */ xfs_extlen_t gtlen; /* length of right side entry */ - xfs_extlen_t gtlena; /* aligned ... */ + xfs_extlen_t gtlena = 0; /* aligned ... */ xfs_agblock_t gtnew; /* useful start bno of right side */ int error; /* error code */ int i; /* result code, temporary */ @@ -684,7 +684,7 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t ltbnoa; /* aligned ... */ xfs_extlen_t ltdiff; /* difference to left side entry */ xfs_extlen_t ltlen; /* length of left side entry */ - xfs_extlen_t ltlena; /* aligned ... */ + xfs_extlen_t ltlena = 0; /* aligned ... */ xfs_agblock_t ltnew; /* useful start bno of left side */ xfs_extlen_t rlen; /* length of returned extent */ #if defined(DEBUG) && defined(__KERNEL__) diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 97f7328967f..3916925e258 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -280,38 +280,6 @@ xfs_allocbt_key_diff( return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; } -STATIC int -xfs_allocbt_kill_root( - struct xfs_btree_cur *cur, - struct xfs_buf *bp, - int level, - union xfs_btree_ptr *newroot) -{ - int error; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_STATS_INC(cur, killroot); - - /* - * Update the root pointer, decreasing the level by 1 and then - * free the old root. - */ - xfs_allocbt_set_root(cur, newroot, -1); - error = xfs_allocbt_free_block(cur, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - } - - XFS_BTREE_STATS_INC(cur, free); - - xfs_btree_setbuf(cur, level, NULL); - cur->bc_nlevels--; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; -} - #ifdef DEBUG STATIC int xfs_allocbt_keys_inorder( @@ -423,7 +391,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { .dup_cursor = xfs_allocbt_dup_cursor, .set_root = xfs_allocbt_set_root, - .kill_root = xfs_allocbt_kill_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, .update_lastrec = xfs_allocbt_update_lastrec, diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index c2568242a90..c8637537881 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -355,16 +355,15 @@ xfs_attr_set_int( if (mp->m_flags & XFS_MOUNT_WSYNC) { xfs_trans_set_sync(args.trans); } + + if (!error && (flags & ATTR_KERNOTIME) == 0) { + xfs_trans_ichgtime(args.trans, dp, + XFS_ICHGTIME_CHG); + } err2 = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - /* - * Hit the inode change time. - */ - if (!error && (flags & ATTR_KERNOTIME) == 0) { - xfs_ichgtime(dp, XFS_ICHGTIME_CHG); - } return(error == 0 ? err2 : error); } @@ -420,6 +419,9 @@ xfs_attr_set_int( xfs_trans_set_sync(args.trans); } + if ((flags & ATTR_KERNOTIME) == 0) + xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + /* * Commit the last in the sequence of transactions. */ @@ -427,13 +429,6 @@ xfs_attr_set_int( error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - /* - * Hit the inode change time. - */ - if (!error && (flags & ATTR_KERNOTIME) == 0) { - xfs_ichgtime(dp, XFS_ICHGTIME_CHG); - } - return(error); out: @@ -567,6 +562,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) xfs_trans_set_sync(args.trans); } + if ((flags & ATTR_KERNOTIME) == 0) + xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); + /* * Commit the last in the sequence of transactions. */ @@ -574,13 +572,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); - /* - * Hit the inode change time. - */ - if (!error && (flags & ATTR_KERNOTIME) == 0) { - xfs_ichgtime(dp, XFS_ICHGTIME_CHG); - } - return(error); out: @@ -1995,7 +1986,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, dst, XBF_READ); + xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ); xfs_buf_relse(bp); dst += tmp; valuelen -= tmp; @@ -2125,9 +2116,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : XFS_BUF_SIZE(bp); - xfs_biomove(bp, 0, tmp, src, XBF_WRITE); + xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE); if (tmp < XFS_BUF_SIZE(bp)) - xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); + xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ return (error); } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index f90dadd5a96..8abd12e32e1 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -614,7 +614,7 @@ xfs_bmap_add_extent( nblks += cur->bc_private.b.allocated; ASSERT(nblks <= da_old); if (nblks < da_old) - xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, + xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, (int64_t)(da_old - nblks), rsvd); } /* @@ -1079,7 +1079,8 @@ xfs_bmap_add_extent_delay_real( diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - (cur ? cur->bc_private.b.allocated : 0)); if (diff > 0 && - xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) { + xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, + -((int64_t)diff), rsvd)) { /* * Ick gross gag me with a spoon. */ @@ -1089,16 +1090,18 @@ xfs_bmap_add_extent_delay_real( temp--; diff--; if (!diff || - !xfs_mod_incore_sb(ip->i_mount, - XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) + !xfs_icsb_modify_counters(ip->i_mount, + XFS_SBS_FDBLOCKS, + -((int64_t)diff), rsvd)) break; } if (temp2) { temp2--; diff--; if (!diff || - !xfs_mod_incore_sb(ip->i_mount, - XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) + !xfs_icsb_modify_counters(ip->i_mount, + XFS_SBS_FDBLOCKS, + -((int64_t)diff), rsvd)) break; } } @@ -1766,7 +1769,7 @@ xfs_bmap_add_extent_hole_delay( } if (oldlen != newlen) { ASSERT(oldlen > newlen); - xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, + xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, (int64_t)(oldlen - newlen), rsvd); /* * Nothing to do for disk quota accounting here. @@ -3111,9 +3114,10 @@ xfs_bmap_del_extent( * Nothing to do for disk quota accounting here. */ ASSERT(da_old >= da_new); - if (da_old > da_new) - xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new), - rsvd); + if (da_old > da_new) { + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + (int64_t)(da_old - da_new), rsvd); + } done: *logflagsp = flags; return error; @@ -4526,13 +4530,13 @@ xfs_bmapi( -((int64_t)extsz), (flags & XFS_BMAPI_RSVBLOCKS)); } else { - error = xfs_mod_incore_sb(mp, + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -((int64_t)alen), (flags & XFS_BMAPI_RSVBLOCKS)); } if (!error) { - error = xfs_mod_incore_sb(mp, + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -((int64_t)indlen), (flags & XFS_BMAPI_RSVBLOCKS)); @@ -4542,7 +4546,7 @@ xfs_bmapi( (int64_t)extsz, (flags & XFS_BMAPI_RSVBLOCKS)); else if (error) - xfs_mod_incore_sb(mp, + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, (int64_t)alen, (flags & XFS_BMAPI_RSVBLOCKS)); @@ -4744,8 +4748,12 @@ xfs_bmapi( * Check if writing previously allocated but * unwritten extents. */ - if (wr && mval->br_state == XFS_EXT_UNWRITTEN && - ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) { + if (wr && + ((mval->br_state == XFS_EXT_UNWRITTEN && + ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) || + (mval->br_state == XFS_EXT_NORM && + ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) == + (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) { /* * Modify (by adding) the state flag, if writing. */ @@ -4757,7 +4765,9 @@ xfs_bmapi( *firstblock; cur->bc_private.b.flist = flist; } - mval->br_state = XFS_EXT_NORM; + mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) + ? XFS_EXT_NORM + : XFS_EXT_UNWRITTEN; error = xfs_bmap_add_extent(ip, lastx, &cur, mval, firstblock, flist, &tmp_logflags, whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); @@ -5200,7 +5210,7 @@ xfs_bunmapi( ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { - xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, (int64_t)del.br_blockcount, rsvd); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index b13569a6179..71ec9b6ecdf 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -74,9 +74,12 @@ typedef struct xfs_bmap_free #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ /* combine contig. space */ #define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */ -#define XFS_BMAPI_CONVERT 0x200 /* unwritten extent conversion - */ - /* need write cache flushing and no */ - /* additional allocation alignments */ +/* + * unwritten extent conversion - this needs write cache flushing and no additional + * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts + * from written to unwritten, otherwise convert from unwritten to written. + */ +#define XFS_BMAPI_CONVERT 0x200 #define XFS_BMAPI_FLAGS \ { XFS_BMAPI_WRITE, "WRITE" }, \ diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 829af92f0fb..04f9cca8da7 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -217,7 +217,7 @@ xfs_btree_del_cursor( */ for (i = 0; i < cur->bc_nlevels; i++) { if (cur->bc_bufs[i]) - xfs_btree_setbuf(cur, i, NULL); + xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); else if (!error) break; } @@ -656,7 +656,7 @@ xfs_btree_reada_bufl( ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); + xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); } /* @@ -676,7 +676,7 @@ xfs_btree_reada_bufs( ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); + xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); } STATIC int @@ -763,22 +763,19 @@ xfs_btree_readahead( * Set the buffer for level "lev" in the cursor to bp, releasing * any previous buffer. */ -void +STATIC void xfs_btree_setbuf( xfs_btree_cur_t *cur, /* btree cursor */ int lev, /* level in btree */ xfs_buf_t *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ - xfs_buf_t *obp; /* old buffer pointer */ - obp = cur->bc_bufs[lev]; - if (obp) - xfs_trans_brelse(cur->bc_tp, obp); + if (cur->bc_bufs[lev]) + xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); cur->bc_bufs[lev] = bp; cur->bc_ra[lev] = 0; - if (!bp) - return; + b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) @@ -3011,6 +3008,43 @@ out0: return 0; } +/* + * Kill the current root node, and replace it with it's only child node. + */ +STATIC int +xfs_btree_kill_root( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int level, + union xfs_btree_ptr *newroot) +{ + int error; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, killroot); + + /* + * Update the root pointer, decreasing the level by 1 and then + * free the old root. + */ + cur->bc_ops->set_root(cur, newroot, -1); + + error = cur->bc_ops->free_block(cur, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + XFS_BTREE_STATS_INC(cur, free); + + cur->bc_bufs[level] = NULL; + cur->bc_ra[level] = 0; + cur->bc_nlevels--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + STATIC int xfs_btree_dec_cursor( struct xfs_btree_cur *cur, @@ -3195,7 +3229,7 @@ xfs_btree_delrec( * Make it the new root of the btree. */ pp = xfs_btree_ptr_addr(cur, 1, block); - error = cur->bc_ops->kill_root(cur, bp, level, pp); + error = xfs_btree_kill_root(cur, bp, level, pp); if (error) goto error0; } else if (level > 0) { diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7fa07062bdd..82fafc66bd1 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -152,9 +152,7 @@ struct xfs_btree_ops { /* update btree root pointer */ void (*set_root)(struct xfs_btree_cur *cur, - union xfs_btree_ptr *nptr, int level_change); - int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp, - int level, union xfs_btree_ptr *newroot); + union xfs_btree_ptr *nptr, int level_change); /* block allocation / freeing */ int (*alloc_block)(struct xfs_btree_cur *cur, @@ -399,16 +397,6 @@ xfs_btree_reada_bufs( xfs_agblock_t agbno, /* allocation group block number */ xfs_extlen_t count); /* count of filesystem blocks */ -/* - * Set the buffer for level "lev" in the cursor to bp, releasing - * any previous buffer. - */ -void -xfs_btree_setbuf( - xfs_btree_cur_t *cur, /* btree cursor */ - int lev, /* level in btree */ - struct xfs_buf *bp); /* new buffer to set */ - /* * Common btree core entry points. diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 1b09d7a280d..2686d0d54c5 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -692,8 +692,7 @@ xfs_buf_item_init( * the first. If we do already have one, there is * nothing to do here so return. */ - if (bp->b_mount != mp) - bp->b_mount = mp; + ASSERT(bp->b_target->bt_mount == mp); if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); if (lip->li_type == XFS_LI_BUF) { @@ -974,7 +973,7 @@ xfs_buf_iodone_callbacks( xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); return; } @@ -1033,7 +1032,7 @@ xfs_buf_iodone_callbacks( xfs_buf_do_callbacks(bp, lip); XFS_BUF_SET_FSPRIVATE(bp, NULL); XFS_BUF_CLR_IODONE_FUNC(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); } /* diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 30fa0e206fb..1c00bedb317 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -2042,7 +2042,7 @@ xfs_da_do_buf( mappedbno, nmapped, 0, &bp); break; case 3: - xfs_baread(mp->m_ddev_targp, mappedbno, nmapped); + xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped); error = 0; bp = NULL; break; diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index e5b153b2e6a..dffba9ba0db 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -49,8 +49,9 @@ typedef struct xfs_dinode { __be32 di_uid; /* owner's user id */ __be32 di_gid; /* owner's group id */ __be32 di_nlink; /* number of links to file */ - __be16 di_projid; /* owner's project id */ - __u8 di_pad[8]; /* unused, zeroed space */ + __be16 di_projid_lo; /* lower part of owner's project id */ + __be16 di_projid_hi; /* higher part owner's project id */ + __u8 di_pad[6]; /* unused, zeroed space */ __be16 di_flushiter; /* incremented on flush */ xfs_timestamp_t di_atime; /* time last accessed */ xfs_timestamp_t di_mtime; /* time last modified */ diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 504be8640e9..ae891223be9 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -961,7 +961,7 @@ xfs_dir2_leaf_getdents( if (i > ra_current && map[ra_index].br_blockcount >= mp->m_dirblkfsbs) { - xfs_baread(mp->m_ddev_targp, + xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map[ra_index].br_startblock + ra_offset), diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 87c2e9d0228..8f6fc1a9638 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -293,9 +293,11 @@ typedef struct xfs_bstat { __s32 bs_extsize; /* extent size */ __s32 bs_extents; /* number of extents */ __u32 bs_gen; /* generation count */ - __u16 bs_projid; /* project id */ + __u16 bs_projid_lo; /* lower part of project id */ +#define bs_projid bs_projid_lo /* (previously just bs_projid) */ __u16 bs_forkoff; /* inode fork offset in bytes */ - unsigned char bs_pad[12]; /* pad space, unused */ + __u16 bs_projid_hi; /* higher part of project id */ + unsigned char bs_pad[10]; /* pad space, unused */ __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ @@ -448,6 +450,7 @@ typedef struct xfs_handle { /* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) +#define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) /* * ioctl commands that replace IRIX syssgi()'s diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 43b1d569933..a7c116e814a 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -144,12 +144,11 @@ xfs_growfs_data_private( if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) return error; dpct = pct - mp->m_sb.sb_imax_pct; - error = xfs_read_buf(mp, mp->m_ddev_targp, - XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, &bp); - if (error) - return error; - ASSERT(bp); + bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, + XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), + BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); + if (!bp) + return EIO; xfs_buf_relse(bp); new = nb; /* use new as a temporary here */ @@ -597,7 +596,8 @@ out: * the extra reserve blocks from the reserve..... */ int error; - error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0); + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + fdblks_delta, 0); if (error == ENOSPC) goto retry; } diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 5371d2dc360..0626a32c344 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -212,7 +212,7 @@ xfs_ialloc_inode_init( * to log a whole cluster of inodes instead of all the * individual transactions causing a lot of log traffic. */ - xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); + xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); for (i = 0; i < ninodes; i++) { int ioffset = i << mp->m_sb.sb_inodelog; uint isize = sizeof(struct xfs_dinode); diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index d352862cefa..16921f55c54 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -183,38 +183,6 @@ xfs_inobt_key_diff( cur->bc_rec.i.ir_startino; } -STATIC int -xfs_inobt_kill_root( - struct xfs_btree_cur *cur, - struct xfs_buf *bp, - int level, - union xfs_btree_ptr *newroot) -{ - int error; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); - XFS_BTREE_STATS_INC(cur, killroot); - - /* - * Update the root pointer, decreasing the level by 1 and then - * free the old root. - */ - xfs_inobt_set_root(cur, newroot, -1); - error = xfs_inobt_free_block(cur, bp); - if (error) { - XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); - return error; - } - - XFS_BTREE_STATS_INC(cur, free); - - cur->bc_bufs[level] = NULL; - cur->bc_nlevels--; - - XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); - return 0; -} - #ifdef DEBUG STATIC int xfs_inobt_keys_inorder( @@ -309,7 +277,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_inobt_set_root, - .kill_root = xfs_inobt_kill_root, .alloc_block = xfs_inobt_alloc_block, .free_block = xfs_inobt_free_block, .get_minrecs = xfs_inobt_get_minrecs, diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b1ecc6f97ad..0cdd26932d8 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -365,8 +365,8 @@ xfs_iget( xfs_perag_t *pag; xfs_agino_t agino; - /* the radix tree exists only in inode capable AGs */ - if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) + /* reject inode numbers outside existing AGs */ + if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) return EINVAL; /* get the perag structure and ensure that it's inode capable */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 34798f391c4..108c7a085f9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -660,7 +660,8 @@ xfs_dinode_from_disk( to->di_uid = be32_to_cpu(from->di_uid); to->di_gid = be32_to_cpu(from->di_gid); to->di_nlink = be32_to_cpu(from->di_nlink); - to->di_projid = be16_to_cpu(from->di_projid); + to->di_projid_lo = be16_to_cpu(from->di_projid_lo); + to->di_projid_hi = be16_to_cpu(from->di_projid_hi); memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); to->di_flushiter = be16_to_cpu(from->di_flushiter); to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); @@ -695,7 +696,8 @@ xfs_dinode_to_disk( to->di_uid = cpu_to_be32(from->di_uid); to->di_gid = cpu_to_be32(from->di_gid); to->di_nlink = cpu_to_be32(from->di_nlink); - to->di_projid = cpu_to_be16(from->di_projid); + to->di_projid_lo = cpu_to_be16(from->di_projid_lo); + to->di_projid_hi = cpu_to_be16(from->di_projid_hi); memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); to->di_flushiter = cpu_to_be16(from->di_flushiter); to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); @@ -874,7 +876,7 @@ xfs_iread( if (ip->i_d.di_version == 1) { ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; - ip->i_d.di_projid = 0; + xfs_set_projid(ip, 0); } ip->i_delayed_blks = 0; @@ -982,8 +984,7 @@ xfs_ialloc( mode_t mode, xfs_nlink_t nlink, xfs_dev_t rdev, - cred_t *cr, - xfs_prid_t prid, + prid_t prid, int okalloc, xfs_buf_t **ialloc_context, boolean_t *call_again, @@ -1027,7 +1028,7 @@ xfs_ialloc( ASSERT(ip->i_d.di_nlink == nlink); ip->i_d.di_uid = current_fsuid(); ip->i_d.di_gid = current_fsgid(); - ip->i_d.di_projid = prid; + xfs_set_projid(ip, prid); memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); /* @@ -2725,7 +2726,7 @@ cluster_corrupt_out: XFS_BUF_UNDONE(bp); XFS_BUF_STALE(bp); XFS_BUF_ERROR(bp,EIO); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); } else { XFS_BUF_STALE(bp); xfs_buf_relse(bp); @@ -3008,7 +3009,7 @@ xfs_iflush_int( memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); memset(&(dip->di_pad[0]), 0, sizeof(dip->di_pad)); - ASSERT(ip->i_d.di_projid == 0); + ASSERT(xfs_get_projid(ip) == 0); } } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 0898c5417d1..fac52290de9 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -134,8 +134,9 @@ typedef struct xfs_icdinode { __uint32_t di_uid; /* owner's user id */ __uint32_t di_gid; /* owner's group id */ __uint32_t di_nlink; /* number of links to file */ - __uint16_t di_projid; /* owner's project id */ - __uint8_t di_pad[8]; /* unused, zeroed space */ + __uint16_t di_projid_lo; /* lower part of owner's project id */ + __uint16_t di_projid_hi; /* higher part of owner's project id */ + __uint8_t di_pad[6]; /* unused, zeroed space */ __uint16_t di_flushiter; /* incremented on flush */ xfs_ictimestamp_t di_atime; /* time last accessed */ xfs_ictimestamp_t di_mtime; /* time last modified */ @@ -212,7 +213,6 @@ typedef struct xfs_icdinode { #ifdef __KERNEL__ struct bhv_desc; -struct cred; struct xfs_buf; struct xfs_bmap_free; struct xfs_bmbt_irec; @@ -335,6 +335,25 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) } /* + * Project quota id helpers (previously projid was 16bit only + * and using two 16bit values to hold new 32bit projid was choosen + * to retain compatibility with "old" filesystems). + */ +static inline prid_t +xfs_get_projid(struct xfs_inode *ip) +{ + return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo; +} + +static inline void +xfs_set_projid(struct xfs_inode *ip, + prid_t projid) +{ + ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16); + ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); +} + +/* * Manage the i_flush queue embedded in the inode. This completion * queue synchronizes processes attempting to flush the in-core * inode back to disk. @@ -456,8 +475,8 @@ void xfs_inode_free(struct xfs_inode *ip); * xfs_inode.c prototypes. */ int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, - xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, - int, struct xfs_buf **, boolean_t *, xfs_inode_t **); + xfs_nlink_t, xfs_dev_t, prid_t, int, + struct xfs_buf **, boolean_t *, xfs_inode_t **); uint xfs_ip2xflags(struct xfs_inode *); uint xfs_dic2xflags(struct xfs_dinode *); @@ -471,7 +490,6 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_iunpin_wait(xfs_inode_t *); int xfs_iflush(xfs_inode_t *, uint); -void xfs_ichgtime(xfs_inode_t *, int); void xfs_lock_inodes(xfs_inode_t **, int, uint); void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index fe00777e279..c7ac020705d 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -223,15 +223,6 @@ xfs_inode_item_format( nvecs = 1; /* - * Make sure the linux inode is dirty. We do this before - * clearing i_update_core as the VFS will call back into - * XFS here and set i_update_core, so we need to dirty the - * inode first so that the ordering of i_update_core and - * unlogged modifications still works as described below. - */ - xfs_mark_inode_dirty_sync(ip); - - /* * Clear i_update_core if the timestamps (or any other * non-transactional modification) need flushing/logging * and we're about to log them with the rest of the core. diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 7e3626e5925..dc1882adaf5 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -92,7 +92,8 @@ xfs_bulkstat_one_int( * further change. */ buf->bs_nlink = dic->di_nlink; - buf->bs_projid = dic->di_projid; + buf->bs_projid_lo = dic->di_projid_lo; + buf->bs_projid_hi = dic->di_projid_hi; buf->bs_ino = ino; buf->bs_mode = dic->di_mode; buf->bs_uid = dic->di_uid; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ba8e36e0b4e..cee4ab9f8a9 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1118,7 +1118,8 @@ xlog_alloc_log(xfs_mount_t *mp, iclog->ic_prev = prev_iclog; prev_iclog = iclog; - bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); + bp = xfs_buf_get_uncached(mp->m_logdev_targp, + log->l_iclog_size, 0); if (!bp) goto out_free_iclog; if (!XFS_BUF_CPSEMA(bp)) @@ -1296,7 +1297,7 @@ xlog_bdstrat( if (iclog->ic_state & XLOG_STATE_IOERROR) { XFS_BUF_ERROR(bp, EIO); XFS_BUF_STALE(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 7e206fc1fa3..23d6ceb5e97 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -146,102 +146,6 @@ xlog_cil_init_post_recovery( } /* - * Insert the log item into the CIL and calculate the difference in space - * consumed by the item. Add the space to the checkpoint ticket and calculate - * if the change requires additional log metadata. If it does, take that space - * as well. Remove the amount of space we addded to the checkpoint ticket from - * the current transaction ticket so that the accounting works out correctly. - * - * If this is the first time the item is being placed into the CIL in this - * context, pin it so it can't be written to disk until the CIL is flushed to - * the iclog and the iclog written to disk. - */ -static void -xlog_cil_insert( - struct log *log, - struct xlog_ticket *ticket, - struct xfs_log_item *item, - struct xfs_log_vec *lv) -{ - struct xfs_cil *cil = log->l_cilp; - struct xfs_log_vec *old = lv->lv_item->li_lv; - struct xfs_cil_ctx *ctx = cil->xc_ctx; - int len; - int diff_iovecs; - int iclog_space; - - if (old) { - /* existing lv on log item, space used is a delta */ - ASSERT(!list_empty(&item->li_cil)); - ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); - - len = lv->lv_buf_len - old->lv_buf_len; - diff_iovecs = lv->lv_niovecs - old->lv_niovecs; - kmem_free(old->lv_buf); - kmem_free(old); - } else { - /* new lv, must pin the log item */ - ASSERT(!lv->lv_item->li_lv); - ASSERT(list_empty(&item->li_cil)); - - len = lv->lv_buf_len; - diff_iovecs = lv->lv_niovecs; - IOP_PIN(lv->lv_item); - - } - len += diff_iovecs * sizeof(xlog_op_header_t); - - /* attach new log vector to log item */ - lv->lv_item->li_lv = lv; - - spin_lock(&cil->xc_cil_lock); - list_move_tail(&item->li_cil, &cil->xc_cil); - ctx->nvecs += diff_iovecs; - - /* - * If this is the first time the item is being committed to the CIL, - * store the sequence number on the log item so we can tell - * in future commits whether this is the first checkpoint the item is - * being committed into. - */ - if (!item->li_seq) - item->li_seq = ctx->sequence; - - /* - * Now transfer enough transaction reservation to the context ticket - * for the checkpoint. The context ticket is special - the unit - * reservation has to grow as well as the current reservation as we - * steal from tickets so we can correctly determine the space used - * during the transaction commit. - */ - if (ctx->ticket->t_curr_res == 0) { - /* first commit in checkpoint, steal the header reservation */ - ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); - ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; - ticket->t_curr_res -= ctx->ticket->t_unit_res; - } - - /* do we need space for more log record headers? */ - iclog_space = log->l_iclog_size - log->l_iclog_hsize; - if (len > 0 && (ctx->space_used / iclog_space != - (ctx->space_used + len) / iclog_space)) { - int hdrs; - - hdrs = (len + iclog_space - 1) / iclog_space; - /* need to take into account split region headers, too */ - hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); - ctx->ticket->t_unit_res += hdrs; - ctx->ticket->t_curr_res += hdrs; - ticket->t_curr_res -= hdrs; - ASSERT(ticket->t_curr_res >= len); - } - ticket->t_curr_res -= len; - ctx->space_used += len; - - spin_unlock(&cil->xc_cil_lock); -} - -/* * Format log item into a flat buffers * * For delayed logging, we need to hold a formatted buffer containing all the @@ -286,7 +190,7 @@ xlog_cil_format_items( len += lv->lv_iovecp[index].i_len; lv->lv_buf_len = len; - lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); + lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); ptr = lv->lv_buf; for (index = 0; index < lv->lv_niovecs; index++) { @@ -300,21 +204,136 @@ xlog_cil_format_items( } } +/* + * Prepare the log item for insertion into the CIL. Calculate the difference in + * log space and vectors it will consume, and if it is a new item pin it as + * well. + */ +STATIC void +xfs_cil_prepare_item( + struct log *log, + struct xfs_log_vec *lv, + int *len, + int *diff_iovecs) +{ + struct xfs_log_vec *old = lv->lv_item->li_lv; + + if (old) { + /* existing lv on log item, space used is a delta */ + ASSERT(!list_empty(&lv->lv_item->li_cil)); + ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); + + *len += lv->lv_buf_len - old->lv_buf_len; + *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; + kmem_free(old->lv_buf); + kmem_free(old); + } else { + /* new lv, must pin the log item */ + ASSERT(!lv->lv_item->li_lv); + ASSERT(list_empty(&lv->lv_item->li_cil)); + + *len += lv->lv_buf_len; + *diff_iovecs += lv->lv_niovecs; + IOP_PIN(lv->lv_item); + + } + + /* attach new log vector to log item */ + lv->lv_item->li_lv = lv; + + /* + * If this is the first time the item is being committed to the + * CIL, store the sequence number on the log item so we can + * tell in future commits whether this is the first checkpoint + * the item is being committed into. + */ + if (!lv->lv_item->li_seq) + lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence; +} + +/* + * Insert the log items into the CIL and calculate the difference in space + * consumed by the item. Add the space to the checkpoint ticket and calculate + * if the change requires additional log metadata. If it does, take that space + * as well. Remove the amount of space we addded to the checkpoint ticket from + * the current transaction ticket so that the accounting works out correctly. + */ static void xlog_cil_insert_items( struct log *log, struct xfs_log_vec *log_vector, - struct xlog_ticket *ticket, - xfs_lsn_t *start_lsn) + struct xlog_ticket *ticket) { - struct xfs_log_vec *lv; - - if (start_lsn) - *start_lsn = log->l_cilp->xc_ctx->sequence; + struct xfs_cil *cil = log->l_cilp; + struct xfs_cil_ctx *ctx = cil->xc_ctx; + struct xfs_log_vec *lv; + int len = 0; + int diff_iovecs = 0; + int iclog_space; ASSERT(log_vector); + + /* + * Do all the accounting aggregation and switching of log vectors + * around in a separate loop to the insertion of items into the CIL. + * Then we can do a separate loop to update the CIL within a single + * lock/unlock pair. This reduces the number of round trips on the CIL + * lock from O(nr_logvectors) to O(1) and greatly reduces the overall + * hold time for the transaction commit. + * + * If this is the first time the item is being placed into the CIL in + * this context, pin it so it can't be written to disk until the CIL is + * flushed to the iclog and the iclog written to disk. + * + * We can do this safely because the context can't checkpoint until we + * are done so it doesn't matter exactly how we update the CIL. + */ + for (lv = log_vector; lv; lv = lv->lv_next) + xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); + + /* account for space used by new iovec headers */ + len += diff_iovecs * sizeof(xlog_op_header_t); + + spin_lock(&cil->xc_cil_lock); + + /* move the items to the tail of the CIL */ for (lv = log_vector; lv; lv = lv->lv_next) - xlog_cil_insert(log, ticket, lv->lv_item, lv); + list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); + + ctx->nvecs += diff_iovecs; + + /* + * Now transfer enough transaction reservation to the context ticket + * for the checkpoint. The context ticket is special - the unit + * reservation has to grow as well as the current reservation as we + * steal from tickets so we can correctly determine the space used + * during the transaction commit. + */ + if (ctx->ticket->t_curr_res == 0) { + /* first commit in checkpoint, steal the header reservation */ + ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); + ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; + ticket->t_curr_res -= ctx->ticket->t_unit_res; + } + + /* do we need space for more log record headers? */ + iclog_space = log->l_iclog_size - log->l_iclog_hsize; + if (len > 0 && (ctx->space_used / iclog_space != + (ctx->space_used + len) / iclog_space)) { + int hdrs; + + hdrs = (len + iclog_space - 1) / iclog_space; + /* need to take into account split region headers, too */ + hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); + ctx->ticket->t_unit_res += hdrs; + ctx->ticket->t_curr_res += hdrs; + ticket->t_curr_res -= hdrs; + ASSERT(ticket->t_curr_res >= len); + } + ticket->t_curr_res -= len; + ctx->space_used += len; + + spin_unlock(&cil->xc_cil_lock); } static void @@ -638,7 +657,10 @@ xfs_log_commit_cil( /* lock out background commit */ down_read(&log->l_cilp->xc_ctx_lock); - xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn); + if (commit_lsn) + *commit_lsn = log->l_cilp->xc_ctx->sequence; + + xlog_cil_insert_items(log, log_vector, tp->t_ticket); /* check we didn't blow the reservation */ if (tp->t_ticket->t_curr_res < 0) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 6f3f5fa37ac..966d3f97458 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -107,7 +107,8 @@ xlog_get_bp( nbblks += log->l_sectBBsize; nbblks = round_up(nbblks, log->l_sectBBsize); - return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); + return xfs_buf_get_uncached(log->l_mp->m_logdev_targp, + BBTOB(nbblks), 0); } STATIC void @@ -167,7 +168,7 @@ xlog_bread_noalign( XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); xfsbdstrat(log->l_mp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) xfs_ioerror_alert("xlog_bread", log->l_mp, bp, XFS_BUF_ADDR(bp)); @@ -321,12 +322,13 @@ xlog_recover_iodone( * this during recovery. One strike! */ xfs_ioerror_alert("xlog_recover_iodone", - bp->b_mount, bp, XFS_BUF_ADDR(bp)); - xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); + bp->b_target->bt_mount, bp, + XFS_BUF_ADDR(bp)); + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); } - bp->b_mount = NULL; XFS_BUF_CLR_IODONE_FUNC(bp); - xfs_biodone(bp); + xfs_buf_ioend(bp, 0); } /* @@ -2275,8 +2277,7 @@ xlog_recover_do_buffer_trans( XFS_BUF_STALE(bp); error = xfs_bwrite(mp, bp); } else { - ASSERT(bp->b_mount == NULL || bp->b_mount == mp); - bp->b_mount = mp; + ASSERT(bp->b_target->bt_mount == mp); XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); } @@ -2540,8 +2541,7 @@ xlog_recover_do_inode_trans( } write_inode_buffer: - ASSERT(bp->b_mount == NULL || bp->b_mount == mp); - bp->b_mount = mp; + ASSERT(bp->b_target->bt_mount == mp); XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); error: @@ -2678,8 +2678,7 @@ xlog_recover_do_dquot_trans( memcpy(ddq, recddq, item->ri_buf[1].i_len); ASSERT(dq_f->qlf_size == 2); - ASSERT(bp->b_mount == NULL || bp->b_mount == mp); - bp->b_mount = mp; + ASSERT(bp->b_target->bt_mount == mp); XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); xfs_bdwrite(mp, bp); @@ -3817,7 +3816,7 @@ xlog_do_recover( XFS_BUF_READ(bp); XFS_BUF_UNASYNC(bp); xfsbdstrat(log->l_mp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) { xfs_ioerror_alert("xlog_do_recover", log->l_mp, bp, XFS_BUF_ADDR(bp)); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index aeb9d72ebf6..b1498ab5a39 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -52,16 +52,11 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, int); -STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, - int64_t, int); STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); - #else #define xfs_icsb_balance_counter(mp, a, b) do { } while (0) #define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) -#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) - #endif static const struct { @@ -199,6 +194,8 @@ xfs_uuid_unmount( /* * Reference counting access wrappers to the perag structures. + * Because we never free per-ag structures, the only thing we + * have to protect against changes is the tree structure itself. */ struct xfs_perag * xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) @@ -206,19 +203,43 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) struct xfs_perag *pag; int ref = 0; - spin_lock(&mp->m_perag_lock); + rcu_read_lock(); pag = radix_tree_lookup(&mp->m_perag_tree, agno); if (pag) { ASSERT(atomic_read(&pag->pag_ref) >= 0); - /* catch leaks in the positive direction during testing */ - ASSERT(atomic_read(&pag->pag_ref) < 1000); ref = atomic_inc_return(&pag->pag_ref); } - spin_unlock(&mp->m_perag_lock); + rcu_read_unlock(); trace_xfs_perag_get(mp, agno, ref, _RET_IP_); return pag; } +/* + * search from @first to find the next perag with the given tag set. + */ +struct xfs_perag * +xfs_perag_get_tag( + struct xfs_mount *mp, + xfs_agnumber_t first, + int tag) +{ + struct xfs_perag *pag; + int found; + int ref; + + rcu_read_lock(); + found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, + (void **)&pag, first, 1, tag); + if (found <= 0) { + rcu_read_unlock(); + return NULL; + } + ref = atomic_inc_return(&pag->pag_ref); + rcu_read_unlock(); + trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); + return pag; +} + void xfs_perag_put(struct xfs_perag *pag) { @@ -229,10 +250,18 @@ xfs_perag_put(struct xfs_perag *pag) trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); } +STATIC void +__xfs_free_perag( + struct rcu_head *head) +{ + struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); + + ASSERT(atomic_read(&pag->pag_ref) == 0); + kmem_free(pag); +} + /* - * Free up the resources associated with a mount structure. Assume that - * the structure was initially zeroed, so we can tell which fields got - * initialized. + * Free up the per-ag resources associated with the mount structure. */ STATIC void xfs_free_perag( @@ -244,10 +273,9 @@ xfs_free_perag( for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { spin_lock(&mp->m_perag_lock); pag = radix_tree_delete(&mp->m_perag_tree, agno); - ASSERT(pag); - ASSERT(atomic_read(&pag->pag_ref) == 0); spin_unlock(&mp->m_perag_lock); - kmem_free(pag); + ASSERT(pag); + call_rcu(&pag->rcu_head, __xfs_free_perag); } } @@ -444,7 +472,10 @@ xfs_initialize_perag( pag->pag_agno = index; pag->pag_mount = mp; rwlock_init(&pag->pag_ici_lock); + mutex_init(&pag->pag_ici_reclaim_lock); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); + spin_lock_init(&pag->pag_buf_lock); + pag->pag_buf_tree = RB_ROOT; if (radix_tree_preload(GFP_NOFS)) goto out_unwind; @@ -639,7 +670,6 @@ int xfs_readsb(xfs_mount_t *mp, int flags) { unsigned int sector_size; - unsigned int extra_flags; xfs_buf_t *bp; int error; @@ -652,28 +682,24 @@ xfs_readsb(xfs_mount_t *mp, int flags) * access to the superblock. */ sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); - extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED; - bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), - extra_flags); - if (!bp || XFS_BUF_ISERROR(bp)) { - xfs_fs_mount_cmn_err(flags, "SB read failed"); - error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; - goto fail; +reread: + bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, + XFS_SB_DADDR, sector_size, 0); + if (!bp) { + xfs_fs_mount_cmn_err(flags, "SB buffer read failed"); + return EIO; } - ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); /* * Initialize the mount structure from the superblock. * But first do some basic consistency checking. */ xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); - error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); if (error) { xfs_fs_mount_cmn_err(flags, "SB validate failed"); - goto fail; + goto release_buf; } /* @@ -684,7 +710,7 @@ xfs_readsb(xfs_mount_t *mp, int flags) "device supports only %u byte sectors (not %u)", sector_size, mp->m_sb.sb_sectsize); error = ENOSYS; - goto fail; + goto release_buf; } /* @@ -692,33 +718,20 @@ xfs_readsb(xfs_mount_t *mp, int flags) * re-read the superblock so the buffer is correctly sized. */ if (sector_size < mp->m_sb.sb_sectsize) { - XFS_BUF_UNMANAGE(bp); xfs_buf_relse(bp); sector_size = mp->m_sb.sb_sectsize; - bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, - BTOBB(sector_size), extra_flags); - if (!bp || XFS_BUF_ISERROR(bp)) { - xfs_fs_mount_cmn_err(flags, "SB re-read failed"); - error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; - goto fail; - } - ASSERT(XFS_BUF_ISBUSY(bp)); - ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); + goto reread; } /* Initialize per-cpu counters */ xfs_icsb_reinit_counters(mp); mp->m_sb_bp = bp; - xfs_buf_relse(bp); - ASSERT(XFS_BUF_VALUSEMA(bp) > 0); + xfs_buf_unlock(bp); return 0; - fail: - if (bp) { - XFS_BUF_UNMANAGE(bp); - xfs_buf_relse(bp); - } +release_buf: + xfs_buf_relse(bp); return error; } @@ -991,42 +1004,35 @@ xfs_check_sizes(xfs_mount_t *mp) { xfs_buf_t *bp; xfs_daddr_t d; - int error; d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { - cmn_err(CE_WARN, "XFS: size check 1 failed"); + cmn_err(CE_WARN, "XFS: filesystem size mismatch detected"); return XFS_ERROR(EFBIG); } - error = xfs_read_buf(mp, mp->m_ddev_targp, - d - XFS_FSS_TO_BB(mp, 1), - XFS_FSS_TO_BB(mp, 1), 0, &bp); - if (!error) { - xfs_buf_relse(bp); - } else { - cmn_err(CE_WARN, "XFS: size check 2 failed"); - if (error == ENOSPC) - error = XFS_ERROR(EFBIG); - return error; + bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, + d - XFS_FSS_TO_BB(mp, 1), + BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); + if (!bp) { + cmn_err(CE_WARN, "XFS: last sector read failed"); + return EIO; } + xfs_buf_relse(bp); if (mp->m_logdev_targp != mp->m_ddev_targp) { d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { - cmn_err(CE_WARN, "XFS: size check 3 failed"); + cmn_err(CE_WARN, "XFS: log size mismatch detected"); return XFS_ERROR(EFBIG); } - error = xfs_read_buf(mp, mp->m_logdev_targp, - d - XFS_FSB_TO_BB(mp, 1), - XFS_FSB_TO_BB(mp, 1), 0, &bp); - if (!error) { - xfs_buf_relse(bp); - } else { - cmn_err(CE_WARN, "XFS: size check 3 failed"); - if (error == ENOSPC) - error = XFS_ERROR(EFBIG); - return error; + bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, + d - XFS_FSB_TO_BB(mp, 1), + XFS_FSB_TO_B(mp, 1), 0); + if (!bp) { + cmn_err(CE_WARN, "XFS: log device read failed"); + return EIO; } + xfs_buf_relse(bp); } return 0; } @@ -1601,7 +1607,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) XFS_BUF_UNASYNC(sbp); ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); xfsbdstrat(mp, sbp); - error = xfs_iowait(sbp); + error = xfs_buf_iowait(sbp); if (error) xfs_ioerror_alert("xfs_unmountfs_writesb", mp, sbp, XFS_BUF_ADDR(sbp)); @@ -1832,135 +1838,72 @@ xfs_mod_incore_sb_unlocked( */ int xfs_mod_incore_sb( - xfs_mount_t *mp, - xfs_sb_field_t field, - int64_t delta, - int rsvd) + struct xfs_mount *mp, + xfs_sb_field_t field, + int64_t delta, + int rsvd) { - int status; + int status; - /* check for per-cpu counters */ - switch (field) { #ifdef HAVE_PERCPU_SB - case XFS_SBS_ICOUNT: - case XFS_SBS_IFREE: - case XFS_SBS_FDBLOCKS: - if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - status = xfs_icsb_modify_counters(mp, field, - delta, rsvd); - break; - } - /* FALLTHROUGH */ + ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); #endif - default: - spin_lock(&mp->m_sb_lock); - status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); - spin_unlock(&mp->m_sb_lock); - break; - } + spin_lock(&mp->m_sb_lock); + status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); + spin_unlock(&mp->m_sb_lock); return status; } /* - * xfs_mod_incore_sb_batch() is used to change more than one field - * in the in-core superblock structure at a time. This modification - * is protected by a lock internal to this module. The fields and - * changes to those fields are specified in the array of xfs_mod_sb - * structures passed in. + * Change more than one field in the in-core superblock structure at a time. * - * Either all of the specified deltas will be applied or none of - * them will. If any modified field dips below 0, then all modifications - * will be backed out and EINVAL will be returned. + * The fields and changes to those fields are specified in the array of + * xfs_mod_sb structures passed in. Either all of the specified deltas + * will be applied or none of them will. If any modified field dips below 0, + * then all modifications will be backed out and EINVAL will be returned. + * + * Note that this function may not be used for the superblock values that + * are tracked with the in-memory per-cpu counters - a direct call to + * xfs_icsb_modify_counters is required for these. */ int -xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) +xfs_mod_incore_sb_batch( + struct xfs_mount *mp, + xfs_mod_sb_t *msb, + uint nmsb, + int rsvd) { - int status=0; - xfs_mod_sb_t *msbp; + xfs_mod_sb_t *msbp = &msb[0]; + int error = 0; /* - * Loop through the array of mod structures and apply each - * individually. If any fail, then back out all those - * which have already been applied. Do all of this within - * the scope of the m_sb_lock so that all of the changes will - * be atomic. + * Loop through the array of mod structures and apply each individually. + * If any fail, then back out all those which have already been applied. + * Do all of this within the scope of the m_sb_lock so that all of the + * changes will be atomic. */ spin_lock(&mp->m_sb_lock); - msbp = &msb[0]; for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { - /* - * Apply the delta at index n. If it fails, break - * from the loop so we'll fall into the undo loop - * below. - */ - switch (msbp->msb_field) { -#ifdef HAVE_PERCPU_SB - case XFS_SBS_ICOUNT: - case XFS_SBS_IFREE: - case XFS_SBS_FDBLOCKS: - if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - spin_unlock(&mp->m_sb_lock); - status = xfs_icsb_modify_counters(mp, - msbp->msb_field, - msbp->msb_delta, rsvd); - spin_lock(&mp->m_sb_lock); - break; - } - /* FALLTHROUGH */ -#endif - default: - status = xfs_mod_incore_sb_unlocked(mp, - msbp->msb_field, - msbp->msb_delta, rsvd); - break; - } + ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || + msbp->msb_field > XFS_SBS_FDBLOCKS); - if (status != 0) { - break; - } + error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, + msbp->msb_delta, rsvd); + if (error) + goto unwind; } + spin_unlock(&mp->m_sb_lock); + return 0; - /* - * If we didn't complete the loop above, then back out - * any changes made to the superblock. If you add code - * between the loop above and here, make sure that you - * preserve the value of status. Loop back until - * we step below the beginning of the array. Make sure - * we don't touch anything back there. - */ - if (status != 0) { - msbp--; - while (msbp >= msb) { - switch (msbp->msb_field) { -#ifdef HAVE_PERCPU_SB - case XFS_SBS_ICOUNT: - case XFS_SBS_IFREE: - case XFS_SBS_FDBLOCKS: - if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { - spin_unlock(&mp->m_sb_lock); - status = xfs_icsb_modify_counters(mp, - msbp->msb_field, - -(msbp->msb_delta), - rsvd); - spin_lock(&mp->m_sb_lock); - break; - } - /* FALLTHROUGH */ -#endif - default: - status = xfs_mod_incore_sb_unlocked(mp, - msbp->msb_field, - -(msbp->msb_delta), - rsvd); - break; - } - ASSERT(status == 0); - msbp--; - } +unwind: + while (--msbp >= msb) { + error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, + -msbp->msb_delta, rsvd); + ASSERT(error == 0); } spin_unlock(&mp->m_sb_lock); - return status; + return error; } /* @@ -1998,18 +1941,13 @@ xfs_getsb( */ void xfs_freesb( - xfs_mount_t *mp) + struct xfs_mount *mp) { - xfs_buf_t *bp; + struct xfs_buf *bp = mp->m_sb_bp; - /* - * Use xfs_getsb() so that the buffer will be locked - * when we call xfs_buf_relse(). - */ - bp = xfs_getsb(mp, 0); - XFS_BUF_UNMANAGE(bp); - xfs_buf_relse(bp); + xfs_buf_lock(bp); mp->m_sb_bp = NULL; + xfs_buf_relse(bp); } /* @@ -2496,7 +2434,7 @@ xfs_icsb_balance_counter( spin_unlock(&mp->m_sb_lock); } -STATIC int +int xfs_icsb_modify_counters( xfs_mount_t *mp, xfs_sb_field_t field, diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 622da2179a5..5861b498074 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -53,7 +53,6 @@ typedef struct xfs_trans_reservations { #include "xfs_sync.h" -struct cred; struct log; struct xfs_mount_args; struct xfs_inode; @@ -91,6 +90,8 @@ extern void xfs_icsb_reinit_counters(struct xfs_mount *); extern void xfs_icsb_destroy_counters(struct xfs_mount *); extern void xfs_icsb_sync_counters(struct xfs_mount *, int); extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); +extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, + int64_t, int); #else #define xfs_icsb_init_counters(mp) (0) @@ -98,6 +99,8 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); #define xfs_icsb_reinit_counters(mp) do { } while (0) #define xfs_icsb_sync_counters(mp, flags) do { } while (0) #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) +#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \ + xfs_mod_incore_sb(mp, field, delta, rsvd) #endif typedef struct xfs_mount { @@ -232,8 +235,6 @@ typedef struct xfs_mount { #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred * I/O size in stat() */ -#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock - counters */ #define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams allocator */ #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ @@ -327,6 +328,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) * perag get/put wrappers for ref counting */ struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); +struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, + int tag); void xfs_perag_put(struct xfs_perag *pag); /* diff --git a/fs/xfs/xfs_refcache.h b/fs/xfs/xfs_refcache.h deleted file mode 100644 index 2dec79edb51..00000000000 --- a/fs/xfs/xfs_refcache.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_REFCACHE_H__ -#define __XFS_REFCACHE_H__ - -#ifdef HAVE_REFCACHE -/* - * Maximum size (in inodes) for the NFS reference cache - */ -#define XFS_REFCACHE_SIZE_MAX 512 - -struct xfs_inode; -struct xfs_mount; - -extern void xfs_refcache_insert(struct xfs_inode *); -extern void xfs_refcache_purge_ip(struct xfs_inode *); -extern void xfs_refcache_purge_mp(struct xfs_mount *); -extern void xfs_refcache_purge_some(struct xfs_mount *); -extern void xfs_refcache_resize(int); -extern void xfs_refcache_destroy(void); - -extern void xfs_refcache_iunlock(struct xfs_inode *, uint); - -#else - -#define xfs_refcache_insert(ip) do { } while (0) -#define xfs_refcache_purge_ip(ip) do { } while (0) -#define xfs_refcache_purge_mp(mp) do { } while (0) -#define xfs_refcache_purge_some(mp) do { } while (0) -#define xfs_refcache_resize(size) do { } while (0) -#define xfs_refcache_destroy() do { } while (0) - -#define xfs_refcache_iunlock(ip, flags) xfs_iunlock(ip, flags) - -#endif - -#endif /* __XFS_REFCACHE_H__ */ diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 8fca957200d..d2af0a8381a 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -183,7 +183,7 @@ xfs_rename( * tree quota mechanism would be circumvented. */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { + (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { error = XFS_ERROR(EXDEV); goto error_return; } @@ -211,7 +211,9 @@ xfs_rename( goto error_return; if (error) goto abort_return; - xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + xfs_trans_ichgtime(tp, target_dp, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); @@ -249,7 +251,9 @@ xfs_rename( &first_block, &free_list, spaceres); if (error) goto abort_return; - xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + xfs_trans_ichgtime(tp, target_dp, + XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Decrement the link count on the target since the target @@ -292,7 +296,7 @@ xfs_rename( * inode isn't really being changed, but old unix file systems did * it and some incremental backup programs won't work without it. */ - xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); /* * Adjust the link count on src_dp. This is necessary when @@ -315,7 +319,7 @@ xfs_rename( if (error) goto abort_return; - xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 891260fea11..12a19138531 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -39,6 +39,7 @@ #include "xfs_trans_space.h" #include "xfs_utils.h" #include "xfs_trace.h" +#include "xfs_buf.h" /* @@ -1883,13 +1884,13 @@ xfs_growfs_rt( /* * Read in the last block of the device, make sure it exists. */ - error = xfs_read_buf(mp, mp->m_rtdev_targp, - XFS_FSB_TO_BB(mp, nrblocks - 1), - XFS_FSB_TO_BB(mp, 1), 0, &bp); - if (error) - return error; - ASSERT(bp); + bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, + XFS_FSB_TO_BB(mp, nrblocks - 1), + XFS_FSB_TO_B(mp, 1), 0); + if (!bp) + return EIO; xfs_buf_relse(bp); + /* * Calculate new parameters. These are the final values to be reached. */ @@ -2215,7 +2216,6 @@ xfs_rtmount_init( { xfs_buf_t *bp; /* buffer for last block of subvolume */ xfs_daddr_t d; /* address of last block of subvolume */ - int error; /* error return value */ xfs_sb_t *sbp; /* filesystem superblock copy in mount */ sbp = &mp->m_sb; @@ -2242,15 +2242,12 @@ xfs_rtmount_init( (unsigned long long) mp->m_sb.sb_rblocks); return XFS_ERROR(EFBIG); } - error = xfs_read_buf(mp, mp->m_rtdev_targp, - d - XFS_FSB_TO_BB(mp, 1), - XFS_FSB_TO_BB(mp, 1), 0, &bp); - if (error) { - cmn_err(CE_WARN, - "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); - if (error == ENOSPC) - return XFS_ERROR(EFBIG); - return error; + bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, + d - XFS_FSB_TO_BB(mp, 1), + XFS_FSB_TO_B(mp, 1), 0); + if (!bp) { + cmn_err(CE_WARN, "XFS: realtime device size check failed"); + return EIO; } xfs_buf_relse(bp); return 0; diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 1b017c65749..1eb2ba58681 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -80,10 +80,12 @@ struct xfs_mount; #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ +#define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */ #define XFS_SB_VERSION2_OKREALFBITS \ (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ - XFS_SB_VERSION2_ATTR2BIT) + XFS_SB_VERSION2_ATTR2BIT | \ + XFS_SB_VERSION2_PROJID32BIT) #define XFS_SB_VERSION2_OKSASHFBITS \ (0) #define XFS_SB_VERSION2_OKREALBITS \ @@ -495,6 +497,12 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; } +static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) +{ + return xfs_sb_version_hasmorebits(sbp) && + (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT); +} + /* * end of superblock version macros */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 1c47edaea0d..f6d956b7711 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -696,7 +696,7 @@ xfs_trans_reserve( * fail if the count would go below zero. */ if (blocks > 0) { - error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, + error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, -((int64_t)blocks), rsvd); if (error != 0) { current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); @@ -767,7 +767,7 @@ undo_log: undo_blocks: if (blocks > 0) { - (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, + xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, (int64_t)blocks, rsvd); tp->t_blk_res = 0; } @@ -1009,7 +1009,7 @@ void xfs_trans_unreserve_and_mod_sb( xfs_trans_t *tp) { - xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ + xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ xfs_mod_sb_t *msbp; xfs_mount_t *mp = tp->t_mountp; /* REFERENCED */ @@ -1017,55 +1017,61 @@ xfs_trans_unreserve_and_mod_sb( int rsvd; int64_t blkdelta = 0; int64_t rtxdelta = 0; + int64_t idelta = 0; + int64_t ifreedelta = 0; msbp = msb; rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; - /* calculate free blocks delta */ + /* calculate deltas */ if (tp->t_blk_res > 0) blkdelta = tp->t_blk_res; - if ((tp->t_fdblocks_delta != 0) && (xfs_sb_version_haslazysbcount(&mp->m_sb) || (tp->t_flags & XFS_TRANS_SB_DIRTY))) blkdelta += tp->t_fdblocks_delta; - if (blkdelta != 0) { - msbp->msb_field = XFS_SBS_FDBLOCKS; - msbp->msb_delta = blkdelta; - msbp++; - } - - /* calculate free realtime extents delta */ if (tp->t_rtx_res > 0) rtxdelta = tp->t_rtx_res; - if ((tp->t_frextents_delta != 0) && (tp->t_flags & XFS_TRANS_SB_DIRTY)) rtxdelta += tp->t_frextents_delta; + if (xfs_sb_version_haslazysbcount(&mp->m_sb) || + (tp->t_flags & XFS_TRANS_SB_DIRTY)) { + idelta = tp->t_icount_delta; + ifreedelta = tp->t_ifree_delta; + } + + /* apply the per-cpu counters */ + if (blkdelta) { + error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, + blkdelta, rsvd); + if (error) + goto out; + } + + if (idelta) { + error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, + idelta, rsvd); + if (error) + goto out_undo_fdblocks; + } + + if (ifreedelta) { + error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, + ifreedelta, rsvd); + if (error) + goto out_undo_icount; + } + + /* apply remaining deltas */ if (rtxdelta != 0) { msbp->msb_field = XFS_SBS_FREXTENTS; msbp->msb_delta = rtxdelta; msbp++; } - /* apply remaining deltas */ - - if (xfs_sb_version_haslazysbcount(&mp->m_sb) || - (tp->t_flags & XFS_TRANS_SB_DIRTY)) { - if (tp->t_icount_delta != 0) { - msbp->msb_field = XFS_SBS_ICOUNT; - msbp->msb_delta = tp->t_icount_delta; - msbp++; - } - if (tp->t_ifree_delta != 0) { - msbp->msb_field = XFS_SBS_IFREE; - msbp->msb_delta = tp->t_ifree_delta; - msbp++; - } - } - if (tp->t_flags & XFS_TRANS_SB_DIRTY) { if (tp->t_dblocks_delta != 0) { msbp->msb_field = XFS_SBS_DBLOCKS; @@ -1115,8 +1121,24 @@ xfs_trans_unreserve_and_mod_sb( if (msbp > msb) { error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, (uint)(msbp - msb), rsvd); - ASSERT(error == 0); + if (error) + goto out_undo_ifreecount; } + + return; + +out_undo_ifreecount: + if (ifreedelta) + xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); +out_undo_icount: + if (idelta) + xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); +out_undo_fdblocks: + if (blkdelta) + xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); +out: + ASSERT(error = 0); + return; } /* @@ -1389,15 +1411,12 @@ xfs_trans_item_committed( */ STATIC void xfs_trans_committed( - struct xfs_trans *tp, + void *arg, int abortflag) { + struct xfs_trans *tp = arg; struct xfs_log_item_desc *lidp, *next; - /* Call the transaction's completion callback if there is one. */ - if (tp->t_callback != NULL) - tp->t_callback(tp, tp->t_callarg); - list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); xfs_trans_free_item_desc(lidp); @@ -1525,7 +1544,7 @@ xfs_trans_commit_iclog( * running in simulation mode (the log is explicitly turned * off). */ - tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; + tp->t_logcb.cb_func = xfs_trans_committed; tp->t_logcb.cb_arg = tp; /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c13c0f97b49..246286b77a8 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -399,8 +399,6 @@ typedef struct xfs_trans { * transaction. */ struct xfs_mount *t_mountp; /* ptr to fs mount struct */ struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ - xfs_trans_callback_t t_callback; /* transaction callback */ - void *t_callarg; /* callback arg */ unsigned int t_flags; /* misc flags */ int64_t t_icount_delta; /* superblock icount change */ int64_t t_ifree_delta; /* superblock ifree change */ @@ -473,6 +471,7 @@ void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, xfs_ino_t , uint, uint, struct xfs_inode **); +void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 90af025e683..c47918c302a 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -336,7 +336,7 @@ xfs_trans_read_buf( ASSERT(!XFS_BUF_ISASYNC(bp)); XFS_BUF_READ(bp); xfsbdstrat(tp->t_mountp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) { xfs_ioerror_alert("xfs_trans_read_buf", mp, bp, blkno); diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index cdc53a1050c..ccb34532768 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -118,6 +118,36 @@ xfs_trans_ijoin_ref( } /* + * Transactional inode timestamp update. Requires the inode to be locked and + * joined to the transaction supplied. Relies on the transaction subsystem to + * track dirty state and update/writeback the inode accordingly. + */ +void +xfs_trans_ichgtime( + struct xfs_trans *tp, + struct xfs_inode *ip, + int flags) +{ + struct inode *inode = VFS_I(ip); + timespec_t tv; + + ASSERT(tp); + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ip->i_transp == tp); + + tv = current_fs_time(inode->i_sb); + + if ((flags & XFS_ICHGTIME_MOD) && + !timespec_equal(&inode->i_mtime, &tv)) { + inode->i_mtime = tv; + } + if ((flags & XFS_ICHGTIME_CHG) && + !timespec_equal(&inode->i_ctime, &tv)) { + inode->i_ctime = tv; + } +} + +/* * This is called to mark the fields indicated in fieldmask as needing * to be logged when the transaction is committed. The inode must * already be associated with the given transaction. diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 320775295e3..26d1867d815 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ -typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ - typedef __uint32_t xlog_tid_t; /* transaction ID type */ /* diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index b7d5769d2df..8b32d1a4c5a 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -56,7 +56,6 @@ xfs_dir_ialloc( mode_t mode, xfs_nlink_t nlink, xfs_dev_t rdev, - cred_t *credp, prid_t prid, /* project id */ int okalloc, /* ok to allocate new space */ xfs_inode_t **ipp, /* pointer to inode; it will be @@ -93,7 +92,7 @@ xfs_dir_ialloc( * transaction commit so that no other process can steal * the inode(s) that we've just allocated. */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc, + code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, &ialloc_context, &call_again, &ip); /* @@ -197,7 +196,7 @@ xfs_dir_ialloc( * other allocations in this allocation group, * this call should always succeed. */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, + code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, &ialloc_context, &call_again, &ip); /* @@ -235,7 +234,7 @@ xfs_droplink( { int error; - xfs_ichgtime(ip, XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); ASSERT (ip->i_d.di_nlink > 0); ip->i_d.di_nlink--; @@ -299,7 +298,7 @@ xfs_bumplink( { if (ip->i_d.di_nlink >= XFS_MAXLINK) return XFS_ERROR(EMLINK); - xfs_ichgtime(ip, XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); ASSERT(ip->i_d.di_nlink > 0); ip->i_d.di_nlink++; diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index f55b9678264..456fca31493 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h @@ -19,8 +19,7 @@ #define __XFS_UTILS_H__ extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, - xfs_dev_t, cred_t *, prid_t, int, - xfs_inode_t **, int *); + xfs_dev_t, prid_t, int, xfs_inode_t **, int *); extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 4c7c7bfb2b2..8e4a63c4151 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -114,7 +114,7 @@ xfs_setattr( */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, + code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), qflags, &udqp, &gdqp); if (code) return code; @@ -184,8 +184,11 @@ xfs_setattr( ip->i_size == 0 && ip->i_d.di_nextents == 0) { xfs_iunlock(ip, XFS_ILOCK_EXCL); lock_flags &= ~XFS_ILOCK_EXCL; - if (mask & ATTR_CTIME) - xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + if (mask & ATTR_CTIME) { + inode->i_mtime = inode->i_ctime = + current_fs_time(inode->i_sb); + xfs_mark_inode_dirty_sync(ip); + } code = 0; goto error_return; } @@ -1253,8 +1256,7 @@ xfs_create( struct xfs_name *name, mode_t mode, xfs_dev_t rdev, - xfs_inode_t **ipp, - cred_t *credp) + xfs_inode_t **ipp) { int is_dir = S_ISDIR(mode); struct xfs_mount *mp = dp->i_mount; @@ -1266,7 +1268,7 @@ xfs_create( boolean_t unlock_dp_on_error = B_FALSE; uint cancel_flags; int committed; - xfs_prid_t prid; + prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; uint resblks; @@ -1279,9 +1281,9 @@ xfs_create( return XFS_ERROR(EIO); if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) - prid = dp->i_d.di_projid; + prid = xfs_get_projid(dp); else - prid = dfltprid; + prid = XFS_PROJID_DEFAULT; /* * Make sure that we have allocated dquot(s) on disk. @@ -1360,7 +1362,7 @@ xfs_create( * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp, + error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, resblks > 0, &ip, &committed); if (error) { if (error == ENOSPC) @@ -1391,7 +1393,7 @@ xfs_create( ASSERT(error != ENOSPC); goto out_trans_abort; } - xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); if (is_dir) { @@ -1742,7 +1744,7 @@ xfs_remove( ASSERT(error != ENOENT); goto out_bmap_cancel; } - xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (is_dir) { /* @@ -1880,7 +1882,7 @@ xfs_link( * the tree quota mechanism could be circumvented. */ if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && - (tdp->i_d.di_projid != sip->i_d.di_projid))) { + (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { error = XFS_ERROR(EXDEV); goto error_return; } @@ -1895,7 +1897,7 @@ xfs_link( &first_block, &free_list, resblks); if (error) goto abort_return; - xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); error = xfs_bumplink(tp, sip); @@ -1933,8 +1935,7 @@ xfs_symlink( struct xfs_name *link_name, const char *target_path, mode_t mode, - xfs_inode_t **ipp, - cred_t *credp) + xfs_inode_t **ipp) { xfs_mount_t *mp = dp->i_mount; xfs_trans_t *tp; @@ -1955,7 +1956,7 @@ xfs_symlink( int byte_cnt; int n; xfs_buf_t *bp; - xfs_prid_t prid; + prid_t prid; struct xfs_dquot *udqp, *gdqp; uint resblks; @@ -1978,9 +1979,9 @@ xfs_symlink( udqp = gdqp = NULL; if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) - prid = dp->i_d.di_projid; + prid = xfs_get_projid(dp); else - prid = (xfs_prid_t)dfltprid; + prid = XFS_PROJID_DEFAULT; /* * Make sure that we have allocated dquot(s) on disk. @@ -2046,8 +2047,8 @@ xfs_symlink( /* * Allocate an inode for the symlink. */ - error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), - 1, 0, credp, prid, resblks > 0, &ip, NULL); + error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, + prid, resblks > 0, &ip, NULL); if (error) { if (error == ENOSPC) goto error_return; @@ -2129,7 +2130,7 @@ xfs_symlink( &first_block, &free_list, resblks); if (error) goto error1; - xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); /* @@ -2272,7 +2273,7 @@ xfs_alloc_file_space( count = len; imapp = &imaps[0]; nimaps = 1; - bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); + bmapi_flag = XFS_BMAPI_WRITE | alloc_type; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); allocatesize_fsb = XFS_B_TO_FSB(mp, count); @@ -2431,9 +2432,9 @@ xfs_zero_remaining_bytes( if (endoff > ip->i_size) endoff = ip->i_size; - bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp); + bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp, + mp->m_sb.sb_blocksize, XBF_DONT_BLOCK); if (!bp) return XFS_ERROR(ENOMEM); @@ -2459,7 +2460,7 @@ xfs_zero_remaining_bytes( XFS_BUF_READ(bp); XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); xfsbdstrat(mp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) { xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", mp, bp, XFS_BUF_ADDR(bp)); @@ -2472,7 +2473,7 @@ xfs_zero_remaining_bytes( XFS_BUF_UNREAD(bp); XFS_BUF_WRITE(bp); xfsbdstrat(mp, bp); - error = xfs_iowait(bp); + error = xfs_buf_iowait(bp); if (error) { xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", mp, bp, XFS_BUF_ADDR(bp)); @@ -2711,6 +2712,7 @@ xfs_change_file_space( xfs_off_t llen; xfs_trans_t *tp; struct iattr iattr; + int prealloc_type; if (!S_ISREG(ip->i_d.di_mode)) return XFS_ERROR(EINVAL); @@ -2753,12 +2755,17 @@ xfs_change_file_space( * size to be changed. */ setprealloc = clrprealloc = 0; + prealloc_type = XFS_BMAPI_PREALLOC; switch (cmd) { + case XFS_IOC_ZERO_RANGE: + prealloc_type |= XFS_BMAPI_CONVERT; + xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0); + /* FALLTHRU */ case XFS_IOC_RESVSP: case XFS_IOC_RESVSP64: error = xfs_alloc_file_space(ip, startoffset, bf->l_len, - 1, attr_flags); + prealloc_type, attr_flags); if (error) return error; setprealloc = 1; @@ -2827,7 +2834,7 @@ xfs_change_file_space( if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; - xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } if (setprealloc) ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index d8dfa8d0dad..f6702927eee 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -2,7 +2,6 @@ #define _XFS_VNODEOPS_H 1 struct attrlist_cursor_kern; -struct cred; struct file; struct iattr; struct inode; @@ -26,7 +25,7 @@ int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, - xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp); + xfs_dev_t rdev, struct xfs_inode **ipp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, @@ -34,8 +33,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, xfs_off_t *offset, filldir_t filldir); int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, - const char *target_path, mode_t mode, struct xfs_inode **ipp, - cred_t *credp); + const char *target_path, mode_t mode, struct xfs_inode **ipp); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); int xfs_change_file_space(struct xfs_inode *ip, int cmd, xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); |