diff options
66 files changed, 3263 insertions, 1140 deletions
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6 index b49989bb89a..e7a9a83f008 100644 --- a/fs/xfs/Makefile-linux-2.6 +++ b/fs/xfs/Makefile-linux-2.6 @@ -64,6 +64,7 @@ xfs-y += xfs_alloc.o \ xfs_dir2_sf.o \ xfs_error.o \ xfs_extfree_item.o \ + xfs_filestream.o \ xfs_fsops.o \ xfs_ialloc.o \ xfs_ialloc_btree.o \ @@ -77,6 +78,7 @@ xfs-y += xfs_alloc.o \ xfs_log.o \ xfs_log_recover.o \ xfs_mount.o \ + xfs_mru_cache.o \ xfs_rename.o \ xfs_trans.o \ xfs_trans_ail.o \ diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 9ebabdf7829..4b6470cf87f 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -100,25 +100,6 @@ kmem_zone_destroy(kmem_zone_t *zone) extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); -/* - * Low memory cache shrinkers - */ - -typedef struct shrinker *kmem_shaker_t; -typedef int (*kmem_shake_func_t)(int, gfp_t); - -static inline kmem_shaker_t -kmem_shake_register(kmem_shake_func_t sfunc) -{ - return set_shrinker(DEFAULT_SEEKS, sfunc); -} - -static inline void -kmem_shake_deregister(kmem_shaker_t shrinker) -{ - remove_shrinker(shrinker); -} - static inline int kmem_shake_allow(gfp_t gfp_mask) { diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 7361861e3aa..fd4105d662e 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -108,14 +108,19 @@ xfs_page_trace( /* * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. + * the final hold on this ioend. If we are asked to wait, + * flush the workqueue. */ STATIC void xfs_finish_ioend( - xfs_ioend_t *ioend) + xfs_ioend_t *ioend, + int wait) { - if (atomic_dec_and_test(&ioend->io_remaining)) + if (atomic_dec_and_test(&ioend->io_remaining)) { queue_work(xfsdatad_workqueue, &ioend->io_work); + if (wait) + flush_workqueue(xfsdatad_workqueue); + } } /* @@ -156,6 +161,8 @@ xfs_setfilesize( xfs_fsize_t bsize; ip = xfs_vtoi(ioend->io_vnode); + if (!ip) + return; ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); ASSERT(ioend->io_type != IOMAP_READ); @@ -334,7 +341,7 @@ xfs_end_bio( bio->bi_end_io = NULL; bio_put(bio); - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, 0); return 0; } @@ -470,7 +477,7 @@ xfs_submit_ioend( } if (bio) xfs_submit_ioend_bio(ioend, bio); - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, 0); } while ((ioend = next) != NULL); } @@ -1003,6 +1010,8 @@ xfs_page_state_convert( if (buffer_unwritten(bh) || buffer_delay(bh) || ((buffer_uptodate(bh) || PageUptodate(page)) && !buffer_mapped(bh) && (unmapped || startio))) { + int new_ioend = 0; + /* * Make sure we don't use a read-only iomap */ @@ -1021,6 +1030,15 @@ xfs_page_state_convert( } if (!iomap_valid) { + /* + * if we didn't have a valid mapping then we + * need to ensure that we put the new mapping + * in a new ioend structure. This needs to be + * done to ensure that the ioends correctly + * reflect the block mappings at io completion + * for unwritten extent conversion. + */ + new_ioend = 1; if (type == IOMAP_NEW) { size = xfs_probe_cluster(inode, page, bh, head, 0); @@ -1040,7 +1058,7 @@ xfs_page_state_convert( if (startio) { xfs_add_to_ioend(inode, bh, offset, type, &ioend, - !iomap_valid); + new_ioend); } else { set_buffer_dirty(bh); unlock_buffer(bh); @@ -1416,6 +1434,13 @@ xfs_end_io_direct( * This is not necessary for synchronous direct I/O, but we do * it anyway to keep the code uniform and simpler. * + * Well, if only it were that simple. Because synchronous direct I/O + * requires extent conversion to occur *before* we return to userspace, + * we have to wait for extent conversion to complete. Look at the + * iocb that has been passed to us to determine if this is AIO or + * not. If it is synchronous, tell xfs_finish_ioend() to kick the + * workqueue and wait for it to complete. + * * The core direct I/O code might be changed to always call the * completion handler in the future, in which case all this can * go away. @@ -1423,9 +1448,9 @@ xfs_end_io_direct( ioend->io_offset = offset; ioend->io_size = size; if (ioend->io_type == IOMAP_READ) { - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, 0); } else if (private && size > 0) { - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, is_sync_kiocb(iocb)); } else { /* * A direct I/O write ioend starts it's life in unwritten @@ -1434,7 +1459,7 @@ xfs_end_io_direct( * handler. */ INIT_WORK(&ioend->io_work, xfs_end_bio_written); - xfs_finish_ioend(ioend); + xfs_finish_ioend(ioend, 0); } /* diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index fe4f66a5af1..2df63622354 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -35,7 +35,7 @@ #include <linux/freezer.h> static kmem_zone_t *xfs_buf_zone; -static kmem_shaker_t xfs_buf_shake; +static struct shrinker *xfs_buf_shake; STATIC int xfsbufd(void *); STATIC int xfsbufd_wakeup(int, gfp_t); STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); @@ -314,7 +314,7 @@ xfs_buf_free( ASSERT(list_empty(&bp->b_hash_list)); - if (bp->b_flags & _XBF_PAGE_CACHE) { + if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) @@ -323,18 +323,11 @@ xfs_buf_free( for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; - ASSERT(!PagePrivate(page)); + if (bp->b_flags & _XBF_PAGE_CACHE) + ASSERT(!PagePrivate(page)); page_cache_release(page); } _xfs_buf_free_pages(bp); - } else if (bp->b_flags & _XBF_KMEM_ALLOC) { - /* - * XXX(hch): bp->b_count_desired might be incorrect (see - * xfs_buf_associate_memory for details), but fortunately - * the Linux version of kmem_free ignores the len argument.. - */ - kmem_free(bp->b_addr, bp->b_count_desired); - _xfs_buf_free_pages(bp); } xfs_buf_deallocate(bp); @@ -764,43 +757,44 @@ xfs_buf_get_noaddr( size_t len, xfs_buftarg_t *target) { - size_t malloc_len = len; + unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; + int error, i; xfs_buf_t *bp; - void *data; - int error; bp = xfs_buf_allocate(0); if (unlikely(bp == NULL)) goto fail; _xfs_buf_initialize(bp, target, 0, len, 0); - try_again: - data = kmem_alloc(malloc_len, KM_SLEEP | KM_MAYFAIL | KM_LARGE); - if (unlikely(data == NULL)) + error = _xfs_buf_get_pages(bp, page_count, 0); + if (error) goto fail_free_buf; - /* check whether alignment matches.. */ - if ((__psunsigned_t)data != - ((__psunsigned_t)data & ~target->bt_smask)) { - /* .. else double the size and try again */ - kmem_free(data, malloc_len); - malloc_len <<= 1; - goto try_again; + for (i = 0; i < page_count; i++) { + bp->b_pages[i] = alloc_page(GFP_KERNEL); + if (!bp->b_pages[i]) + goto fail_free_mem; } + bp->b_flags |= _XBF_PAGES; - error = xfs_buf_associate_memory(bp, data, len); - if (error) + error = _xfs_buf_map_pages(bp, XBF_MAPPED); + if (unlikely(error)) { + printk(KERN_WARNING "%s: failed to map pages\n", + __FUNCTION__); goto fail_free_mem; - bp->b_flags |= _XBF_KMEM_ALLOC; + } xfs_buf_unlock(bp); - XB_TRACE(bp, "no_daddr", data); + XB_TRACE(bp, "no_daddr", len); return bp; + fail_free_mem: - kmem_free(data, malloc_len); + while (--i >= 0) + __free_page(bp->b_pages[i]); + _xfs_buf_free_pages(bp); fail_free_buf: - xfs_buf_free(bp); + xfs_buf_deallocate(bp); fail: return NULL; } @@ -1453,6 +1447,7 @@ xfs_free_buftarg( int external) { xfs_flush_buftarg(btp, 1); + xfs_blkdev_issue_flush(btp); if (external) xfs_blkdev_put(btp->bt_bdev); xfs_free_bufhash(btp); @@ -1837,7 +1832,7 @@ xfs_buf_init(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; - xfs_buf_shake = kmem_shake_register(xfsbufd_wakeup); + xfs_buf_shake = set_shrinker(DEFAULT_SEEKS, xfsbufd_wakeup); if (!xfs_buf_shake) goto out_destroy_xfsdatad_workqueue; @@ -1859,7 +1854,7 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { - kmem_shake_deregister(xfs_buf_shake); + remove_shrinker(xfs_buf_shake); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index b6241f6201a..b5908a34b15 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -63,7 +63,7 @@ typedef enum { /* flags used only internally */ _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ - _XBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ + _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ } xfs_buf_flags_t; diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 8c43cd2e237..cbcd40c8c2a 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -184,15 +184,6 @@ xfs_file_open( } STATIC int -xfs_file_close( - struct file *filp, - fl_owner_t id) -{ - return -bhv_vop_close(vn_from_inode(filp->f_path.dentry->d_inode), 0, - file_count(filp) > 1 ? L_FALSE : L_TRUE, NULL); -} - -STATIC int xfs_file_release( struct inode *inode, struct file *filp) @@ -436,7 +427,6 @@ const struct file_operations xfs_file_operations = { #endif .mmap = xfs_file_mmap, .open = xfs_file_open, - .flush = xfs_file_close, .release = xfs_file_release, .fsync = xfs_file_fsync, #ifdef HAVE_FOP_OPEN_EXEC @@ -458,7 +448,6 @@ const struct file_operations xfs_invis_file_operations = { #endif .mmap = xfs_file_mmap, .open = xfs_file_open, - .flush = xfs_file_close, .release = xfs_file_release, .fsync = xfs_file_fsync, }; diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index ed3a5e1b4b6..bb72c3d4141 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -46,6 +46,7 @@ xfs_param_t xfs_params = { .inherit_nosym = { 0, 0, 1 }, .rotorstep = { 1, 1, 255 }, .inherit_nodfrg = { 0, 1, 1 }, + .fstrm_timer = { 1, 50, 3600*100}, }; /* diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index ff5c41ff8d4..5917808abbd 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -1019,7 +1019,7 @@ xfs_ioc_bulkstat( if (cmd == XFS_IOC_FSINUMBERS) error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer); + bulkreq.ubuffer, xfs_inumbers_fmt); else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) error = xfs_bulkstat_single(mp, &inlast, bulkreq.ubuffer, &done); diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b83cebc165f..141cf15067c 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -23,10 +23,25 @@ #include <linux/fs.h> #include <asm/uaccess.h> #include "xfs.h" -#include "xfs_types.h" #include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_attr_sf.h" +#include "xfs_dir2_sf.h" #include "xfs_vfs.h" #include "xfs_vnode.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_error.h" #include "xfs_dfrag.h" #define _NATIVE_IOC(cmd, type) \ @@ -34,6 +49,7 @@ #if defined(CONFIG_IA64) || defined(CONFIG_X86_64) #define BROKEN_X86_ALIGNMENT +#define _PACKED __attribute__((packed)) /* on ia32 l_start is on a 32-bit boundary */ typedef struct xfs_flock64_32 { __s16 l_type; @@ -75,35 +91,276 @@ xfs_ioctl32_flock( return (unsigned long)p; } +typedef struct compat_xfs_fsop_geom_v1 { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ +} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; + +#define XFS_IOC_FSGEOMETRY_V1_32 \ + _IOR ('X', 100, struct compat_xfs_fsop_geom_v1) + +STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg) +{ + compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg; + xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p)); + + if (copy_in_user(p, p32, sizeof(*p32))) + return -EFAULT; + return (unsigned long)p; +} + +typedef struct compat_xfs_inogrp { + __u64 xi_startino; /* starting inode number */ + __s32 xi_alloccount; /* # bits set in allocmask */ + __u64 xi_allocmask; /* mask of allocated inodes */ +} __attribute__((packed)) compat_xfs_inogrp_t; + +STATIC int xfs_inumbers_fmt_compat( + void __user *ubuffer, + const xfs_inogrp_t *buffer, + long count, + long *written) +{ + compat_xfs_inogrp_t *p32 = ubuffer; + long i; + + for (i = 0; i < count; i++) { + if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || + put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || + put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) + return -EFAULT; + } + *written = count * sizeof(*p32); + return 0; +} + #else -typedef struct xfs_fsop_bulkreq32 { +#define xfs_inumbers_fmt_compat xfs_inumbers_fmt +#define _PACKED + +#endif + +/* XFS_IOC_FSBULKSTAT and friends */ + +typedef struct compat_xfs_bstime { + __s32 tv_sec; /* seconds */ + __s32 tv_nsec; /* and nanoseconds */ +} compat_xfs_bstime_t; + +STATIC int xfs_bstime_store_compat( + compat_xfs_bstime_t __user *p32, + const xfs_bstime_t *p) +{ + __s32 sec32; + + sec32 = p->tv_sec; + if (put_user(sec32, &p32->tv_sec) || + put_user(p->tv_nsec, &p32->tv_nsec)) + return -EFAULT; + return 0; +} + +typedef struct compat_xfs_bstat { + __u64 bs_ino; /* inode number */ + __u16 bs_mode; /* type and mode */ + __u16 bs_nlink; /* number of links */ + __u32 bs_uid; /* user id */ + __u32 bs_gid; /* group id */ + __u32 bs_rdev; /* device value */ + __s32 bs_blksize; /* block size */ + __s64 bs_size; /* file size */ + compat_xfs_bstime_t bs_atime; /* access time */ + compat_xfs_bstime_t bs_mtime; /* modify time */ + compat_xfs_bstime_t bs_ctime; /* inode change time */ + int64_t bs_blocks; /* number of blocks */ + __u32 bs_xflags; /* extended flags */ + __s32 bs_extsize; /* extent size */ + __s32 bs_extents; /* number of extents */ + __u32 bs_gen; /* generation count */ + __u16 bs_projid; /* project id */ + unsigned char bs_pad[14]; /* pad space, unused */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +} _PACKED compat_xfs_bstat_t; + +STATIC int xfs_bulkstat_one_fmt_compat( + void __user *ubuffer, + const xfs_bstat_t *buffer) +{ + compat_xfs_bstat_t __user *p32 = ubuffer; + + if (put_user(buffer->bs_ino, &p32->bs_ino) || + put_user(buffer->bs_mode, &p32->bs_mode) || + put_user(buffer->bs_nlink, &p32->bs_nlink) || + put_user(buffer->bs_uid, &p32->bs_uid) || + put_user(buffer->bs_gid, &p32->bs_gid) || + put_user(buffer->bs_rdev, &p32->bs_rdev) || + put_user(buffer->bs_blksize, &p32->bs_blksize) || + put_user(buffer->bs_size, &p32->bs_size) || + xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || + xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || + xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || + put_user(buffer->bs_blocks, &p32->bs_blocks) || + put_user(buffer->bs_xflags, &p32->bs_xflags) || + put_user(buffer->bs_extsize, &p32->bs_extsize) || + put_user(buffer->bs_extents, &p32->bs_extents) || + put_user(buffer->bs_gen, &p32->bs_gen) || + put_user(buffer->bs_projid, &p32->bs_projid) || + put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || + put_user(buffer->bs_dmstate, &p32->bs_dmstate) || + put_user(buffer->bs_aextents, &p32->bs_aextents)) + return -EFAULT; + return sizeof(*p32); +} + + + +typedef struct compat_xfs_fsop_bulkreq { compat_uptr_t lastip; /* last inode # pointer */ __s32 icount; /* count of entries in buffer */ compat_uptr_t ubuffer; /* user buffer for inode desc. */ - __s32 ocount; /* output count pointer */ -} xfs_fsop_bulkreq32_t; + compat_uptr_t ocount; /* output count pointer */ +} compat_xfs_fsop_bulkreq_t; -STATIC unsigned long -xfs_ioctl32_bulkstat( - unsigned long arg) +#define XFS_IOC_FSBULKSTAT_32 \ + _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ + _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSINUMBERS_32 \ + _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) + +/* copied from xfs_ioctl.c */ +STATIC int +xfs_ioc_bulkstat_compat( + xfs_mount_t *mp, + unsigned int cmd, + void __user *arg) { - xfs_fsop_bulkreq32_t __user *p32 = (void __user *)arg; - xfs_fsop_bulkreq_t __user *p = compat_alloc_user_space(sizeof(*p)); + compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg; u32 addr; + xfs_fsop_bulkreq_t bulkreq; + int count; /* # of records returned */ + xfs_ino_t inlast; /* last inode number */ + int done; + int error; + + /* done = 1 if there are more stats to get and if bulkstat */ + /* should be called again (unused here, but used in dmapi) */ + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_ |