diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2006-02-08 11:50:51 +0000 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2006-02-08 11:50:51 +0000 |
commit | 18ec7d5c3f434aed9661ed10a9e1f48cdeb4981d (patch) | |
tree | a7161a4c4b3592052e6772e1c23849de16cac649 | |
parent | 257f9b4e97e9a6cceeb247cead92119a4396d37b (diff) |
[GFS2] Make journaled data files identical to normal files on disk
This is a very large patch, with a few still to be resolved issues
so you might want to check out the previous head of the tree since
this is known to be unstable. Fixes for the various bugs will be
forthcoming shortly.
This patch removes the special data format which has been used
up till now for journaled data files. Directories still retain the
old format so that they will remain on disk compatible with earlier
releases. As a result you can now do the following with journaled
data files:
1) mmap them
2) export them over NFS
3) convert to/from normal files whenever you want to (the zero length
restriction is gone)
In addition the level at which GFS' locking is done has changed for all
files (since they all now use the page cache) such that the locking is
done at the page cache level rather than the level of the fs operations.
This should mean that things like loopback mounts and other things which
touch the page cache directly should now work.
Current known issues:
1. There is a lock mode inversion problem related to the resource
group hold function which needs to be resolved.
2. Any significant amount of I/O causes an oops with an offset of hex 320
(NULL pointer dereference) which appears to be related to a journaled data
buffer appearing on a list where it shouldn't be.
3. Direct I/O writes are disabled for the time being (will reappear later)
4. There is probably a deadlock between the page lock and GFS' locks under
certain combinations of mmap and fs operation I/O.
5. Issue relating to ref counting on internally used inodes causes a hang
on umount (discovered before this patch, and not fixed by it)
6. One part of the directory metadata is different from GFS1 and will need
to be resolved before next release.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
-rw-r--r-- | fs/gfs2/Makefile | 1 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 59 | ||||
-rw-r--r-- | fs/gfs2/dir.c | 4 | ||||
-rw-r--r-- | fs/gfs2/dir.h | 2 | ||||
-rw-r--r-- | fs/gfs2/inode.h | 7 | ||||
-rw-r--r-- | fs/gfs2/jdata.c | 389 | ||||
-rw-r--r-- | fs/gfs2/jdata.h | 52 | ||||
-rw-r--r-- | fs/gfs2/log.c | 4 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 280 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 16 | ||||
-rw-r--r-- | fs/gfs2/ops_address.c | 260 | ||||
-rw-r--r-- | fs/gfs2/ops_file.c | 967 | ||||
-rw-r--r-- | fs/gfs2/ops_vm.c | 3 | ||||
-rw-r--r-- | fs/gfs2/page.c | 10 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 114 | ||||
-rw-r--r-- | fs/gfs2/trans.c | 19 | ||||
-rw-r--r-- | fs/gfs2/trans.h | 1 | ||||
-rw-r--r-- | fs/gfs2/util.c | 3 | ||||
-rw-r--r-- | include/linux/gfs2_ondisk.h | 7 |
19 files changed, 721 insertions, 1477 deletions
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 4e87b8661af..88f92794811 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -10,7 +10,6 @@ gfs2-y := \ glock.o \ glops.o \ inode.o \ - jdata.o \ lm.o \ log.o \ lops.o \ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index bd194f645c5..4efcd8a39e9 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -18,12 +18,12 @@ #include "bmap.h" #include "glock.h" #include "inode.h" -#include "jdata.h" #include "meta_io.h" #include "page.h" #include "quota.h" #include "rgrp.h" #include "trans.h" +#include "dir.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k * block is 512, so __u16 is fine for that. It saves stack space to @@ -90,7 +90,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, { struct buffer_head *bh, *dibh; uint64_t block = 0; - int journaled = gfs2_is_jdata(ip); + int isdir = gfs2_is_dir(ip); int error; down_write(&ip->i_rw_mutex); @@ -103,10 +103,10 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, /* Get a free block, fill it with the stuffed data, and write it out to disk */ - if (journaled) { + if (isdir) { block = gfs2_alloc_meta(ip); - error = gfs2_jdata_get_buffer(ip, block, 1, &bh); + error = gfs2_dir_get_buffer(ip, block, 1, &bh); if (error) goto out_brelse; gfs2_buffer_copy_tail(bh, @@ -168,7 +168,7 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) if (ip->i_di.di_size > size) size = ip->i_di.di_size; - if (gfs2_is_jdata(ip)) { + if (gfs2_is_dir(ip)) { arr = sdp->sd_jheightsize; max = sdp->sd_max_jheight; } else { @@ -377,7 +377,7 @@ static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, return; if (height == ip->i_di.di_height - 1 && - !gfs2_is_jdata(ip)) + !gfs2_is_dir(ip)) *block = gfs2_alloc_data(ip); else *block = gfs2_alloc_meta(ip); @@ -430,7 +430,7 @@ int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new, if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) goto out; - bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; + bsize = (gfs2_is_dir(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; height = calc_tree_height(ip, (lblock + 1) * bsize); if (ip->i_di.di_height < height) { @@ -618,7 +618,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, sm->sm_first = 0; } - metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip); + metadata = (height != ip->i_di.di_height - 1); if (metadata) revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; @@ -814,33 +814,6 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size) return error; } -static int truncator_journaled(struct gfs2_inode *ip, uint64_t size) -{ - uint64_t lbn, dbn; - uint32_t off; - struct buffer_head *bh; - int new = 0; - int error; - - lbn = size; - off = do_div(lbn, ip->i_sbd->sd_jbsize); - - error = gfs2_block_map(ip, lbn, &new, &dbn, NULL); - if (error || !dbn) - return error; - - error = gfs2_jdata_get_buffer(ip, dbn, 0, &bh); - if (error) - return error; - - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off); - - brelse(bh); - - return 0; -} - static int trunc_start(struct gfs2_inode *ip, uint64_t size) { struct gfs2_sbd *sdp = ip->i_sbd; @@ -866,12 +839,7 @@ static int trunc_start(struct gfs2_inode *ip, uint64_t size) error = 1; } else { - if (journaled) { - uint64_t junk = size; - /* we're just interested in the modulus */ - if (do_div(junk, sdp->sd_jbsize)) - error = truncator_journaled(ip, size); - } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) + if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) error = gfs2_block_truncate_page(ip->i_vnode->i_mapping); if (!error) { @@ -900,10 +868,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size) if (!size) lblock = 0; - else if (gfs2_is_jdata(ip)) { - lblock = size - 1; - do_div(lblock, ip->i_sbd->sd_jbsize); - } else + else lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift; find_metapath(ip, lblock, &mp); @@ -1051,7 +1016,7 @@ void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len, struct gfs2_sbd *sdp = ip->i_sbd; unsigned int tmp; - if (gfs2_is_jdata(ip)) { + if (gfs2_is_dir(ip)) { *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2; *ind_blocks = 3 * (sdp->sd_max_jheight - 1); } else { @@ -1096,7 +1061,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset, return 0; } - if (gfs2_is_jdata(ip)) { + if (gfs2_is_dir(ip)) { unsigned int bsize = sdp->sd_jbsize; lblock = offset; do_div(lblock, bsize); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index ada283a0f5f..c77e18048d9 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -86,8 +86,8 @@ typedef int (*leaf_call_t) (struct gfs2_inode *dip, uint32_t index, uint32_t len, uint64_t leaf_no, void *data); -static int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, - struct buffer_head **bhp) +int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp) { struct buffer_head *bh; int error = 0; diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index ff6d1c597ee..5b01497b3ab 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -45,5 +45,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename, int *alloc_required); +int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp); #endif /* __DIR_DOT_H__ */ diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index e42ae38d677..214975c6bb2 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -20,6 +20,11 @@ static inline int gfs2_is_jdata(struct gfs2_inode *ip) return ip->i_di.di_flags & GFS2_DIF_JDATA; } +static inline int gfs2_is_dir(struct gfs2_inode *ip) +{ + return S_ISDIR(ip->i_di.di_mode); +} + void gfs2_inode_attr_in(struct gfs2_inode *ip); void gfs2_inode_attr_out(struct gfs2_inode *ip); struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip); @@ -72,9 +77,9 @@ static inline int gfs2_lookup_simple(struct inode *dip, char *name, err = gfs2_lookupi(get_v2ip(dip), &qstr, 1, &ip); if (err == 0) { *ipp = gfs2_ip2v(ip); + gfs2_inode_put(ip); if (*ipp == NULL) err = -ENOMEM; - gfs2_inode_put(ip); } return err; } diff --git a/fs/gfs2/jdata.c b/fs/gfs2/jdata.c deleted file mode 100644 index e43eaf133f1..00000000000 --- a/fs/gfs2/jdata.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - */ - -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/completion.h> -#include <linux/buffer_head.h> -#include <asm/semaphore.h> -#include <asm/uaccess.h> - -#include "gfs2.h" -#include "bmap.h" -#include "inode.h" -#include "jdata.h" -#include "meta_io.h" -#include "trans.h" - -int gfs2_internal_read(struct gfs2_inode *ip, - struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size) -{ - return gfs2_jdata_read_mem(ip, buf, *pos, size); -} - -int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, - struct buffer_head **bhp) -{ - struct buffer_head *bh; - int error = 0; - - if (new) { - bh = gfs2_meta_new(ip->i_gl, block); - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); - gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); - } else { - error = gfs2_meta_read(ip->i_gl, block, - DIO_START | DIO_WAIT, &bh); - if (error) - return error; - if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) { - brelse(bh); - return -EIO; - } - } - - *bhp = bh; - - return 0; -} - -/** - * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read() - * @bh: The buffer to copy from, or NULL meaning zero the buffer - * @buf: The buffer to copy/zero - * @offset: The offset in the buffer to copy from - * @size: The amount of data to copy/zero - * - * Returns: errno - */ - -int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset, - unsigned int size) -{ - if (bh) - memcpy(*buf, bh->b_data + offset, size); - else - memset(*buf, 0, size); - *buf += size; - return 0; -} - -/** - * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read() - * @bh: The buffer - * @buf: The destination of the data - * @offset: The offset into the buffer - * @size: The amount of data to copy - * - * Returns: errno - */ - -int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset, - unsigned int size) -{ - int error; - - if (bh) - error = copy_to_user(*buf, bh->b_data + offset, size); - else - error = clear_user(*buf, size); - - if (error) - error = -EFAULT; - else - *buf += size; - - return error; -} - -static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf, - unsigned int offset, unsigned int size, - read_copy_fn_t copy_fn) -{ - struct buffer_head *dibh; - int error; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - error = copy_fn(dibh, &buf, - offset + sizeof(struct gfs2_dinode), size); - brelse(dibh); - } - - return (error) ? error : size; -} - -/** - * gfs2_jdata_read - Read a jdata file - * @ip: The GFS2 Inode - * @buf: The buffer to place result into - * @offset: File offset to begin jdata_readng from - * @size: Amount of data to transfer - * @copy_fn: Function to actually perform the copy - * - * The @copy_fn only copies a maximum of a single block at once so - * we are safe calling it with int arguments. It is done so that - * we don't needlessly put 64bit arguments on the stack and it - * also makes the code in the @copy_fn nicer too. - * - * Returns: The amount of data actually copied or the error - */ - -int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, uint64_t offset, - unsigned int size, read_copy_fn_t copy_fn) -{ - struct gfs2_sbd *sdp = ip->i_sbd; - uint64_t lblock, dblock; - uint32_t extlen = 0; - unsigned int o; - int copied = 0; - int error = 0; - - if (offset >= ip->i_di.di_size) - return 0; - - if ((offset + size) > ip->i_di.di_size) - size = ip->i_di.di_size - offset; - - if (!size) - return 0; - - if (gfs2_is_stuffed(ip)) - return jdata_read_stuffed(ip, buf, (unsigned int)offset, size, - copy_fn); - - if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) - return -EINVAL; - - lblock = offset; - o = do_div(lblock, sdp->sd_jbsize) + - sizeof(struct gfs2_meta_header); - - while (copied < size) { - unsigned int amount; - struct buffer_head *bh; - int new; - - amount = size - copied; - if (amount > sdp->sd_sb.sb_bsize - o) - amount = sdp->sd_sb.sb_bsize - o; - - if (!extlen) { - new = 0; - error = gfs2_block_map(ip, lblock, &new, - &dblock, &extlen); - if (error) - goto fail; - } - - if (extlen > 1) - gfs2_meta_ra(ip->i_gl, dblock, extlen); - - if (dblock) { - error = gfs2_jdata_get_buffer(ip, dblock, new, &bh); - if (error) - goto fail; - dblock++; - extlen--; - } else - bh = NULL; - - error = copy_fn(bh, &buf, o, amount); - brelse(bh); - if (error) - goto fail; - - copied += amount; - lblock++; - - o = sizeof(struct gfs2_meta_header); - } - - return copied; - - fail: - return (copied) ? copied : error; -} - -/** - * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write() - * @bh: The buffer to copy to or clear - * @buf: The buffer to copy from - * @offset: The offset in the buffer to write to - * @size: The amount of data to write - * - * Returns: errno - */ - -int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh, - const char **buf, unsigned int offset, unsigned int size) -{ - gfs2_trans_add_bh(ip->i_gl, bh, 1); - memcpy(bh->b_data + offset, *buf, size); - - *buf += size; - - return 0; -} - -/** - * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write() - * @bh: The buffer to copy to or clear - * @buf: The buffer to copy from - * @offset: The offset in the buffer to write to - * @size: The amount of data to write - * - * Returns: errno - */ - -int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh, - const char __user **buf, unsigned int offset, unsigned int size) -{ - int error = 0; - - gfs2_trans_add_bh(ip->i_gl, bh, 1); - if (copy_from_user(bh->b_data + offset, *buf, size)) - error = -EFAULT; - else - *buf += size; - - return error; -} - -static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf, - unsigned int offset, unsigned int size, - write_copy_fn_t copy_fn) -{ - struct buffer_head *dibh; - int error; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - error = copy_fn(ip, - dibh, &buf, - offset + sizeof(struct gfs2_dinode), size); - if (!error) { - if (ip->i_di.di_size < offset + size) - ip->i_di.di_size = offset + size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - } - - brelse(dibh); - - return (error) ? error : size; -} - -/** - * gfs2_jdata_write - Write bytes to a file - * @ip: The GFS2 inode - * @buf: The buffer containing information to be written - * @offset: The file offset to start writing at - * @size: The amount of data to write - * @copy_fn: Function to do the actual copying - * - * Returns: The number of bytes correctly written or error code - */ - -int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, uint64_t offset, - unsigned int size, write_copy_fn_t copy_fn) -{ - struct gfs2_sbd *sdp = ip->i_sbd; - struct buffer_head *dibh; - uint64_t lblock, dblock; - uint32_t extlen = 0; - unsigned int o; - int copied = 0; - int error = 0; - - if (!size) - return 0; - - if (gfs2_is_stuffed(ip) && - offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) - return jdata_write_stuffed(ip, buf, (unsigned int)offset, size, - copy_fn); - - if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) - return -EINVAL; - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL, NULL); - if (error) - return error; - } - - lblock = offset; - o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); - - while (copied < size) { - unsigned int amount; - struct buffer_head *bh; - int new; - - amount = size - copied; - if (amount > sdp->sd_sb.sb_bsize - o) - amount = sdp->sd_sb.sb_bsize - o; - - if (!extlen) { - new = 1; - error = gfs2_block_map(ip, lblock, &new, - &dblock, &extlen); - if (error) - goto fail; - error = -EIO; - if (gfs2_assert_withdraw(sdp, dblock)) - goto fail; - } - - error = gfs2_jdata_get_buffer(ip, dblock, - (amount == sdp->sd_jbsize) ? 1 : new, - &bh); - if (error) - goto fail; - - error = copy_fn(ip, bh, &buf, o, amount); - brelse(bh); - if (error) - goto fail; - - copied += amount; - lblock++; - dblock++; - extlen--; - - o = sizeof(struct gfs2_meta_header); - } - - out: - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - if (ip->i_di.di_size < offset + copied) - ip->i_di.di_size = offset + copied; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); - - return copied; - - fail: - if (copied) - goto out; - return error; -} - diff --git a/fs/gfs2/jdata.h b/fs/gfs2/jdata.h deleted file mode 100644 index 95e18fcb8f8..00000000000 --- a/fs/gfs2/jdata.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - */ - -#ifndef __FILE_DOT_H__ -#define __FILE_DOT_H__ - -int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, - struct buffer_head **bhp); - -typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf, - unsigned int offset, unsigned int size); -typedef int (*write_copy_fn_t) (struct gfs2_inode *ip, - struct buffer_head *bh, const char **buf, - unsigned int offset, unsigned int size); - -int gfs2_copy2mem(struct buffer_head *bh, char **buf, - unsigned int offset, unsigned int size); -int gfs2_copy2user(struct buffer_head *bh, char __user **buf, - unsigned int offset, unsigned int size); -int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, - uint64_t offset, unsigned int size, - read_copy_fn_t copy_fn); - -int gfs2_copy_from_mem(struct gfs2_inode *ip, - struct buffer_head *bh, const char **buf, - unsigned int offset, unsigned int size); -int gfs2_copy_from_user(struct gfs2_inode *ip, - struct buffer_head *bh, const char __user **buf, - unsigned int offset, unsigned int size); -int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, - uint64_t offset, unsigned int size, - write_copy_fn_t copy_fn); - -static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf, - uint64_t offset, unsigned int size) -{ - return gfs2_jdata_read(ip, (__force char __user *)buf, offset, size, gfs2_copy2mem); -} - -static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, const char *buf, - uint64_t offset, unsigned int size) -{ - return gfs2_jdata_write(ip, (__force const char __user *)buf, offset, size, gfs2_copy_from_mem); -} - -#endif /* __FILE_DOT_H__ */ diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f6d00130f96..9b4484d366c 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -387,8 +387,7 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); atomic_set(&bh->b_count, 1); bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate); - set_bh_page(bh, virt_to_page(real->b_data), - ((unsigned long)real->b_data) & (PAGE_SIZE - 1)); + set_bh_page(bh, real->b_page, bh_offset(real)); bh->b_blocknr = blkno; bh->b_size = sdp->sd_sb.sb_bsize; bh->b_bdev = sdp->sd_vfs->s_bdev; @@ -634,6 +633,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); + gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index a065f766723..dd41863810d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -428,49 +428,188 @@ static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); } +/** + * databuf_lo_add - Add a databuf to the transaction. + * + * This is used in two distinct cases: + * i) In ordered write mode + * We put the data buffer on a list so that we can ensure that its + * synced to disk at the right time + * ii) In journaled data mode + * We need to journal the data block in the same way as metadata in + * the functions above. The difference is that here we have a tag + * which is two __be64's being the block number (as per meta data) + * and a flag which says whether the data block needs escaping or + * not. This means we need a new log entry for each 251 or so data + * blocks, which isn't an enormous overhead but twice as much as + * for normal metadata blocks. + */ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { - get_transaction->tr_touched = 1; + struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_trans *tr = get_transaction; + struct address_space *mapping = bd->bd_bh->b_page->mapping; + struct gfs2_inode *ip = get_v2ip(mapping->host); + tr->tr_touched = 1; + if (!list_empty(&bd->bd_list_tr) && + (ip->i_di.di_flags & GFS2_DIF_JDATA)) { + tr->tr_num_buf++; + gfs2_trans_add_gl(bd->bd_gl); + list_add(&bd->bd_list_tr, &tr->tr_list_buf); + gfs2_pin(sdp, bd->bd_bh); + } else { + clear_buffer_pinned(bd->bd_bh); + } gfs2_log_lock(sdp); + if (ip->i_di.di_flags & GFS2_DIF_JDATA) + sdp->sd_log_num_jdata++; sdp->sd_log_num_databuf++; list_add(&le->le_list, &sdp->sd_log_le_databuf); gfs2_log_unlock(sdp); } +static int gfs2_check_magic(struct buffer_head *bh) +{ + struct page *page = bh->b_page; + void *kaddr; + __be32 *ptr; + int rv = 0; + + kaddr = kmap_atomic(page, KM_USER0); + ptr = kaddr + bh_offset(bh); + if (*ptr == cpu_to_be32(GFS2_MAGIC)) + rv = 1; + kunmap_atomic(page, KM_USER0); + + return rv; +} + +/** + * databuf_lo_before_commit - Scan the data buffers, writing as we go + * + * Here we scan through the lists of buffers and make the assumption + * that any buffer thats been pinned is being journaled, and that + * any unpinned buffer is an ordered write data buffer and therefore + * will be written back rather than journaled. + */ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) { - struct list_head *head = &sdp->sd_log_le_databuf; LIST_HEAD(started); - struct gfs2_bufdata *bd; - struct buffer_head *bh; + struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; + struct buffer_head *bh = NULL; + unsigned int offset = sizeof(struct gfs2_log_descriptor); + struct gfs2_log_descriptor *ld; + unsigned int limit; + unsigned int total_dbuf = sdp->sd_log_num_databuf; + unsigned int total_jdata = sdp->sd_log_num_jdata; + unsigned int num, n; + __be64 *ptr; - while (!list_empty(head)) { - bd = list_entry(head->prev, struct gfs2_bufdata, bd_le.le_list); - list_move(&bd->bd_le.le_list, &started); + offset += (2*sizeof(__be64) - 1); + offset &= ~(2*sizeof(__be64) - 1); + limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64); - gfs2_log_lock(sdp); - bh = bd->bd_bh; + /* printk(KERN_INFO "totals: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */ + /* + * Start writing ordered buffers, write journaled buffers + * into the log along with a header + */ + bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, bd_le.le_list); + while(total_dbuf) { + num = total_jdata; + if (num > limit) + num = limit; + n = 0; + list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) { + gfs2_log_lock(sdp); + /* An ordered write buffer */ + if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) { + list_move(&bd1->bd_le.le_list, &started); + if (bd1 == bd2) { + bd2 = NULL; + bd2 = list_prepare_entry(bd2, &sdp->sd_log_le_databuf, bd_le.le_list); + } + total_dbuf--; + if (bd1->bd_bh) { + get_bh(bd1->bd_bh); + gfs2_log_unlock(sdp); + if (buffer_dirty(bd1->bd_bh)) { + wait_on_buffer(bd1->bd_bh); + ll_rw_block(WRITE, 1, &bd1->bd_bh); + } + brelse(bd1->bd_bh); + continue; + } + gfs2_log_unlock(sdp); + continue; + } else if (bd1->bd_bh) { /* A journaled buffer */ + int magic; + gfs2_log_unlock(sdp); + /* printk(KERN_INFO "journaled buffer\n"); */ + if (!bh) { + bh = gfs2_log_get_buf(sdp); + ld = (struct gfs2_log_descriptor *)bh->b_data; + ptr = (__be64 *)(bh->b_data + offset); + ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD); + ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD); + ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA); + ld->ld_length = cpu_to_be32(num + 1); + ld->ld_data1 = cpu_to_be32(num); + ld->ld_data2 = cpu_to_be32(0); + memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); + } + magic = gfs2_check_magic(bd1->bd_bh); + *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr); + *ptr++ = cpu_to_be64((__u64)magic); + clear_buffer_escaped(bd1->bd_bh); + if (unlikely(magic != 0)) + set_buffer_escaped(bd1->bd_bh); + if (n++ > num) + break; + } + } if (bh) { - get_bh(bh); - gfs2_log_unlock(sdp); - if (buffer_dirty(bh)) { - wait_on_buffer(bh); - ll_rw_block(WRITE, 1, &bh); + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + bh = NULL; + } + n = 0; + /* printk(KERN_INFO "totals2: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */ + list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, bd_le.le_list) { + if (!bd2->bd_bh) + continue; + /* copy buffer if it needs escaping */ + if (unlikely(buffer_escaped(bd2->bd_bh))) { + void *kaddr; + struct page *page = bd2->bd_bh->b_page; + bh = gfs2_log_get_buf(sdp); + kaddr = kmap_atomic(page, KM_USER0); + memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize); + kunmap_atomic(page, KM_USER0); + *(__be32 *)bh->b_data = 0; + } else { + bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); } - brelse(bh); - } else - gfs2_log_unlock(sdp); + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + if (++n >= num) + break; + } + bh = NULL; + total_dbuf -= num; + total_jdata -= num; } - + /* printk(KERN_INFO "wait on ordered data buffers\n"); */ + /* Wait on all ordered buffers */ while (!list_empty(&started)) { - bd = list_entry(started.next, struct gfs2_bufdata, - bd_le.le_list); - list_del(&bd->bd_le.le_list); + bd1 = list_entry(started.next, struct gfs2_bufdata, bd_le.le_list); + list_del(&bd1->bd_le.le_list); sdp->sd_log_num_databuf--; gfs2_log_lock(sdp); - bh = bd->bd_bh; + bh = bd1->bd_bh; if (bh) { set_v2bd(bh, NULL); gfs2_log_unlock(sdp); @@ -479,12 +618,103 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) } else gfs2_log_unlock(sdp); - kfree(bd); + kfree(bd1); } + /* printk(KERN_INFO "sd_log_num_databuf %u sd_log_num_jdata %u\n", sdp->sd_log_num_databuf, sdp->sd_log_num_jdata); */ + /* We've removed all the ordered write bufs here, so only jdata left */ + gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata); +} + +static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, + __be64 *ptr, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + struct gfs2_glock *gl = jd->jd_inode->i_gl; + unsigned int blks = be32_to_cpu(ld->ld_data1); + struct buffer_head *bh_log, *bh_ip; + uint64_t blkno; + uint64_t esc; + int error = 0; + + if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA) + return 0; + + gfs2_replay_incr_blk(sdp, &start); + for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + blkno = be64_to_cpu(*ptr++); + esc = be64_to_cpu(*ptr++); + + sdp->sd_found_blocks++; + + if (gfs2_revoke_check(sdp, blkno, start)) + continue; + + error = gfs2_replay_read_block(jd, start, &bh_log); + if (error) + return error; + + bh_ip = gfs2_meta_new(gl, blkno); + memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); + + /* Unescape */ + if (esc) { + __be32 *eptr = (__be32 *)bh_ip->b_data; + *eptr = cpu_to_be32(GFS2_MAGIC); + } + mark_buffer_dirty(bh_ip); + |