diff options
-rw-r--r-- | fs/gfs2/Makefile | 1 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 59 | ||||
-rw-r--r-- | fs/gfs2/dir.c | 4 | ||||
-rw-r--r-- | fs/gfs2/dir.h | 2 | ||||
-rw-r--r-- | fs/gfs2/inode.h | 7 | ||||
-rw-r--r-- | fs/gfs2/jdata.c | 389 | ||||
-rw-r--r-- | fs/gfs2/jdata.h | 52 | ||||
-rw-r--r-- | fs/gfs2/log.c | 4 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 280 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 16 | ||||
-rw-r--r-- | fs/gfs2/ops_address.c | 260 | ||||
-rw-r--r-- | fs/gfs2/ops_file.c | 967 | ||||
-rw-r--r-- | fs/gfs2/ops_vm.c | 3 | ||||
-rw-r--r-- | fs/gfs2/page.c | 10 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 114 | ||||
-rw-r--r-- | fs/gfs2/trans.c | 19 | ||||
-rw-r--r-- | fs/gfs2/trans.h | 1 | ||||
-rw-r--r-- | fs/gfs2/util.c | 3 | ||||
-rw-r--r-- | include/linux/gfs2_ondisk.h | 7 |
19 files changed, 721 insertions, 1477 deletions
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile index 4e87b8661af..88f92794811 100644 --- a/fs/gfs2/Makefile +++ b/fs/gfs2/Makefile @@ -10,7 +10,6 @@ gfs2-y := \ glock.o \ glops.o \ inode.o \ - jdata.o \ lm.o \ log.o \ lops.o \ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index bd194f645c5..4efcd8a39e9 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -18,12 +18,12 @@ #include "bmap.h" #include "glock.h" #include "inode.h" -#include "jdata.h" #include "meta_io.h" #include "page.h" #include "quota.h" #include "rgrp.h" #include "trans.h" +#include "dir.h" /* This doesn't need to be that large as max 64 bit pointers in a 4k * block is 512, so __u16 is fine for that. It saves stack space to @@ -90,7 +90,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, { struct buffer_head *bh, *dibh; uint64_t block = 0; - int journaled = gfs2_is_jdata(ip); + int isdir = gfs2_is_dir(ip); int error; down_write(&ip->i_rw_mutex); @@ -103,10 +103,10 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, /* Get a free block, fill it with the stuffed data, and write it out to disk */ - if (journaled) { + if (isdir) { block = gfs2_alloc_meta(ip); - error = gfs2_jdata_get_buffer(ip, block, 1, &bh); + error = gfs2_dir_get_buffer(ip, block, 1, &bh); if (error) goto out_brelse; gfs2_buffer_copy_tail(bh, @@ -168,7 +168,7 @@ static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) if (ip->i_di.di_size > size) size = ip->i_di.di_size; - if (gfs2_is_jdata(ip)) { + if (gfs2_is_dir(ip)) { arr = sdp->sd_jheightsize; max = sdp->sd_max_jheight; } else { @@ -377,7 +377,7 @@ static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, return; if (height == ip->i_di.di_height - 1 && - !gfs2_is_jdata(ip)) + !gfs2_is_dir(ip)) *block = gfs2_alloc_data(ip); else *block = gfs2_alloc_meta(ip); @@ -430,7 +430,7 @@ int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new, if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) goto out; - bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; + bsize = (gfs2_is_dir(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; height = calc_tree_height(ip, (lblock + 1) * bsize); if (ip->i_di.di_height < height) { @@ -618,7 +618,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, sm->sm_first = 0; } - metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip); + metadata = (height != ip->i_di.di_height - 1); if (metadata) revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; @@ -814,33 +814,6 @@ static int do_grow(struct gfs2_inode *ip, uint64_t size) return error; } -static int truncator_journaled(struct gfs2_inode *ip, uint64_t size) -{ - uint64_t lbn, dbn; - uint32_t off; - struct buffer_head *bh; - int new = 0; - int error; - - lbn = size; - off = do_div(lbn, ip->i_sbd->sd_jbsize); - - error = gfs2_block_map(ip, lbn, &new, &dbn, NULL); - if (error || !dbn) - return error; - - error = gfs2_jdata_get_buffer(ip, dbn, 0, &bh); - if (error) - return error; - - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off); - - brelse(bh); - - return 0; -} - static int trunc_start(struct gfs2_inode *ip, uint64_t size) { struct gfs2_sbd *sdp = ip->i_sbd; @@ -866,12 +839,7 @@ static int trunc_start(struct gfs2_inode *ip, uint64_t size) error = 1; } else { - if (journaled) { - uint64_t junk = size; - /* we're just interested in the modulus */ - if (do_div(junk, sdp->sd_jbsize)) - error = truncator_journaled(ip, size); - } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) + if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) error = gfs2_block_truncate_page(ip->i_vnode->i_mapping); if (!error) { @@ -900,10 +868,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size) if (!size) lblock = 0; - else if (gfs2_is_jdata(ip)) { - lblock = size - 1; - do_div(lblock, ip->i_sbd->sd_jbsize); - } else + else lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift; find_metapath(ip, lblock, &mp); @@ -1051,7 +1016,7 @@ void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len, struct gfs2_sbd *sdp = ip->i_sbd; unsigned int tmp; - if (gfs2_is_jdata(ip)) { + if (gfs2_is_dir(ip)) { *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2; *ind_blocks = 3 * (sdp->sd_max_jheight - 1); } else { @@ -1096,7 +1061,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset, return 0; } - if (gfs2_is_jdata(ip)) { + if (gfs2_is_dir(ip)) { unsigned int bsize = sdp->sd_jbsize; lblock = offset; do_div(lblock, bsize); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index ada283a0f5f..c77e18048d9 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -86,8 +86,8 @@ typedef int (*leaf_call_t) (struct gfs2_inode *dip, uint32_t index, uint32_t len, uint64_t leaf_no, void *data); -static int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, - struct buffer_head **bhp) +int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp) { struct buffer_head *bh; int error = 0; diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index ff6d1c597ee..5b01497b3ab 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h @@ -45,5 +45,7 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename, int *alloc_required); +int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp); #endif /* __DIR_DOT_H__ */ diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index e42ae38d677..214975c6bb2 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -20,6 +20,11 @@ static inline int gfs2_is_jdata(struct gfs2_inode *ip) return ip->i_di.di_flags & GFS2_DIF_JDATA; } +static inline int gfs2_is_dir(struct gfs2_inode *ip) +{ + return S_ISDIR(ip->i_di.di_mode); +} + void gfs2_inode_attr_in(struct gfs2_inode *ip); void gfs2_inode_attr_out(struct gfs2_inode *ip); struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip); @@ -72,9 +77,9 @@ static inline int gfs2_lookup_simple(struct inode *dip, char *name, err = gfs2_lookupi(get_v2ip(dip), &qstr, 1, &ip); if (err == 0) { *ipp = gfs2_ip2v(ip); + gfs2_inode_put(ip); if (*ipp == NULL) err = -ENOMEM; - gfs2_inode_put(ip); } return err; } diff --git a/fs/gfs2/jdata.c b/fs/gfs2/jdata.c deleted file mode 100644 index e43eaf133f1..00000000000 --- a/fs/gfs2/jdata.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - */ - -#include <linux/sched.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/completion.h> -#include <linux/buffer_head.h> -#include <asm/semaphore.h> -#include <asm/uaccess.h> - -#include "gfs2.h" -#include "bmap.h" -#include "inode.h" -#include "jdata.h" -#include "meta_io.h" -#include "trans.h" - -int gfs2_internal_read(struct gfs2_inode *ip, - struct file_ra_state *ra_state, - char *buf, loff_t *pos, unsigned size) -{ - return gfs2_jdata_read_mem(ip, buf, *pos, size); -} - -int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, - struct buffer_head **bhp) -{ - struct buffer_head *bh; - int error = 0; - - if (new) { - bh = gfs2_meta_new(ip->i_gl, block); - gfs2_trans_add_bh(ip->i_gl, bh, 1); - gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); - gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); - } else { - error = gfs2_meta_read(ip->i_gl, block, - DIO_START | DIO_WAIT, &bh); - if (error) - return error; - if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) { - brelse(bh); - return -EIO; - } - } - - *bhp = bh; - - return 0; -} - -/** - * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read() - * @bh: The buffer to copy from, or NULL meaning zero the buffer - * @buf: The buffer to copy/zero - * @offset: The offset in the buffer to copy from - * @size: The amount of data to copy/zero - * - * Returns: errno - */ - -int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset, - unsigned int size) -{ - if (bh) - memcpy(*buf, bh->b_data + offset, size); - else - memset(*buf, 0, size); - *buf += size; - return 0; -} - -/** - * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read() - * @bh: The buffer - * @buf: The destination of the data - * @offset: The offset into the buffer - * @size: The amount of data to copy - * - * Returns: errno - */ - -int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset, - unsigned int size) -{ - int error; - - if (bh) - error = copy_to_user(*buf, bh->b_data + offset, size); - else - error = clear_user(*buf, size); - - if (error) - error = -EFAULT; - else - *buf += size; - - return error; -} - -static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf, - unsigned int offset, unsigned int size, - read_copy_fn_t copy_fn) -{ - struct buffer_head *dibh; - int error; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - error = copy_fn(dibh, &buf, - offset + sizeof(struct gfs2_dinode), size); - brelse(dibh); - } - - return (error) ? error : size; -} - -/** - * gfs2_jdata_read - Read a jdata file - * @ip: The GFS2 Inode - * @buf: The buffer to place result into - * @offset: File offset to begin jdata_readng from - * @size: Amount of data to transfer - * @copy_fn: Function to actually perform the copy - * - * The @copy_fn only copies a maximum of a single block at once so - * we are safe calling it with int arguments. It is done so that - * we don't needlessly put 64bit arguments on the stack and it - * also makes the code in the @copy_fn nicer too. - * - * Returns: The amount of data actually copied or the error - */ - -int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, uint64_t offset, - unsigned int size, read_copy_fn_t copy_fn) -{ - struct gfs2_sbd *sdp = ip->i_sbd; - uint64_t lblock, dblock; - uint32_t extlen = 0; - unsigned int o; - int copied = 0; - int error = 0; - - if (offset >= ip->i_di.di_size) - return 0; - - if ((offset + size) > ip->i_di.di_size) - size = ip->i_di.di_size - offset; - - if (!size) - return 0; - - if (gfs2_is_stuffed(ip)) - return jdata_read_stuffed(ip, buf, (unsigned int)offset, size, - copy_fn); - - if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) - return -EINVAL; - - lblock = offset; - o = do_div(lblock, sdp->sd_jbsize) + - sizeof(struct gfs2_meta_header); - - while (copied < size) { - unsigned int amount; - struct buffer_head *bh; - int new; - - amount = size - copied; - if (amount > sdp->sd_sb.sb_bsize - o) - amount = sdp->sd_sb.sb_bsize - o; - - if (!extlen) { - new = 0; - error = gfs2_block_map(ip, lblock, &new, - &dblock, &extlen); - if (error) - goto fail; - } - - if (extlen > 1) - gfs2_meta_ra(ip->i_gl, dblock, extlen); - - if (dblock) { - error = gfs2_jdata_get_buffer(ip, dblock, new, &bh); - if (error) - goto fail; - dblock++; - extlen--; - } else - bh = NULL; - - error = copy_fn(bh, &buf, o, amount); - brelse(bh); - if (error) - goto fail; - - copied += amount; - lblock++; - - o = sizeof(struct gfs2_meta_header); - } - - return copied; - - fail: - return (copied) ? copied : error; -} - -/** - * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write() - * @bh: The buffer to copy to or clear - * @buf: The buffer to copy from - * @offset: The offset in the buffer to write to - * @size: The amount of data to write - * - * Returns: errno - */ - -int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh, - const char **buf, unsigned int offset, unsigned int size) -{ - gfs2_trans_add_bh(ip->i_gl, bh, 1); - memcpy(bh->b_data + offset, *buf, size); - - *buf += size; - - return 0; -} - -/** - * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write() - * @bh: The buffer to copy to or clear - * @buf: The buffer to copy from - * @offset: The offset in the buffer to write to - * @size: The amount of data to write - * - * Returns: errno - */ - -int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh, - const char __user **buf, unsigned int offset, unsigned int size) -{ - int error = 0; - - gfs2_trans_add_bh(ip->i_gl, bh, 1); - if (copy_from_user(bh->b_data + offset, *buf, size)) - error = -EFAULT; - else - *buf += size; - - return error; -} - -static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf, - unsigned int offset, unsigned int size, - write_copy_fn_t copy_fn) -{ - struct buffer_head *dibh; - int error; - - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - error = copy_fn(ip, - dibh, &buf, - offset + sizeof(struct gfs2_dinode), size); - if (!error) { - if (ip->i_di.di_size < offset + size) - ip->i_di.di_size = offset + size; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - } - - brelse(dibh); - - return (error) ? error : size; -} - -/** - * gfs2_jdata_write - Write bytes to a file - * @ip: The GFS2 inode - * @buf: The buffer containing information to be written - * @offset: The file offset to start writing at - * @size: The amount of data to write - * @copy_fn: Function to do the actual copying - * - * Returns: The number of bytes correctly written or error code - */ - -int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, uint64_t offset, - unsigned int size, write_copy_fn_t copy_fn) -{ - struct gfs2_sbd *sdp = ip->i_sbd; - struct buffer_head *dibh; - uint64_t lblock, dblock; - uint32_t extlen = 0; - unsigned int o; - int copied = 0; - int error = 0; - - if (!size) - return 0; - - if (gfs2_is_stuffed(ip) && - offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) - return jdata_write_stuffed(ip, buf, (unsigned int)offset, size, - copy_fn); - - if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) - return -EINVAL; - - if (gfs2_is_stuffed(ip)) { - error = gfs2_unstuff_dinode(ip, NULL, NULL); - if (error) - return error; - } - - lblock = offset; - o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); - - while (copied < size) { - unsigned int amount; - struct buffer_head *bh; - int new; - - amount = size - copied; - if (amount > sdp->sd_sb.sb_bsize - o) - amount = sdp->sd_sb.sb_bsize - o; - - if (!extlen) { - new = 1; - error = gfs2_block_map(ip, lblock, &new, - &dblock, &extlen); - if (error) - goto fail; - error = -EIO; - if (gfs2_assert_withdraw(sdp, dblock)) - goto fail; - } - - error = gfs2_jdata_get_buffer(ip, dblock, - (amount == sdp->sd_jbsize) ? 1 : new, - &bh); - if (error) - goto fail; - - error = copy_fn(ip, bh, &buf, o, amount); - brelse(bh); - if (error) - goto fail; - - copied += amount; - lblock++; - dblock++; - extlen--; - - o = sizeof(struct gfs2_meta_header); - } - - out: - error = gfs2_meta_inode_buffer(ip, &dibh); - if (error) - return error; - - if (ip->i_di.di_size < offset + copied) - ip->i_di.di_size = offset + copied; - ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); - - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(&ip->i_di, dibh->b_data); - brelse(dibh); - - return copied; - - fail: - if (copied) - goto out; - return error; -} - diff --git a/fs/gfs2/jdata.h b/fs/gfs2/jdata.h deleted file mode 100644 index 95e18fcb8f8..00000000000 --- a/fs/gfs2/jdata.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. - * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - */ - -#ifndef __FILE_DOT_H__ -#define __FILE_DOT_H__ - -int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, - struct buffer_head **bhp); - -typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf, - unsigned int offset, unsigned int size); -typedef int (*write_copy_fn_t) (struct gfs2_inode *ip, - struct buffer_head *bh, const char **buf, - unsigned int offset, unsigned int size); - -int gfs2_copy2mem(struct buffer_head *bh, char **buf, - unsigned int offset, unsigned int size); -int gfs2_copy2user(struct buffer_head *bh, char __user **buf, - unsigned int offset, unsigned int size); -int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, - uint64_t offset, unsigned int size, - read_copy_fn_t copy_fn); - -int gfs2_copy_from_mem(struct gfs2_inode *ip, - struct buffer_head *bh, const char **buf, - unsigned int offset, unsigned int size); -int gfs2_copy_from_user(struct gfs2_inode *ip, - struct buffer_head *bh, const char __user **buf, - unsigned int offset, unsigned int size); -int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, - uint64_t offset, unsigned int size, - write_copy_fn_t copy_fn); - -static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf, - uint64_t offset, unsigned int size) -{ - return gfs2_jdata_read(ip, (__force char __user *)buf, offset, size, gfs2_copy2mem); -} - -static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, const char *buf, - uint64_t offset, unsigned int size) -{ - return gfs2_jdata_write(ip, (__force const char __user *)buf, offset, size, gfs2_copy_from_mem); -} - -#endif /* __FILE_DOT_H__ */ diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f6d00130f96..9b4484d366c 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -387,8 +387,7 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); atomic_set(&bh->b_count, 1); bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate); - set_bh_page(bh, virt_to_page(real->b_data), - ((unsigned long)real->b_data) & (PAGE_SIZE - 1)); + set_bh_page(bh, real->b_page, bh_offset(real)); bh->b_blocknr = blkno; bh->b_size = sdp->sd_sb.sb_bsize; bh->b_bdev = sdp->sd_vfs->s_bdev; @@ -634,6 +633,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); + gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index a065f766723..dd41863810d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -428,49 +428,188 @@ static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); } +/** + * databuf_lo_add - Add a databuf to the transaction. + * + * This is used in two distinct cases: + * i) In ordered write mode + * We put the data buffer on a list so that we can ensure that its + * synced to disk at the right time + * ii) In journaled data mode + * We need to journal the data block in the same way as metadata in + * the functions above. The difference is that here we have a tag + * which is two __be64's being the block number (as per meta data) + * and a flag which says whether the data block needs escaping or + * not. This means we need a new log entry for each 251 or so data + * blocks, which isn't an enormous overhead but twice as much as + * for normal metadata blocks. + */ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { - get_transaction->tr_touched = 1; + struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_trans *tr = get_transaction; + struct address_space *mapping = bd->bd_bh->b_page->mapping; + struct gfs2_inode *ip = get_v2ip(mapping->host); + tr->tr_touched = 1; + if (!list_empty(&bd->bd_list_tr) && + (ip->i_di.di_flags & GFS2_DIF_JDATA)) { + tr->tr_num_buf++; + gfs2_trans_add_gl(bd->bd_gl); + list_add(&bd->bd_list_tr, &tr->tr_list_buf); + gfs2_pin(sdp, bd->bd_bh); + } else { + clear_buffer_pinned(bd->bd_bh); + } gfs2_log_lock(sdp); + if (ip->i_di.di_flags & GFS2_DIF_JDATA) + sdp->sd_log_num_jdata++; sdp->sd_log_num_databuf++; list_add(&le->le_list, &sdp->sd_log_le_databuf); gfs2_log_unlock(sdp); } +static int gfs2_check_magic(struct buffer_head *bh) +{ + struct page *page = bh->b_page; + void *kaddr; + __be32 *ptr; + int rv = 0; + + kaddr = kmap_atomic(page, KM_USER0); + ptr = kaddr + bh_offset(bh); + if (*ptr == cpu_to_be32(GFS2_MAGIC)) + rv = 1; + kunmap_atomic(page, KM_USER0); + + return rv; +} + +/** + * databuf_lo_before_commit - Scan the data buffers, writing as we go + * + * Here we scan through the lists of buffers and make the assumption + * that any buffer thats been pinned is being journaled, and that + * any unpinned buffer is an ordered write data buffer and therefore + * will be written back rather than journaled. + */ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) { - struct list_head *head = &sdp->sd_log_le_databuf; LIST_HEAD(started); - struct gfs2_bufdata *bd; - struct buffer_head *bh; + struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; + struct buffer_head *bh = NULL; + unsigned int offset = sizeof(struct gfs2_log_descriptor); + struct gfs2_log_descriptor *ld; + unsigned int limit; + unsigned int total_dbuf = sdp->sd_log_num_databuf; + unsigned int total_jdata = sdp->sd_log_num_jdata; + unsigned int num, n; + __be64 *ptr; - while (!list_empty(head)) { - bd = list_entry(head->prev, struct gfs2_bufdata, bd_le.le_list); - list_move(&bd->bd_le.le_list, &started); + offset += (2*sizeof(__be64) - 1); + offset &= ~(2*sizeof(__be64) - 1); + limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64); - gfs2_log_lock(sdp); - bh = bd->bd_bh; + /* printk(KERN_INFO "totals: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */ + /* + * Start writing ordered buffers, write journaled buffers + * into the log along with a header + */ + bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, bd_le.le_list); + while(total_dbuf) { + num = total_jdata; + if (num > limit) + num = limit; + n = 0; + list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) { + gfs2_log_lock(sdp); + /* An ordered write buffer */ + if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) { + list_move(&bd1->bd_le.le_list, &started); + if (bd1 == bd2) { + bd2 = NULL; + bd2 = list_prepare_entry(bd2, &sdp->sd_log_le_databuf, bd_le.le_list); + } + total_dbuf--; + if (bd1->bd_bh) { + get_bh(bd1->bd_bh); + gfs2_log_unlock(sdp); + if (buffer_dirty(bd1->bd_bh)) { + wait_on_buffer(bd1->bd_bh); + ll_rw_block(WRITE, 1, &bd1->bd_bh); + } + brelse(bd1->bd_bh); + continue; + } + gfs2_log_unlock(sdp); + continue; + } else if (bd1->bd_bh) { /* A journaled buffer */ + int magic; + gfs2_log_unlock(sdp); + /* printk(KERN_INFO "journaled buffer\n"); */ + if (!bh) { + bh = gfs2_log_get_buf(sdp); + ld = (struct gfs2_log_descriptor *)bh->b_data; + ptr = (__be64 *)(bh->b_data + offset); + ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD); + ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD); + ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA); + ld->ld_length = cpu_to_be32(num + 1); + ld->ld_data1 = cpu_to_be32(num); + ld->ld_data2 = cpu_to_be32(0); + memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); + } + magic = gfs2_check_magic(bd1->bd_bh); + *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr); + *ptr++ = cpu_to_be64((__u64)magic); + clear_buffer_escaped(bd1->bd_bh); + if (unlikely(magic != 0)) + set_buffer_escaped(bd1->bd_bh); + if (n++ > num) + break; + } + } if (bh) { - get_bh(bh); - gfs2_log_unlock(sdp); - if (buffer_dirty(bh)) { - wait_on_buffer(bh); - ll_rw_block(WRITE, 1, &bh); + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + bh = NULL; + } + n = 0; + /* printk(KERN_INFO "totals2: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */ + list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, bd_le.le_list) { + if (!bd2->bd_bh) + continue; + /* copy buffer if it needs escaping */ + if (unlikely(buffer_escaped(bd2->bd_bh))) { + void *kaddr; + struct page *page = bd2->bd_bh->b_page; + bh = gfs2_log_get_buf(sdp); + kaddr = kmap_atomic(page, KM_USER0); + memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize); + kunmap_atomic(page, KM_USER0); + *(__be32 *)bh->b_data = 0; + } else { + bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); } - brelse(bh); - } else - gfs2_log_unlock(sdp); + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + if (++n >= num) + break; + } + bh = NULL; + total_dbuf -= num; + total_jdata -= num; } - + /* printk(KERN_INFO "wait on ordered data buffers\n"); */ + /* Wait on all ordered buffers */ while (!list_empty(&started)) { - bd = list_entry(started.next, struct gfs2_bufdata, - bd_le.le_list); - list_del(&bd->bd_le.le_list); + bd1 = list_entry(started.next, struct gfs2_bufdata, bd_le.le_list); + list_del(&bd1->bd_le.le_list); sdp->sd_log_num_databuf--; gfs2_log_lock(sdp); - bh = bd->bd_bh; + bh = bd1->bd_bh; if (bh) { set_v2bd(bh, NULL); gfs2_log_unlock(sdp); @@ -479,12 +618,103 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) } else gfs2_log_unlock(sdp); - kfree(bd); + kfree(bd1); } + /* printk(KERN_INFO "sd_log_num_databuf %u sd_log_num_jdata %u\n", sdp->sd_log_num_databuf, sdp->sd_log_num_jdata); */ + /* We've removed all the ordered write bufs here, so only jdata left */ + gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata); +} + +static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, + __be64 *ptr, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + struct gfs2_glock *gl = jd->jd_inode->i_gl; + unsigned int blks = be32_to_cpu(ld->ld_data1); + struct buffer_head *bh_log, *bh_ip; + uint64_t blkno; + uint64_t esc; + int error = 0; + + if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA) + return 0; + + gfs2_replay_incr_blk(sdp, &start); + for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + blkno = be64_to_cpu(*ptr++); + esc = be64_to_cpu(*ptr++); + + sdp->sd_found_blocks++; + + if (gfs2_revoke_check(sdp, blkno, start)) + continue; + + error = gfs2_replay_read_block(jd, start, &bh_log); + if (error) + return error; + + bh_ip = gfs2_meta_new(gl, blkno); + memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); + + /* Unescape */ + if (esc) { + __be32 *eptr = (__be32 *)bh_ip->b_data; + *eptr = cpu_to_be32(GFS2_MAGIC); + } + mark_buffer_dirty(bh_ip); + + brelse(bh_log); + brelse(bh_ip); + if (error) + break; + + sdp->sd_replayed_blocks++; + } + + return error; +} + +/* FIXME: sort out accounting for log blocks etc. */ + +static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (error) { + gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); + return; + } + if (pass != 1) + return; + + /* data sync? */ + gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); + + fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", + jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); +} + +static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_databuf; + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); + list_del_init(&bd->bd_le.le_list); + sdp->sd_log_num_databuf--; + sdp->sd_log_num_jdata--; + gfs2_unpin(sdp, bd->bd_bh, ai); + brelse(bd->bd_bh); + kfree(bd); + } gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); + gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata); } + struct gfs2_log_operations gfs2_glock_lops = { .lo_add = glock_lo_add, .lo_after_commit = glock_lo_after_commit, @@ -519,7 +749,11 @@ struct gfs2_log_operations gfs2_rg_lops = { struct gfs2_log_operations gfs2_databuf_lops = { .lo_add = databuf_lo_add, + .lo_incore_commit = buf_lo_incore_commit, .lo_before_commit = databuf_lo_before_commit, + .lo_after_commit = databuf_lo_after_commit, + .lo_scan_elements = databuf_lo_scan_ele |