diff options
Diffstat (limited to 'fs/gfs2')
75 files changed, 27006 insertions, 0 deletions
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig new file mode 100644 index 00000000000..8c27de8b956 --- /dev/null +++ b/fs/gfs2/Kconfig @@ -0,0 +1,44 @@ +config GFS2_FS + tristate "GFS2 file system support" + depends on EXPERIMENTAL + select FS_POSIX_ACL + help + A cluster filesystem. + + Allows a cluster of computers to simultaneously use a block device + that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads + and writes to the block device like a local filesystem, but also uses + a lock module to allow the computers coordinate their I/O so + filesystem consistency is maintained. One of the nifty features of + GFS is perfect consistency -- changes made to the filesystem on one + machine show up immediately on all other machines in the cluster. + + To use the GFS2 filesystem, you will need to enable one or more of + the below locking modules. Documentation and utilities for GFS2 can + be found here: http://sources.redhat.com/cluster + +config GFS2_FS_LOCKING_NOLOCK + tristate "GFS2 \"nolock\" locking module" + depends on GFS2_FS + help + Single node locking module for GFS2. + + Use this module if you want to use GFS2 on a single node without + its clustering features. You can still take advantage of the + large file support, and upgrade to running a full cluster later on + if required. + + If you will only be using GFS2 in cluster mode, you do not need this + module. + +config GFS2_FS_LOCKING_DLM + tristate "GFS2 DLM locking module" + depends on GFS2_FS + select DLM + help + Multiple node locking module for GFS2 + + Most users of GFS2 will require this module. It provides the locking + interface between GFS2 and the DLM, which is required to use GFS2 + in a cluster environment. + diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile new file mode 100644 index 00000000000..e3f1ada643a --- /dev/null +++ b/fs/gfs2/Makefile @@ -0,0 +1,10 @@ +obj-$(CONFIG_GFS2_FS) += gfs2.o +gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \ + glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \ + mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \ + ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \ + recovery.o rgrp.o super.o sys.o trans.o util.o + +obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/ +obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/ + diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c new file mode 100644 index 00000000000..5f959b8ce40 --- /dev/null +++ b/fs/gfs2/acl.c @@ -0,0 +1,309 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/completion.h> +#include <linux/buffer_head.h> +#include <linux/posix_acl.h> +#include <linux/posix_acl_xattr.h> +#include <linux/gfs2_ondisk.h> +#include <linux/lm_interface.h> + +#include "gfs2.h" +#include "incore.h" +#include "acl.h" +#include "eaops.h" +#include "eattr.h" +#include "glock.h" +#include "inode.h" +#include "meta_io.h" +#include "trans.h" +#include "util.h" + +#define ACL_ACCESS 1 +#define ACL_DEFAULT 0 + +int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, + struct gfs2_ea_request *er, + int *remove, mode_t *mode) +{ + struct posix_acl *acl; + int error; + + error = gfs2_acl_validate_remove(ip, access); + if (error) + return error; + + if (!er->er_data) + return -EINVAL; + + acl = posix_acl_from_xattr(er->er_data, er->er_data_len); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) { + *remove = 1; + return 0; + } + + error = posix_acl_valid(acl); + if (error) + goto out; + + if (access) { + error = posix_acl_equiv_mode(acl, mode); + if (!error) + *remove = 1; + else if (error > 0) + error = 0; + } + +out: + posix_acl_release(acl); + return error; +} + +int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) +{ + if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl) + return -EOPNOTSUPP; + if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER)) + return -EPERM; + if (S_ISLNK(ip->i_di.di_mode)) + return -EOPNOTSUPP; + if (!access && !S_ISDIR(ip->i_di.di_mode)) + return -EACCES; + + return 0; +} + +static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl, + struct gfs2_ea_location *el, char **data, unsigned int *len) +{ + struct gfs2_ea_request er; + struct gfs2_ea_location el_this; + int error; + + if (!ip->i_di.di_eattr) + return 0; + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + if (access) { + er.er_name = GFS2_POSIX_ACL_ACCESS; + er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN; + } else { + er.er_name = GFS2_POSIX_ACL_DEFAULT; + er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; + } + er.er_type = GFS2_EATYPE_SYS; + + if (!el) + el = &el_this; + + error = gfs2_ea_find(ip, &er, el); + if (error) + return error; + if (!el->el_ea) + return 0; + if (!GFS2_EA_DATA_LEN(el->el_ea)) + goto out; + + er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); + er.er_data = kmalloc(er.er_data_len, GFP_KERNEL); + error = -ENOMEM; + if (!er.er_data) + goto out; + + error = gfs2_ea_get_copy(ip, el, er.er_data); + if (error) + goto out_kfree; + + if (acl) { + *acl = posix_acl_from_xattr(er.er_data, er.er_data_len); + if (IS_ERR(*acl)) + error = PTR_ERR(*acl); + } + +out_kfree: + if (error || !data) + kfree(er.er_data); + else { + *data = er.er_data; + *len = er.er_data_len; + } +out: + if (error || el == &el_this) + brelse(el->el_bh); + return error; +} + +/** + * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something + * @inode: the file we want to do something to + * @mask: what we want to do + * + * Returns: errno + */ + +int gfs2_check_acl_locked(struct inode *inode, int mask) +{ + struct posix_acl *acl = NULL; + int error; + + error = acl_get(GFS2_I(inode), ACL_ACCESS, &acl, NULL, NULL, NULL); + if (error) + return error; + + if (acl) { + error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + + return -EAGAIN; +} + +int gfs2_check_acl(struct inode *inode, int mask) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_holder i_gh; + int error; + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (!error) { + error = gfs2_check_acl_locked(inode, mask); + gfs2_glock_dq_uninit(&i_gh); + } + + return error; +} + +static int munge_mode(struct gfs2_inode *ip, mode_t mode) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct buffer_head *dibh; + int error; + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + return error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + gfs2_assert_withdraw(sdp, + (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT)); + ip->i_di.di_mode = mode; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs2_trans_end(sdp); + + return 0; +} + +int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct posix_acl *acl = NULL, *clone; + struct gfs2_ea_request er; + mode_t mode = ip->i_di.di_mode; + int error; + + if (!sdp->sd_args.ar_posix_acl) + return 0; + if (S_ISLNK(ip->i_di.di_mode)) + return 0; + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_type = GFS2_EATYPE_SYS; + + error = acl_get(dip, ACL_DEFAULT, &acl, NULL, + &er.er_data, &er.er_data_len); + if (error) + return error; + if (!acl) { + mode &= ~current->fs->umask; + if (mode != ip->i_di.di_mode) + error = munge_mode(ip, mode); + return error; + } + + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto out; + posix_acl_release(acl); + acl = clone; + + if (S_ISDIR(ip->i_di.di_mode)) { + er.er_name = GFS2_POSIX_ACL_DEFAULT; + er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; + error = gfs2_system_eaops.eo_set(ip, &er); + if (error) + goto out; + } + + error = posix_acl_create_masq(acl, &mode); + if (error < 0) + goto out; + if (error > 0) { + er.er_name = GFS2_POSIX_ACL_ACCESS; + er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN; + posix_acl_to_xattr(acl, er.er_data, er.er_data_len); + er.er_mode = mode; + er.er_flags = GFS2_ERF_MODE; + error = gfs2_system_eaops.eo_set(ip, &er); + if (error) + goto out; + } else + munge_mode(ip, mode); + +out: + posix_acl_release(acl); + kfree(er.er_data); + return error; +} + +int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) +{ + struct posix_acl *acl = NULL, *clone; + struct gfs2_ea_location el; + char *data; + unsigned int len; + int error; + + error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len); + if (error) + return error; + if (!acl) + return gfs2_setattr_simple(ip, attr); + + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto out; + posix_acl_release(acl); + acl = clone; + + error = posix_acl_chmod_masq(acl, attr->ia_mode); + if (!error) { + posix_acl_to_xattr(acl, data, len); + error = gfs2_ea_acl_chmod(ip, &el, attr, data); + } + +out: + posix_acl_release(acl); + brelse(el.el_bh); + kfree(data); + return error; +} + diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h new file mode 100644 index 00000000000..05c294fe0d7 --- /dev/null +++ b/fs/gfs2/acl.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#ifndef __ACL_DOT_H__ +#define __ACL_DOT_H__ + +#include "incore.h" + +#define GFS2_POSIX_ACL_ACCESS "posix_acl_access" +#define GFS2_POSIX_ACL_ACCESS_LEN 16 +#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" +#define GFS2_POSIX_ACL_DEFAULT_LEN 17 + +#define GFS2_ACL_IS_ACCESS(name, len) \ + ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \ + !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len))) + +#define GFS2_ACL_IS_DEFAULT(name, len) \ + ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \ + !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len))) + +struct gfs2_ea_request; + +int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, + struct gfs2_ea_request *er, + int *remove, mode_t *mode); +int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access); +int gfs2_check_acl_locked(struct inode *inode, int mask); +int gfs2_check_acl(struct inode *inode, int mask); +int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); +int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); + +#endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c new file mode 100644 index 00000000000..cc57f2ecd21 --- /dev/null +++ b/fs/gfs2/bmap.c @@ -0,0 +1,1221 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License version 2. + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/completion.h> +#include <linux/buffer_head.h> +#include <linux/gfs2_ondisk.h> +#include <linux/crc32.h> +#include <linux/lm_interface.h> + +#include "gfs2.h" +#include "incore.h" +#include "bmap.h" +#include "glock.h" +#include "inode.h" +#include "meta_io.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" +#include "dir.h" +#include "util.h" +#include "ops_address.h" + +/* This doesn't need to be that large as max 64 bit pointers in a 4k + * block is 512, so __u16 is fine for that. It saves stack space to + * keep it small. + */ +struct metapath { + __u16 mp_list[GFS2_MAX_META_HEIGHT]; +}; + +typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, u64 *top, + u64 *bottom, unsigned int height, + void *data); + +struct strip_mine { + int sm_first; + unsigned int sm_height; +}; + +/** + * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page + * @ip: the inode + * @dibh: the dinode buffer + * @block: the block number that was allocated + * @private: any locked page held by the caller process + * + * Returns: errno + */ + +static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, + u64 block, struct page *page) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct inode *inode = &ip->i_inode; + struct buffer_head *bh; + int release = 0; + + if (!page || page->index) { + page = grab_cache_page(inode->i_mapping, 0); + if (!page) + return -ENOMEM; + release = 1; + } + + if (!PageUptodate(page)) { + void *kaddr = kmap(page); + + memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), + ip->i_di.di_size); + memset(kaddr + ip->i_di.di_size, 0, + PAGE_CACHE_SIZE - ip->i_di.di_size); + kunmap(page); + + SetPageUptodate(page); + } + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, + (1 << BH_Uptodate)); + + bh = page_buffers(page); + + if (!buffer_mapped(bh)) + map_bh(bh, inode->i_sb, block); + + set_buffer_uptodate(bh); + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) + gfs2_trans_add_bh(ip->i_gl, bh, 0); + mark_buffer_dirty(bh); + + if (release) { + unlock_page(page); + page_cache_release(page); + } + + return 0; +} + +/** + * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big + * @ip: The GFS2 inode to unstuff + * @unstuffer: the routine that handles unstuffing a non-zero length file + * @private: private data for the unstuffer + * + * This routine unstuffs a dinode and returns it to a "normal" state such + * that the height can be grown in the traditional way. + * + * Returns: errno + */ + +int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) +{ + struct buffer_head *bh, *dibh; + struct gfs2_dinode *di; + u64 block = 0; + int isdir = gfs2_is_dir(ip); + int error; + + down_write(&ip->i_rw_mutex); + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + if (ip->i_di.di_size) { + /* Get a free block, fill it with the stuffed data, + and write it out to disk */ + + if (isdir) { + block = gfs2_alloc_meta(ip); + + error = gfs2_dir_get_new_buffer(ip, block, &bh); + if (error) + goto out_brelse; + gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header), + dibh, sizeof(struct gfs2_dinode)); + brelse(bh); + } else { + block = gfs2_alloc_data(ip); + + error = gfs2_unstuffer_page(ip, dibh, block, page); + if (error) + goto out_brelse; + } + } + + /* Set up the pointer to the new block */ + + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + di = (struct gfs2_dinode *)dibh->b_data; + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + + if (ip->i_di.di_size) { + *(__be64 *)(di + 1) = cpu_to_be64(block); + ip->i_di.di_blocks++; + di->di_blocks = cpu_to_be64(ip->i_di.di_blocks); + } + + ip->i_di.di_height = 1; + di->di_height = cpu_to_be16(1); + +out_brelse: + brelse(dibh); +out: + up_write(&ip->i_rw_mutex); + return error; +} + +/** + * calc_tree_height - Calculate the height of a metadata tree + * @ip: The GFS2 inode + * @size: The proposed size of the file + * + * Work out how tall a metadata tree needs to be in order to accommodate a + * file of a particular size. If size is less than the current size of + * the inode, then the current size of the inode is used instead of the + * supplied one. + * + * Returns: the height the tree should be + */ + +static unsigned int calc_tree_height(struct gfs2_inode *ip, u64 size) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + u64 *arr; + u |