diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-16 10:52:55 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-16 10:52:55 -0700 |
commit | add096909da63ef32d6766f6771c07c9f16c6ee5 (patch) | |
tree | 58594bcf68cbb6f777d5270d098ab8ca69cbaee3 /fs | |
parent | e245befce7af0a1e1347079ed62695b059594bd4 (diff) | |
parent | 54c57dc3b6578356c0a428c767d4bf080254a2ee (diff) |
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (32 commits)
[PATCH] ocfs2: zero_user_page conversion
ocfs2: Support xfs style space reservation ioctls
ocfs2: support for removing file regions
ocfs2: update truncate handling of partial clusters
ocfs2: btree support for removal of arbirtrary extents
ocfs2: Support creation of unwritten extents
ocfs2: support writing of unwritten extents
ocfs2: small cleanup of ocfs2_write_begin_nolock()
ocfs2: btree changes for unwritten extents
ocfs2: abstract btree growing calls
ocfs2: use all extent block suballocators
ocfs2: plug truncate into cached dealloc routines
ocfs2: simplify deallocation locking
ocfs2: harden buffer check during mapping of page blocks
ocfs2: shared writeable mmap
ocfs2: factor out write aops into nolock variants
ocfs2: rework ocfs2_buffered_write_cluster()
ocfs2: take ip_alloc_sem during entire truncate
ocfs2: Add "preferred slot" mount option
[KJ PATCH] Replacing memset(<addr>,0,PAGE_SIZE) with clear_page() in fs/ocfs2/dlm/dlmrecovery.c
...
Diffstat (limited to 'fs')
36 files changed, 4548 insertions, 1036 deletions
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 7b48c034b31..3b0185fdf9a 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -29,10 +29,11 @@ struct configfs_dirent { atomic_t s_count; + int s_dependent_count; struct list_head s_sibling; struct list_head s_children; struct list_head s_links; - void * s_element; + void * s_element; int s_type; umode_t s_mode; struct dentry * s_dentry; @@ -41,8 +42,8 @@ struct configfs_dirent { #define CONFIGFS_ROOT 0x0001 #define CONFIGFS_DIR 0x0002 -#define CONFIGFS_ITEM_ATTR 0x0004 -#define CONFIGFS_ITEM_LINK 0x0020 +#define CONFIGFS_ITEM_ATTR 0x0004 +#define CONFIGFS_ITEM_LINK 0x0020 #define CONFIGFS_USET_DIR 0x0040 #define CONFIGFS_USET_DEFAULT 0x0080 #define CONFIGFS_USET_DROPPING 0x0100 diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5e6e37e58f3..2f436d4f1d6 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -355,6 +355,10 @@ static int configfs_detach_prep(struct dentry *dentry) /* Mark that we've taken i_mutex */ sd->s_type |= CONFIGFS_USET_DROPPING; + /* + * Yup, recursive. If there's a problem, blame + * deep nesting of default_groups + */ ret = configfs_detach_prep(sd->s_dentry); if (!ret) continue; @@ -562,7 +566,7 @@ static int populate_groups(struct config_group *group) /* * All of link_obj/unlink_obj/link_group/unlink_group require that - * subsys->su_sem is held. + * subsys->su_mutex is held. */ static void unlink_obj(struct config_item *item) @@ -714,6 +718,28 @@ static void configfs_detach_group(struct config_item *item) } /* + * After the item has been detached from the filesystem view, we are + * ready to tear it out of the hierarchy. Notify the client before + * we do that so they can perform any cleanup that requires + * navigating the hierarchy. A client does not need to provide this + * callback. The subsystem semaphore MUST be held by the caller, and + * references must be valid for both items. It also assumes the + * caller has validated ci_type. + */ +static void client_disconnect_notify(struct config_item *parent_item, + struct config_item *item) +{ + struct config_item_type *type; + + type = parent_item->ci_type; + BUG_ON(!type); + + if (type->ct_group_ops && type->ct_group_ops->disconnect_notify) + type->ct_group_ops->disconnect_notify(to_config_group(parent_item), + item); +} + +/* * Drop the initial reference from make_item()/make_group() * This function assumes that reference is held on item * and that item holds a valid reference to the parent. Also, it @@ -733,11 +759,244 @@ static void client_drop_item(struct config_item *parent_item, */ if (type->ct_group_ops && type->ct_group_ops->drop_item) type->ct_group_ops->drop_item(to_config_group(parent_item), - item); + item); else config_item_put(item); } +#ifdef DEBUG +static void configfs_dump_one(struct configfs_dirent *sd, int level) +{ + printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd)); + +#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type); + type_print(CONFIGFS_ROOT); + type_print(CONFIGFS_DIR); + type_print(CONFIGFS_ITEM_ATTR); + type_print(CONFIGFS_ITEM_LINK); + type_print(CONFIGFS_USET_DIR); + type_print(CONFIGFS_USET_DEFAULT); + type_print(CONFIGFS_USET_DROPPING); +#undef type_print +} + +static int configfs_dump(struct configfs_dirent *sd, int level) +{ + struct configfs_dirent *child_sd; + int ret = 0; + + configfs_dump_one(sd, level); + + if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT))) + return 0; + + list_for_each_entry(child_sd, &sd->s_children, s_sibling) { + ret = configfs_dump(child_sd, level + 2); + if (ret) + break; + } + + return ret; +} +#endif + + +/* + * configfs_depend_item() and configfs_undepend_item() + * + * WARNING: Do not call these from a configfs callback! + * + * This describes these functions and their helpers. + * + * Allow another kernel system to depend on a config_item. If this + * happens, the item cannot go away until the dependant can live without + * it. The idea is to give client modules as simple an interface as + * possible. When a system asks them to depend on an item, they just + * call configfs_depend_item(). If the item is live and the client + * driver is in good shape, we'll happily do the work for them. + * + * Why is the locking complex? Because configfs uses the VFS to handle + * all locking, but this function is called outside the normal + * VFS->configfs path. So it must take VFS locks to prevent the + * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is + * why you can't call these functions underneath configfs callbacks. + * + * Note, btw, that this can be called at *any* time, even when a configfs + * subsystem isn't registered, or when configfs is loading or unloading. + * Just like configfs_register_subsystem(). So we take the same + * precautions. We pin the filesystem. We lock each i_mutex _in_order_ + * on our way down the tree. If we can find the target item in the + * configfs tree, it must be part of the subsystem tree as well, so we + * do not need the subsystem semaphore. Holding the i_mutex chain locks + * out mkdir() and rmdir(), who might be racing us. + */ + +/* + * configfs_depend_prep() + * + * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are + * attributes. This is similar but not the same to configfs_detach_prep(). + * Note that configfs_detach_prep() expects the parent to be locked when it + * is called, but we lock the parent *inside* configfs_depend_prep(). We + * do that so we can unlock it if we find nothing. + * + * Here we do a depth-first search of the dentry hierarchy looking for + * our object. We take i_mutex on each step of the way down. IT IS + * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch, + * we'll drop the i_mutex. + * + * If the target is not found, -ENOENT is bubbled up and we have released + * all locks. If the target was found, the locks will be cleared by + * configfs_depend_rollback(). + * + * This adds a requirement that all config_items be unique! + * + * This is recursive because the locking traversal is tricky. There isn't + * much on the stack, though, so folks that need this function - be careful + * about your stack! Patches will be accepted to make it iterative. + */ +static int configfs_depend_prep(struct dentry *origin, + struct config_item *target) +{ + struct configfs_dirent *child_sd, *sd = origin->d_fsdata; + int ret = 0; + + BUG_ON(!origin || !sd); + + /* Lock this guy on the way down */ + mutex_lock(&sd->s_dentry->d_inode->i_mutex); + if (sd->s_element == target) /* Boo-yah */ + goto out; + + list_for_each_entry(child_sd, &sd->s_children, s_sibling) { + if (child_sd->s_type & CONFIGFS_DIR) { + ret = configfs_depend_prep(child_sd->s_dentry, + target); + if (!ret) + goto out; /* Child path boo-yah */ + } + } + + /* We looped all our children and didn't find target */ + mutex_unlock(&sd->s_dentry->d_inode->i_mutex); + ret = -ENOENT; + +out: + return ret; +} + +/* + * This is ONLY called if configfs_depend_prep() did its job. So we can + * trust the entire path from item back up to origin. + * + * We walk backwards from item, unlocking each i_mutex. We finish by + * unlocking origin. + */ +static void configfs_depend_rollback(struct dentry *origin, + struct config_item *item) +{ + struct dentry *dentry = item->ci_dentry; + + while (dentry != origin) { + mutex_unlock(&dentry->d_inode->i_mutex); + dentry = dentry->d_parent; + } + + mutex_unlock(&origin->d_inode->i_mutex); +} + +int configfs_depend_item(struct configfs_subsystem *subsys, + struct config_item *target) +{ + int ret; + struct configfs_dirent *p, *root_sd, *subsys_sd = NULL; + struct config_item *s_item = &subsys->su_group.cg_item; + + /* + * Pin the configfs filesystem. This means we can safely access + * the root of the configfs filesystem. + */ + ret = configfs_pin_fs(); + if (ret) + return ret; + + /* + * Next, lock the root directory. We're going to check that the + * subsystem is really registered, and so we need to lock out + * configfs_[un]register_subsystem(). + */ + mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); + + root_sd = configfs_sb->s_root->d_fsdata; + + list_for_each_entry(p, &root_sd->s_children, s_sibling) { + if (p->s_type & CONFIGFS_DIR) { + if (p->s_element == s_item) { + subsys_sd = p; + break; + } + } + } + + if (!subsys_sd) { + ret = -ENOENT; + goto out_unlock_fs; + } + + /* Ok, now we can trust subsys/s_item */ + + /* Scan the tree, locking i_mutex recursively, return 0 if found */ + ret = configfs_depend_prep(subsys_sd->s_dentry, target); + if (ret) + goto out_unlock_fs; + + /* We hold all i_mutexes from the subsystem down to the target */ + p = target->ci_dentry->d_fsdata; + p->s_dependent_count += 1; + + configfs_depend_rollback(subsys_sd->s_dentry, target); + +out_unlock_fs: + mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); + + /* + * If we succeeded, the fs is pinned via other methods. If not, + * we're done with it anyway. So release_fs() is always right. + */ + configfs_release_fs(); + + return ret; +} +EXPORT_SYMBOL(configfs_depend_item); + +/* + * Release the dependent linkage. This is much simpler than + * configfs_depend_item() because we know that that the client driver is + * pinned, thus the subsystem is pinned, and therefore configfs is pinned. + */ +void configfs_undepend_item(struct configfs_subsystem *subsys, + struct config_item *target) +{ + struct configfs_dirent *sd; + + /* + * Since we can trust everything is pinned, we just need i_mutex + * on the item. + */ + mutex_lock(&target->ci_dentry->d_inode->i_mutex); + + sd = target->ci_dentry->d_fsdata; + BUG_ON(sd->s_dependent_count < 1); + + sd->s_dependent_count -= 1; + + /* + * After this unlock, we cannot trust the item to stay alive! + * DO NOT REFERENCE item after this unlock. + */ + mutex_unlock(&target->ci_dentry->d_inode->i_mutex); +} +EXPORT_SYMBOL(configfs_undepend_item); static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { @@ -783,7 +1042,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); - down(&subsys->su_sem); + mutex_lock(&subsys->su_mutex); group = NULL; item = NULL; if (type->ct_group_ops->make_group) { @@ -797,7 +1056,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (item) link_obj(parent_item, item); } - up(&subsys->su_sem); + mutex_unlock(&subsys->su_mutex); kfree(name); if (!item) { @@ -841,13 +1100,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_unlink: if (ret) { /* Tear down everything we built up */ - down(&subsys->su_sem); + mutex_lock(&subsys->su_mutex); + + client_disconnect_notify(parent_item, item); if (group) unlink_group(group); else unlink_obj(item); client_drop_item(parent_item, item); - up(&subsys->su_sem); + + mutex_unlock(&subsys->su_mutex); if (module_got) module_put(owner); @@ -881,6 +1143,13 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; + /* + * Here's where we check for dependents. We're protected by + * i_mutex. + */ + if (sd->s_dependent_count) + return -EBUSY; + /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; @@ -910,17 +1179,19 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) if (sd->s_type & CONFIGFS_USET_DIR) { configfs_detach_group(item); - down(&subsys->su_sem); + mutex_lock(&subsys->su_mutex); + client_disconnect_notify(parent_item, item); unlink_group(to_config_group(item)); } else { configfs_detach_item(item); - down(&subsys->su_sem); + mutex_lock(&subsys->su_mutex); + client_disconnect_notify(parent_item, item); unlink_obj(item); } client_drop_item(parent_item, item); - up(&subsys->su_sem); + mutex_unlock(&subsys->su_mutex); /* Drop our reference from above */ config_item_put(item); diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 3527c7c6def..a3658f9a082 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -27,19 +27,26 @@ #include <linux/fs.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/mutex.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <linux/configfs.h> #include "configfs_internal.h" +/* + * A simple attribute can only be 4096 characters. Why 4k? Because the + * original code limited it to PAGE_SIZE. That's a bad idea, though, + * because an attribute of 16k on ia64 won't work on x86. So we limit to + * 4k, our minimum common page size. + */ +#define SIMPLE_ATTR_SIZE 4096 struct configfs_buffer { size_t count; loff_t pos; char * page; struct configfs_item_operations * ops; - struct semaphore sem; + struct mutex mutex; int needs_read_fill; }; @@ -69,7 +76,7 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf count = ops->show_attribute(item,attr,buffer->page); buffer->needs_read_fill = 0; - BUG_ON(count > (ssize_t)PAGE_SIZE); + BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); if (count >= 0) buffer->count = count; else @@ -102,7 +109,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp struct configfs_buffer * buffer = file->private_data; ssize_t retval = 0; - down(&buffer->sem); + mutex_lock(&buffer->mutex); if (buffer->needs_read_fill) { if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) goto out; @@ -112,7 +119,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp retval = simple_read_from_buffer(buf, count, ppos, buffer->page, buffer->count); out: - up(&buffer->sem); + mutex_unlock(&buffer->mutex); return retval; } @@ -137,8 +144,8 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size if (!buffer->page) return -ENOMEM; - if (count >= PAGE_SIZE) - count = PAGE_SIZE - 1; + if (count >= SIMPLE_ATTR_SIZE) + count = SIMPLE_ATTR_SIZE - 1; error = copy_from_user(buffer->page,buf,count); buffer->needs_read_fill = 1; /* if buf is assumed to contain a string, terminate it by \0, @@ -193,13 +200,13 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof struct configfs_buffer * buffer = file->private_data; ssize_t len; - down(&buffer->sem); + mutex_lock(&buffer->mutex); len = fill_write_buffer(buffer, buf, count); if (len > 0) len = flush_write_buffer(file->f_path.dentry, buffer, count); if (len > 0) *ppos += len; - up(&buffer->sem); + mutex_unlock(&buffer->mutex); return len; } @@ -253,7 +260,7 @@ static int check_perm(struct inode * inode, struct file * file) error = -ENOMEM; goto Enomem; } - init_MUTEX(&buffer->sem); + mutex_init(&buffer->mutex); buffer->needs_read_fill = 1; buffer->ops = ops; file->private_data = buffer; @@ -292,6 +299,7 @@ static int configfs_release(struct inode * inode, struct file * filp) if (buffer) { if (buffer->page) free_page((unsigned long)buffer->page); + mutex_destroy(&buffer->mutex); kfree(buffer); } return 0; diff --git a/fs/configfs/item.c b/fs/configfs/item.c index 24421209f85..76dc4c3e5d5 100644 --- a/fs/configfs/item.c +++ b/fs/configfs/item.c @@ -62,7 +62,6 @@ void config_item_init(struct config_item * item) * dynamically allocated string that @item->ci_name points to. * Otherwise, use the static @item->ci_namebuf array. */ - int config_item_set_name(struct config_item * item, const char * fmt, ...) { int error = 0; @@ -139,12 +138,7 @@ struct config_item * config_item_get(struct config_item * item) return item; } -/** - * config_item_cleanup - free config_item resources. - * @item: item. - */ - -void config_item_cleanup(struct config_item * item) +static void config_item_cleanup(struct config_item * item) { struct config_item_type * t = item->ci_type; struct config_group * s = item->ci_group; @@ -179,39 +173,35 @@ void config_item_put(struct config_item * item) kref_put(&item->ci_kref, config_item_release); } - /** * config_group_init - initialize a group for use * @k: group */ - void config_group_init(struct config_group *group) { config_item_init(&group->cg_item); INIT_LIST_HEAD(&group->cg_children); } - /** - * config_group_find_obj - search for item in group. + * config_group_find_item - search for item in group. * @group: group we're looking in. * @name: item's name. * - * Lock group via @group->cg_subsys, and iterate over @group->cg_list, - * looking for a matching config_item. If matching item is found - * take a reference and return the item. + * Iterate over @group->cg_list, looking for a matching config_item. + * If matching item is found take a reference and return the item. + * Caller must have locked group via @group->cg_subsys->su_mtx. */ - -struct config_item * config_group_find_obj(struct config_group * group, const char * name) +struct config_item *config_group_find_item(struct config_group *group, + const char *name) { struct list_head * entry; struct config_item * ret = NULL; - /* XXX LOCKING! */ list_for_each(entry,&group->cg_children) { struct config_item * item = to_item(entry); if (config_item_name(item) && - !strcmp(config_item_name(item), name)) { + !strcmp(config_item_name(item), name)) { ret = config_item_get(item); break; } @@ -219,9 +209,8 @@ struct config_item * config_group_find_obj(struct config_group * group, const ch return ret; } - EXPORT_SYMBOL(config_item_init); EXPORT_SYMBOL(config_group_init); EXPORT_SYMBOL(config_item_get); EXPORT_SYMBOL(config_item_put); -EXPORT_SYMBOL(config_group_find_obj); +EXPORT_SYMBOL(config_group_find_item); diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 5069b2cb5a1..2f8e3c81bc1 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -133,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, return len; } -#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \ - .attr = { .ca_name = __stringify(_name), \ - .ca_mode = _mode, \ - .ca_owner = THIS_MODULE }, \ - .show = _read, \ - .store = _write, \ -} - #define CLUSTER_ATTR(name, check_zero) \ static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ { \ @@ -615,7 +607,7 @@ static struct clusters clusters_root = { int dlm_config_init(void) { config_group_init(&clusters_root.subsys.su_group); - init_MUTEX(&clusters_root.subsys.su_sem); + mutex_init(&clusters_root.subsys.su_mutex); return configfs_register_subsystem(&clusters_root.subsys); } @@ -759,9 +751,9 @@ static struct space *get_space(char *name) if (!space_list) return NULL; - down(&space_list->cg_subsys->su_sem); - i = config_group_find_obj(space_list, name); - up(&space_list->cg_subsys->su_sem); + mutex_lock(&space_list->cg_subsys->su_mutex); + i = config_group_find_item(space_list, name); + mutex_unlock(&space_list->cg_subsys->su_mutex); return to_space(i); } @@ -780,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) if (!comm_list) return NULL; - down(&clusters_root.subsys.su_sem); + mutex_lock(&clusters_root.subsys.su_mutex); list_for_each_entry(i, &comm_list->cg_children, ci_entry) { cm = to_comm(i); @@ -800,7 +792,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) break; } } - up(&clusters_root.subsys.su_sem); + mutex_unlock(&clusters_root.subsys.su_mutex); if (!found) cm = NULL; diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 19712a7d145..f5e11f4fa95 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -50,6 +50,8 @@ #include "buffer_head_io.h" static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); +static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, + struct ocfs2_extent_block *eb); /* * Structures which describe a path through a btree, and functions to @@ -117,6 +119,31 @@ static void ocfs2_free_path(struct ocfs2_path *path) } /* + * All the elements of src into dest. After this call, src could be freed + * without affecting dest. + * + * Both paths should have the same root. Any non-root elements of dest + * will be freed. + */ +static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src) +{ + int i; + + BUG_ON(path_root_bh(dest) != path_root_bh(src)); + BUG_ON(path_root_el(dest) != path_root_el(src)); + + ocfs2_reinit_path(dest, 1); + + for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { + dest->p_node[i].bh = src->p_node[i].bh; + dest->p_node[i].el = src->p_node[i].el; + + if (dest->p_node[i].bh) + get_bh(dest->p_node[i].bh); + } +} + +/* * Make the *dest path the same as src and re-initialize src path to * have a root only. */ @@ -212,10 +239,41 @@ out: return ret; } +/* + * Return the index of the extent record which contains cluster #v_cluster. + * -1 is returned if it was not found. + * + * Should work fine on interior and exterior nodes. + */ +int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster) +{ + int ret = -1; + int i; + struct ocfs2_extent_rec *rec; + u32 rec_end, rec_start, clusters; + + for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { + rec = &el->l_recs[i]; + + rec_start = le32_to_cpu(rec->e_cpos); + clusters = ocfs2_rec_clusters(el, rec); + + rec_end = rec_start + clusters; + + if (v_cluster >= rec_start && v_cluster < rec_end) { + ret = i; + break; + } + } + + return ret; +} + enum ocfs2_contig_type { CONTIG_NONE = 0, CONTIG_LEFT, - CONTIG_RIGHT + CONTIG_RIGHT, + CONTIG_LEFTRIGHT, }; @@ -253,6 +311,14 @@ static enum ocfs2_contig_type { u64 blkno = le64_to_cpu(insert_rec->e_blkno); + /* + * Refuse to coalesce extent records with different flag + * fields - we don't want to mix unwritten extents with user + * data. + */ + if (ext->e_flags != insert_rec->e_flags) + return CONTIG_NONE; + if (ocfs2_extents_adjacent(ext, insert_rec) && ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) return CONTIG_RIGHT; @@ -277,7 +343,14 @@ enum ocfs2_append_type { APPEND_TAIL, }; +enum ocfs2_split_type { + SPLIT_NONE = 0, + SPLIT_LEFT, + SPLIT_RIGHT, +}; + struct ocfs2_insert_type { + enum ocfs2_split_type ins_split; enum ocfs2_append_type ins_appending; enum ocfs2_contig_type ins_contig; int ins_contig_index; @@ -285,6 +358,13 @@ struct ocfs2_insert_type { int ins_tree_depth; }; +struct ocfs2_merge_ctxt { + enum ocfs2_contig_type c_contig_type; + int c_has_empty_extent; + int c_split_covers_rec; + int c_used_tail_recs; +}; + /* * How many free extents have we got before we need more meta data? */ @@ -384,13 +464,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); eb->h_blkno = cpu_to_le64(first_blkno); eb->h_fs_generation = cpu_to_le32(osb->fs_generation); - -#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS - /* we always use slot zero's suballocator */ - eb->h_suballoc_slot = 0; -#else eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); -#endif eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); eb->h_list.l_count = |