diff options
Diffstat (limited to 'fs/exofs/super.c')
| -rw-r--r-- | fs/exofs/super.c | 545 | 
1 files changed, 374 insertions, 171 deletions
diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 79c3ae6e045..ed73ed8ebbe 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -35,11 +35,14 @@  #include <linux/parser.h>  #include <linux/vfs.h>  #include <linux/random.h> +#include <linux/module.h>  #include <linux/exportfs.h>  #include <linux/slab.h>  #include "exofs.h" +#define EXOFS_DBGMSG2(M...) do {} while (0) +  /******************************************************************************   * MOUNT OPTIONS   *****************************************************************************/ @@ -48,6 +51,7 @@   * struct to hold what we get from mount options   */  struct exofs_mountopt { +	bool is_osdname;  	const char *dev_name;  	uint64_t pid;  	int timeout; @@ -56,7 +60,7 @@ struct exofs_mountopt {  /*   * exofs-specific mount-time options.   */ -enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err }; +enum { Opt_name, Opt_pid, Opt_to, Opt_err };  /*   * Our mount-time options.  These should ideally be 64-bit unsigned, but the @@ -64,6 +68,7 @@ enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };   * sufficient for most applications now.   */  static match_table_t tokens = { +	{Opt_name, "osdname=%s"},  	{Opt_pid, "pid=%u"},  	{Opt_to, "to=%u"},  	{Opt_err, NULL} @@ -94,6 +99,14 @@ static int parse_options(char *options, struct exofs_mountopt *opts)  		token = match_token(p, tokens, args);  		switch (token) { +		case Opt_name: +			opts->dev_name = match_strdup(&args[0]); +			if (unlikely(!opts->dev_name)) { +				EXOFS_ERR("Error allocating dev_name"); +				return -ENOMEM; +			} +			opts->is_osdname = true; +			break;  		case Opt_pid:  			if (0 == match_strlcpy(str, &args[0], sizeof(str)))  				return -EINVAL; @@ -150,12 +163,18 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)  	return &oi->vfs_inode;  } +static void exofs_i_callback(struct rcu_head *head) +{ +	struct inode *inode = container_of(head, struct inode, i_rcu); +	kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); +} +  /*   * Remove an inode from the cache   */  static void exofs_destroy_inode(struct inode *inode)  { -	kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); +	call_rcu(&inode->i_rcu, exofs_i_callback);  }  /* @@ -187,10 +206,152 @@ static int init_inodecache(void)   */  static void destroy_inodecache(void)  { +	/* +	 * Make sure all delayed rcu free inodes are flushed before we +	 * destroy cache. +	 */ +	rcu_barrier();  	kmem_cache_destroy(exofs_inode_cachep);  }  /****************************************************************************** + * Some osd helpers + *****************************************************************************/ +void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj) +{ +	osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); +} + +static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, +		    u64 offset, void *p, unsigned length) +{ +	struct osd_request *or = osd_start_request(od, GFP_KERNEL); +/*	struct osd_sense_info osi = {.key = 0};*/ +	int ret; + +	if (unlikely(!or)) { +		EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__); +		return -ENOMEM; +	} +	ret = osd_req_read_kern(or, obj, offset, p, length); +	if (unlikely(ret)) { +		EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__); +		goto out; +	} + +	ret = osd_finalize_request(or, 0, cred, NULL); +	if (unlikely(ret)) { +		EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); +		goto out; +	} + +	ret = osd_execute_request(or); +	if (unlikely(ret)) +		EXOFS_DBGMSG("osd_execute_request() => %d\n", ret); +	/* osd_req_decode_sense(or, ret); */ + +out: +	osd_end_request(or); +	EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx " +		      "length=0x%llx dev=%p ret=>%d\n", +		      _LLU(obj->id), _LLU(offset), _LLU(length), od, ret); +	return ret; +} + +static const struct osd_attr g_attr_sb_stats = ATTR_DEF( +	EXOFS_APAGE_SB_DATA, +	EXOFS_ATTR_SB_STATS, +	sizeof(struct exofs_sb_stats)); + +static int __sbi_read_stats(struct exofs_sb_info *sbi) +{ +	struct osd_attr attrs[] = { +		[0] = g_attr_sb_stats, +	}; +	struct ore_io_state *ios; +	int ret; + +	ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios); +	if (unlikely(ret)) { +		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); +		return ret; +	} + +	ios->in_attr = attrs; +	ios->in_attr_len = ARRAY_SIZE(attrs); + +	ret = ore_read(ios); +	if (unlikely(ret)) { +		EXOFS_ERR("Error reading super_block stats => %d\n", ret); +		goto out; +	} + +	ret = extract_attr_from_ios(ios, &attrs[0]); +	if (ret) { +		EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__); +		goto out; +	} +	if (attrs[0].len) { +		struct exofs_sb_stats *ess; + +		if (unlikely(attrs[0].len != sizeof(*ess))) { +			EXOFS_ERR("%s: Wrong version of exofs_sb_stats " +				  "size(%d) != expected(%zd)\n", +				  __func__, attrs[0].len, sizeof(*ess)); +			goto out; +		} + +		ess = attrs[0].val_ptr; +		sbi->s_nextid = le64_to_cpu(ess->s_nextid); +		sbi->s_numfiles = le32_to_cpu(ess->s_numfiles); +	} + +out: +	ore_put_io_state(ios); +	return ret; +} + +static void stats_done(struct ore_io_state *ios, void *p) +{ +	ore_put_io_state(ios); +	/* Good thanks nothing to do anymore */ +} + +/* Asynchronously write the stats attribute */ +int exofs_sbi_write_stats(struct exofs_sb_info *sbi) +{ +	struct osd_attr attrs[] = { +		[0] = g_attr_sb_stats, +	}; +	struct ore_io_state *ios; +	int ret; + +	ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios); +	if (unlikely(ret)) { +		EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__); +		return ret; +	} + +	sbi->s_ess.s_nextid   = cpu_to_le64(sbi->s_nextid); +	sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles); +	attrs[0].val_ptr = &sbi->s_ess; + + +	ios->done = stats_done; +	ios->private = sbi; +	ios->out_attr = attrs; +	ios->out_attr_len = ARRAY_SIZE(attrs); + +	ret = ore_write(ios); +	if (unlikely(ret)) { +		EXOFS_ERR("%s: ore_write failed.\n", __func__); +		ore_put_io_state(ios); +	} + +	return ret; +} + +/******************************************************************************   * SUPERBLOCK FUNCTIONS   *****************************************************************************/  static const struct super_operations exofs_sops; @@ -199,60 +360,57 @@ static const struct export_operations exofs_export_ops;  /*   * Write the superblock to the OSD   */ -int exofs_sync_fs(struct super_block *sb, int wait) +static int exofs_sync_fs(struct super_block *sb, int wait)  {  	struct exofs_sb_info *sbi;  	struct exofs_fscb *fscb; -	struct exofs_io_state *ios; +	struct ore_comp one_comp; +	struct ore_components oc; +	struct ore_io_state *ios;  	int ret = -ENOMEM; -	lock_super(sb); +	fscb = kmalloc(sizeof(*fscb), GFP_KERNEL); +	if (unlikely(!fscb)) +		return -ENOMEM; +  	sbi = sb->s_fs_info; -	fscb = &sbi->s_fscb; -	ret = exofs_get_io_state(&sbi->layout, &ios); -	if (ret) +	/* NOTE: We no longer dirty the super_block anywhere in exofs. The +	 * reason we write the fscb here on unmount is so we can stay backwards +	 * compatible with fscb->s_version == 1. (What we are not compatible +	 * with is if a new version FS crashed and then we try to mount an old +	 * version). Otherwise the exofs_fscb is read-only from mkfs time. All +	 * the writeable info is set in exofs_sbi_write_stats() above. +	 */ + +	exofs_init_comps(&oc, &one_comp, sbi, EXOFS_SUPER_ID); + +	ret = ore_get_io_state(&sbi->layout, &oc, &ios); +	if (unlikely(ret))  		goto out; -	/* Note: We only write the changing part of the fscb. .i.e upto the -	 *       the fscb->s_dev_table_oid member. There is no read-modify-write -	 *       here. -	 */  	ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);  	memset(fscb, 0, ios->length);  	fscb->s_nextid = cpu_to_le64(sbi->s_nextid); -	fscb->s_numfiles = cpu_to_le32(sbi->s_numfiles); +	fscb->s_numfiles = cpu_to_le64(sbi->s_numfiles);  	fscb->s_magic = cpu_to_le16(sb->s_magic);  	fscb->s_newfs = 0;  	fscb->s_version = EXOFS_FSCB_VER; -	ios->obj.id = EXOFS_SUPER_ID;  	ios->offset = 0;  	ios->kern_buff = fscb; -	ios->cred = sbi->s_cred; -	ret = exofs_sbi_write(ios); -	if (unlikely(ret)) { -		EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); -		goto out; -	} -	sb->s_dirt = 0; +	ret = ore_write(ios); +	if (unlikely(ret)) +		EXOFS_ERR("%s: ore_write failed.\n", __func__);  out:  	EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); -	exofs_put_io_state(ios); -	unlock_super(sb); +	ore_put_io_state(ios); +	kfree(fscb);  	return ret;  } -static void exofs_write_super(struct super_block *sb) -{ -	if (!(sb->s_flags & MS_RDONLY)) -		exofs_sync_fs(sb, 1); -	else -		sb->s_dirt = 0; -} -  static void _exofs_print_device(const char *msg, const char *dev_path,  				struct osd_dev *od, u64 pid)  { @@ -262,17 +420,20 @@ static void _exofs_print_device(const char *msg, const char *dev_path,  		msg, dev_path ?: "", odi->osdname, _LLU(pid));  } -void exofs_free_sbi(struct exofs_sb_info *sbi) +static void exofs_free_sbi(struct exofs_sb_info *sbi)  { -	while (sbi->layout.s_numdevs) { -		int i = --sbi->layout.s_numdevs; -		struct osd_dev *od = sbi->layout.s_ods[i]; +	unsigned numdevs = sbi->oc.numdevs; + +	while (numdevs) { +		unsigned i = --numdevs; +		struct osd_dev *od = ore_comp_dev(&sbi->oc, i);  		if (od) { -			sbi->layout.s_ods[i] = NULL; +			ore_comp_set_dev(&sbi->oc, i, NULL);  			osduld_put_device(od);  		}  	} +	kfree(sbi->oc.ods);  	kfree(sbi);  } @@ -285,22 +446,24 @@ static void exofs_put_super(struct super_block *sb)  	int num_pend;  	struct exofs_sb_info *sbi = sb->s_fs_info; -	if (sb->s_dirt) -		exofs_write_super(sb); -  	/* make sure there are no pending commands */  	for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;  	     num_pend = atomic_read(&sbi->s_curr_pending)) {  		wait_queue_head_t wq; + +		printk(KERN_NOTICE "%s: !!Pending operations in flight. " +		       "This is a BUG. please report to osd-dev@open-osd.org\n", +		       __func__);  		init_waitqueue_head(&wq);  		wait_event_timeout(wq,  				  (atomic_read(&sbi->s_curr_pending) == 0),  				  msecs_to_jiffies(100));  	} -	_exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0], -			    sbi->layout.s_pid); +	_exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0), +			    sbi->one_comp.obj.partition); +	exofs_sysfs_sb_del(sbi);  	bdi_destroy(&sbi->bdi);  	exofs_free_sbi(sbi);  	sb->s_fs_info = NULL; @@ -309,78 +472,48 @@ static void exofs_put_super(struct super_block *sb)  static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,  				    struct exofs_device_table *dt)  { -	u64 stripe_length; +	int ret; -	sbi->data_map.odm_num_comps   = -				le32_to_cpu(dt->dt_data_map.cb_num_comps); -	sbi->data_map.odm_stripe_unit = +	sbi->layout.stripe_unit =  				le64_to_cpu(dt->dt_data_map.cb_stripe_unit); -	sbi->data_map.odm_group_width = +	sbi->layout.group_width =  				le32_to_cpu(dt->dt_data_map.cb_group_width); -	sbi->data_map.odm_group_depth = +	sbi->layout.group_depth =  				le32_to_cpu(dt->dt_data_map.cb_group_depth); -	sbi->data_map.odm_mirror_cnt  = -				le32_to_cpu(dt->dt_data_map.cb_mirror_cnt); -	sbi->data_map.odm_raid_algorithm  = +	sbi->layout.mirrors_p1  = +				le32_to_cpu(dt->dt_data_map.cb_mirror_cnt) + 1; +	sbi->layout.raid_algorithm  =  				le32_to_cpu(dt->dt_data_map.cb_raid_algorithm); -/* FIXME: Only raid0 for now. if not so, do not mount */ -	if (sbi->data_map.odm_num_comps != numdevs) { -		EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n", -			  sbi->data_map.odm_num_comps, numdevs); -		return -EINVAL; -	} -	if (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) { -		EXOFS_ERR("Only RAID_0 for now\n"); -		return -EINVAL; -	} -	if (0 != (numdevs % (sbi->data_map.odm_mirror_cnt + 1))) { -		EXOFS_ERR("Data Map wrong, numdevs=%d mirrors=%d\n", -			  numdevs, sbi->data_map.odm_mirror_cnt); -		return -EINVAL; -	} +	ret = ore_verify_layout(numdevs, &sbi->layout); + +	EXOFS_DBGMSG("exofs: layout: " +		"num_comps=%u stripe_unit=0x%x group_width=%u " +		"group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n", +		numdevs, +		sbi->layout.stripe_unit, +		sbi->layout.group_width, +		_LLU(sbi->layout.group_depth), +		sbi->layout.mirrors_p1, +		sbi->layout.raid_algorithm); +	return ret; +} -	if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) { -		EXOFS_ERR("Stripe Unit(0x%llx)" -			  " must be Multples of PAGE_SIZE(0x%lx)\n", -			  _LLU(sbi->data_map.odm_stripe_unit), PAGE_SIZE); -		return -EINVAL; -	} +static unsigned __ra_pages(struct ore_layout *layout) +{ +	const unsigned _MIN_RA = 32; /* min 128K read-ahead */ +	unsigned ra_pages = layout->group_width * layout->stripe_unit / +				PAGE_SIZE; +	unsigned max_io_pages = exofs_max_io_pages(layout, ~0); -	sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit; -	sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1; +	ra_pages *= 2; /* two stripes */ +	if (ra_pages < _MIN_RA) +		ra_pages = roundup(_MIN_RA, ra_pages / 2); -	if (sbi->data_map.odm_group_width) { -		sbi->layout.group_width = sbi->data_map.odm_group_width; -		sbi->layout.group_depth = sbi->data_map.odm_group_depth; -		if (!sbi->layout.group_depth) { -			EXOFS_ERR("group_depth == 0 && group_width != 0\n"); -			return -EINVAL; -		} -		sbi->layout.group_count = sbi->data_map.odm_num_comps / -						sbi->layout.mirrors_p1 / -						sbi->data_map.odm_group_width; -	} else { -		if (sbi->data_map.odm_group_depth) { -			printk(KERN_NOTICE "Warning: group_depth ignored " -				"group_width == 0 && group_depth == %d\n", -				sbi->data_map.odm_group_depth); -			sbi->data_map.odm_group_depth = 0; -		} -		sbi->layout.group_width = sbi->data_map.odm_num_comps / -							sbi->layout.mirrors_p1; -		sbi->layout.group_depth = -1; -		sbi->layout.group_count = 1; -	} - -	stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit; -	if (stripe_length >= (1ULL << 32)) { -		EXOFS_ERR("Total Stripe length(0x%llx)" -			  " >= 32bit is not supported\n", _LLU(stripe_length)); -		return -EINVAL; -	} +	if (ra_pages > max_io_pages) +		ra_pages = max_io_pages; -	return 0; +	return ra_pages;  }  /* @odi is valid only as long as @fscb_dev is valid */ @@ -388,7 +521,8 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,  			     struct osd_dev_info *odi)  {  	odi->systemid_len = le32_to_cpu(dt_dev->systemid_len); -	memcpy(odi->systemid, dt_dev->systemid, odi->systemid_len); +	if (likely(odi->systemid_len)) +		memcpy(odi->systemid, dt_dev->systemid, OSD_SYSTEMID_LEN);  	odi->osdname_len = le32_to_cpu(dt_dev->osdname_len);  	odi->osdname = dt_dev->osdname; @@ -409,14 +543,40 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,  	return !(odi->systemid_len || odi->osdname_len);  } -static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi, +static int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs, +		      struct exofs_dev **peds) +{ +	struct __alloc_ore_devs_and_exofs_devs { +		/* Twice bigger table: See exofs_init_comps() and comment at +		 * exofs_read_lookup_dev_table() +		 */ +		struct ore_dev *oreds[numdevs * 2 - 1]; +		struct exofs_dev eds[numdevs]; +	} *aoded; +	struct exofs_dev *eds; +	unsigned i; + +	aoded = kzalloc(sizeof(*aoded), GFP_KERNEL); +	if (unlikely(!aoded)) { +		EXOFS_ERR("ERROR: failed allocating Device array[%d]\n", +			  numdevs); +		return -ENOMEM; +	} + +	sbi->oc.ods = aoded->oreds; +	*peds = eds = aoded->eds; +	for (i = 0; i < numdevs; ++i) +		aoded->oreds[i] = &eds[i].ored; +	return 0; +} + +static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi, +				       struct osd_dev *fscb_od,  				       unsigned table_count)  { -	struct exofs_sb_info *sbi = *psbi; -	struct osd_dev *fscb_od; -	struct osd_obj_id obj = {.partition = sbi->layout.s_pid, -				 .id = EXOFS_DEVTABLE_ID}; +	struct ore_comp comp;  	struct exofs_device_table *dt; +	struct exofs_dev *eds;  	unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +  					     sizeof(*dt);  	unsigned numdevs, i; @@ -429,10 +589,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,  		return -ENOMEM;  	} -	fscb_od = sbi->layout.s_ods[0]; -	sbi->layout.s_ods[0] = NULL; -	sbi->layout.s_numdevs = 0; -	ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes); +	sbi->oc.numdevs = 0; + +	comp.obj.partition = sbi->one_comp.obj.partition; +	comp.obj.id = EXOFS_DEVTABLE_ID; +	exofs_make_credential(comp.cred, &comp.obj); + +	ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt, +			      table_bytes);  	if (unlikely(ret)) {  		EXOFS_ERR("ERROR: reading device table\n");  		goto out; @@ -449,18 +613,22 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,  	if (unlikely(ret))  		goto out; -	if (likely(numdevs > 1)) { -		unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]); +	ret = __alloc_dev_table(sbi, numdevs, &eds); +	if (unlikely(ret)) +		goto out; +	/* exofs round-robins the device table view according to inode +	 * number. We hold a: twice bigger table hence inodes can point +	 * to any device and have a sequential view of the table +	 * starting at this device. See exofs_init_comps() +	 */ +	memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0], +		(numdevs - 1) * sizeof(sbi->oc.ods[0])); -		sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL); -		if (unlikely(!sbi)) { -			ret = -ENOMEM; -			goto out; -		} -		memset(&sbi->layout.s_ods[1], 0, -		       size - sizeof(sbi->layout.s_ods[0])); -		*psbi = sbi; -	} +	/* create sysfs subdir under which we put the device table +	 * And cluster layout. A Superblock is identified by the string: +	 *	"dev[0].osdname"_"pid" +	 */ +	exofs_sysfs_sb_add(sbi, &dt->dt_dev_table[0]);  	for (i = 0; i < numdevs; i++) {  		struct exofs_fscb fscb; @@ -476,32 +644,36 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,  		printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",  		       i, odi.osdname); +		/* the exofs id is currently the table index */ +		eds[i].did = i; +  		/* On all devices the device table is identical. The user can  		 * specify any one of the participating devices on the command  		 * line. We always keep them in device-table order.  		 */  		if (fscb_od && osduld_device_same(fscb_od, &odi)) { -			sbi->layout.s_ods[i] = fscb_od; -			++sbi->layout.s_numdevs; +			eds[i].ored.od = fscb_od; +			++sbi->oc.numdevs;  			fscb_od = NULL; +			exofs_sysfs_odev_add(&eds[i], sbi);  			continue;  		}  		od = osduld_info_lookup(&odi); -		if (unlikely(IS_ERR(od))) { +		if (IS_ERR(od)) {  			ret = PTR_ERR(od);  			EXOFS_ERR("ERROR: device requested is not found "  				  "osd_name-%s =>%d\n", odi.osdname, ret);  			goto out;  		} -		sbi->layout.s_ods[i] = od; -		++sbi->layout.s_numdevs; +		eds[i].ored.od = od; +		++sbi->oc.numdevs;  		/* Read the fscb of the other devices to make sure the FS  		 * partition is there.  		 */ -		ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, +		ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb,  				      sizeof(fscb));  		if (unlikely(ret)) {  			EXOFS_ERR("ERROR: Malformed participating device " @@ -509,6 +681,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,  				  odi.osdname);  			goto out;  		} +		exofs_sysfs_odev_add(&eds[i], sbi);  		/* TODO: verify other information is correct and FS-uuid  		 *	 matches. Benny what did you say about device table @@ -518,13 +691,11 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,  out:  	kfree(dt); -	if (unlikely(!ret && fscb_od)) { -		EXOFS_ERR( -		      "ERROR: Bad device-table container device not present\n"); -		osduld_put_device(fscb_od); -		ret = -EINVAL; +	if (unlikely(fscb_od && !ret)) { +			EXOFS_ERR("ERROR: Bad device-table container device not present\n"); +			osduld_put_device(fscb_od); +			return -EINVAL;  	} -  	return ret;  } @@ -538,7 +709,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)  	struct exofs_sb_info *sbi;	/*extended info                  */  	struct osd_dev *od;		/* Master device                 */  	struct exofs_fscb fscb;		/*on-disk superblock info        */ -	struct osd_obj_id obj; +	struct ore_comp comp;  	unsigned table_count;  	int ret; @@ -546,14 +717,20 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)  	if (!sbi)  		return -ENOMEM; -	ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); -	if (ret) -		goto free_bdi; -  	/* use mount options to fill superblock */ -	od = osduld_path_lookup(opts->dev_name); +	if (opts->is_osdname) { +		struct osd_dev_info odi = {.systemid_len = 0}; + +		odi.osdname_len = strlen(opts->dev_name); +		odi.osdname = (u8 *)opts->dev_name; +		od = osduld_info_lookup(&odi); +		kfree(opts->dev_name); +		opts->dev_name = NULL; +	} else { +		od = osduld_path_lookup(opts->dev_name); +	}  	if (IS_ERR(od)) { -		ret = PTR_ERR(od); +		ret = -EINVAL;  		goto free_sbi;  	} @@ -563,30 +740,35 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)  	sbi->layout.group_width = 1;  	sbi->layout.group_depth = -1;  	sbi->layout.group_count = 1; -	sbi->layout.s_ods[0] = od; -	sbi->layout.s_numdevs = 1; -	sbi->layout.s_pid = opts->pid;  	sbi->s_timeout = opts->timeout; +	sbi->one_comp.obj.partition = opts->pid; +	sbi->one_comp.obj.id = 0; +	exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj); +	sbi->oc.single_comp = EC_SINGLE_COMP; +	sbi->oc.comps = &sbi->one_comp; +  	/* fill in some other data by hand */  	memset(sb->s_id, 0, sizeof(sb->s_id));  	strcpy(sb->s_id, "exofs");  	sb->s_blocksize = EXOFS_BLKSIZE;  	sb->s_blocksize_bits = EXOFS_BLKSHIFT;  	sb->s_maxbytes = MAX_LFS_FILESIZE; +	sb->s_max_links = EXOFS_LINK_MAX;  	atomic_set(&sbi->s_curr_pending, 0);  	sb->s_bdev = NULL;  	sb->s_dev = 0; -	obj.partition = sbi->layout.s_pid; -	obj.id = EXOFS_SUPER_ID; -	exofs_make_credential(sbi->s_cred, &obj); +	comp.obj.partition = sbi->one_comp.obj.partition; +	comp.obj.id = EXOFS_SUPER_ID; +	exofs_make_credential(comp.cred, &comp.obj); -	ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb)); +	ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb));  	if (unlikely(ret))  		goto free_sbi;  	sb->s_magic = le16_to_cpu(fscb.s_magic); +	/* NOTE: we read below to be backward compatible with old versions */  	sbi->s_nextid = le64_to_cpu(fscb.s_nextid);  	sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); @@ -597,7 +779,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)  		ret = -EINVAL;  		goto free_sbi;  	} -	if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { +	if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) {  		EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",  			  EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));  		ret = -EINVAL; @@ -610,12 +792,24 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)  	table_count = le64_to_cpu(fscb.s_dev_table_count);  	if (table_count) { -		ret = exofs_read_lookup_dev_table(&sbi, table_count); +		ret = exofs_read_lookup_dev_table(sbi, od, table_count);  		if (unlikely(ret))  			goto free_sbi; +	} else { +		struct exofs_dev *eds; + +		ret = __alloc_dev_table(sbi, 1, &eds); +		if (unlikely(ret)) +			goto free_sbi; + +		ore_comp_set_dev(&sbi->oc, 0, od); +		sbi->oc.numdevs = 1;  	} +	__sbi_read_stats(sbi); +  	/* set up operation vectors */ +	sbi->bdi.ra_pages = __ra_pages(&sbi->layout);  	sb->s_bdi = &sbi->bdi;  	sb->s_fs_info = sbi;  	sb->s_op = &exofs_sops; @@ -626,9 +820,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)  		ret = PTR_ERR(root);  		goto free_sbi;  	} -	sb->s_root = d_alloc_root(root); +	sb->s_root = d_make_root(root);  	if (!sb->s_root) { -		iput(root);  		EXOFS_ERR("ERROR: get root inode failed\n");  		ret = -ENOMEM;  		goto free_sbi; @@ -643,15 +836,23 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)  		goto free_sbi;  	} -	_exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], -			    sbi->layout.s_pid); +	ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); +	if (ret) { +		EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); +		dput(sb->s_root); +		sb->s_root = NULL; +		goto free_sbi; +	} + +	exofs_sysfs_dbg_print(); +	_exofs_print_device("Mounting", opts->dev_name, +			    ore_comp_dev(&sbi->oc, 0), +			    sbi->one_comp.obj.partition);  	return 0;  free_sbi: -	bdi_destroy(&sbi->bdi); -free_bdi:  	EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", -		  opts->dev_name, sbi->layout.s_pid, ret); +		  opts->dev_name, sbi->one_comp.obj.partition, ret);  	exofs_free_sbi(sbi);  	return ret;  } @@ -670,7 +871,8 @@ static struct dentry *exofs_mount(struct file_system_type *type,  	if (ret)  		return ERR_PTR(ret); -	opts.dev_name = dev_name; +	if (!opts.dev_name) +		opts.dev_name = dev_name;  	return mount_nodev(type, flags, &opts, exofs_fill_super);  } @@ -682,7 +884,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)  {  	struct super_block *sb = dentry->d_sb;  	struct exofs_sb_info *sbi = sb->s_fs_info; -	struct exofs_io_state *ios; +	struct ore_io_state *ios;  	struct osd_attr attrs[] = {  		ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,  			OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)), @@ -691,21 +893,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)  	};  	uint64_t capacity = ULLONG_MAX;  	uint64_t used = ULLONG_MAX; -	uint8_t cred_a[OSD_CAP_LEN];  	int ret; -	ret = exofs_get_io_state(&sbi->layout, &ios); +	ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios);  	if (ret) { -		EXOFS_DBGMSG("exofs_get_io_state failed.\n"); +		EXOFS_DBGMSG("ore_get_io_state failed.\n");  		return ret;  	} -	exofs_make_credential(cred_a, &ios->obj); -	ios->cred = sbi->s_cred;  	ios->in_attr = attrs;  	ios->in_attr_len = ARRAY_SIZE(attrs); -	ret = exofs_sbi_read(ios); +	ret = ore_read(ios);  	if (unlikely(ret))  		goto out; @@ -734,7 +933,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)  	buf->f_namelen = EXOFS_NAME_LEN;  out: -	exofs_put_io_state(ios); +	ore_put_io_state(ios);  	return ret;  } @@ -744,7 +943,6 @@ static const struct super_operations exofs_sops = {  	.write_inode    = exofs_write_inode,  	.evict_inode    = exofs_evict_inode,  	.put_super      = exofs_put_super, -	.write_super    = exofs_write_super,  	.sync_fs	= exofs_sync_fs,  	.statfs         = exofs_statfs,  }; @@ -753,12 +951,12 @@ static const struct super_operations exofs_sops = {   * EXPORT OPERATIONS   *****************************************************************************/ -struct dentry *exofs_get_parent(struct dentry *child) +static struct dentry *exofs_get_parent(struct dentry *child)  {  	unsigned long ino = exofs_parent_ino(child);  	if (!ino) -		return NULL; +		return ERR_PTR(-ESTALE);  	return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));  } @@ -812,6 +1010,7 @@ static struct file_system_type exofs_type = {  	.mount          = exofs_mount,  	.kill_sb        = generic_shutdown_super,  }; +MODULE_ALIAS_FS("exofs");  static int __init init_exofs(void)  { @@ -825,6 +1024,9 @@ static int __init init_exofs(void)  	if (err)  		goto out_d; +	/* We don't fail if sysfs creation failed */ +	exofs_sysfs_init(); +  	return 0;  out_d:  	destroy_inodecache(); @@ -834,6 +1036,7 @@ out:  static void __exit exit_exofs(void)  { +	exofs_sysfs_uninit();  	unregister_filesystem(&exofs_type);  	destroy_inodecache();  }  | 
