diff options
Diffstat (limited to 'drivers/base/memory.c')
| -rw-r--r-- | drivers/base/memory.c | 635 | 
1 files changed, 386 insertions, 249 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index cafeaaf0428..89f752dd846 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -1,5 +1,5 @@  /* - * drivers/base/memory.c - basic Memory class support + * Memory subsystem support   *   * Written by Matt Tolentino <matthew.e.tolentino@intel.com>   *            Dave Hansen <haveblue@us.ibm.com> @@ -10,46 +10,42 @@   * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.   */ -#include <linux/sysdev.h>  #include <linux/module.h>  #include <linux/init.h>  #include <linux/topology.h>  #include <linux/capability.h>  #include <linux/device.h>  #include <linux/memory.h> -#include <linux/kobject.h>  #include <linux/memory_hotplug.h>  #include <linux/mm.h>  #include <linux/mutex.h>  #include <linux/stat.h>  #include <linux/slab.h> -#include <asm/atomic.h> +#include <linux/atomic.h>  #include <asm/uaccess.h>  static DEFINE_MUTEX(mem_sysfs_mutex);  #define MEMORY_CLASS_NAME	"memory" -static struct sysdev_class memory_sysdev_class = { -	.name = MEMORY_CLASS_NAME, -}; +#define to_memory_block(dev) container_of(dev, struct memory_block, dev) -static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj) -{ -	return MEMORY_CLASS_NAME; -} +static int sections_per_block; -static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env) +static inline int base_memory_block_id(int section_nr)  { -	int retval = 0; - -	return retval; +	return section_nr / sections_per_block;  } -static const struct kset_uevent_ops memory_uevent_ops = { -	.name		= memory_uevent_name, -	.uevent		= memory_uevent, +static int memory_subsys_online(struct device *dev); +static int memory_subsys_offline(struct device *dev); + +static struct bus_type memory_subsys = { +	.name = MEMORY_CLASS_NAME, +	.dev_name = MEMORY_CLASS_NAME, +	.online = memory_subsys_online, +	.offline = memory_subsys_offline,  };  static BLOCKING_NOTIFIER_HEAD(memory_chain); @@ -80,30 +76,31 @@ void unregister_memory_isolate_notifier(struct notifier_block *nb)  }  EXPORT_SYMBOL(unregister_memory_isolate_notifier); -/* - * register_memory - Setup a sysfs device for a memory block - */ -static -int register_memory(struct memory_block *memory, struct mem_section *section) +static void memory_block_release(struct device *dev)  { -	int error; +	struct memory_block *mem = to_memory_block(dev); -	memory->sysdev.cls = &memory_sysdev_class; -	memory->sysdev.id = __section_nr(section); +	kfree(mem); +} -	error = sysdev_register(&memory->sysdev); -	return error; +unsigned long __weak memory_block_size_bytes(void) +{ +	return MIN_MEMORY_BLOCK_SIZE;  } -static void -unregister_memory(struct memory_block *memory, struct mem_section *section) +static unsigned long get_memory_block_size(void)  { -	BUG_ON(memory->sysdev.cls != &memory_sysdev_class); -	BUG_ON(memory->sysdev.id != __section_nr(section)); +	unsigned long block_sz; -	/* drop the ref. we got in remove_memory_block() */ -	kobject_put(&memory->sysdev.kobj); -	sysdev_unregister(&memory->sysdev); +	block_sz = memory_block_size_bytes(); + +	/* Validate blk_sz is a power of 2 and not less than section size */ +	if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) { +		WARN_ON(1); +		block_sz = MIN_MEMORY_BLOCK_SIZE; +	} + +	return block_sz;  }  /* @@ -111,38 +108,43 @@ unregister_memory(struct memory_block *memory, struct mem_section *section)   * uses.   */ -static ssize_t show_mem_phys_index(struct sys_device *dev, -			struct sysdev_attribute *attr, char *buf) +static ssize_t show_mem_start_phys_index(struct device *dev, +			struct device_attribute *attr, char *buf)  { -	struct memory_block *mem = -		container_of(dev, struct memory_block, sysdev); -	return sprintf(buf, "%08lx\n", mem->phys_index); +	struct memory_block *mem = to_memory_block(dev); +	unsigned long phys_index; + +	phys_index = mem->start_section_nr / sections_per_block; +	return sprintf(buf, "%08lx\n", phys_index);  }  /*   * Show whether the section of memory is likely to be hot-removable   */ -static ssize_t show_mem_removable(struct sys_device *dev, -			struct sysdev_attribute *attr, char *buf) +static ssize_t show_mem_removable(struct device *dev, +			struct device_attribute *attr, char *buf)  { -	unsigned long start_pfn; -	int ret; -	struct memory_block *mem = -		container_of(dev, struct memory_block, sysdev); +	unsigned long i, pfn; +	int ret = 1; +	struct memory_block *mem = to_memory_block(dev); + +	for (i = 0; i < sections_per_block; i++) { +		if (!present_section_nr(mem->start_section_nr + i)) +			continue; +		pfn = section_nr_to_pfn(mem->start_section_nr + i); +		ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); +	} -	start_pfn = section_nr_to_pfn(mem->phys_index); -	ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION);  	return sprintf(buf, "%d\n", ret);  }  /*   * online, offline, going offline, etc.   */ -static ssize_t show_mem_state(struct sys_device *dev, -			struct sysdev_attribute *attr, char *buf) +static ssize_t show_mem_state(struct device *dev, +			struct device_attribute *attr, char *buf)  { -	struct memory_block *mem = -		container_of(dev, struct memory_block, sysdev); +	struct memory_block *mem = to_memory_block(dev);  	ssize_t len = 0;  	/* @@ -180,57 +182,68 @@ int memory_isolate_notify(unsigned long val, void *v)  }  /* - * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is - * OK to have direct references to sparsemem variables in here. + * The probe routines leave the pages reserved, just as the bootmem code does. + * Make sure they're still that way.   */ -static int -memory_block_action(struct memory_block *mem, unsigned long action) +static bool pages_correctly_reserved(unsigned long start_pfn)  { -	int i; -	unsigned long psection; -	unsigned long start_pfn, start_paddr; -	struct page *first_page; -	int ret; -	int old_state = mem->state; - -	psection = mem->phys_index; -	first_page = pfn_to_page(psection << PFN_SECTION_SHIFT); +	int i, j; +	struct page *page; +	unsigned long pfn = start_pfn;  	/* -	 * The probe routines leave the pages reserved, just -	 * as the bootmem code does.  Make sure they're still -	 * that way. +	 * memmap between sections is not contiguous except with +	 * SPARSEMEM_VMEMMAP. We lookup the page once per section +	 * and assume memmap is contiguous within each section  	 */ -	if (action == MEM_ONLINE) { -		for (i = 0; i < PAGES_PER_SECTION; i++) { -			if (PageReserved(first_page+i)) +	for (i = 0; i < sections_per_block; i++, pfn += PAGES_PER_SECTION) { +		if (WARN_ON_ONCE(!pfn_valid(pfn))) +			return false; +		page = pfn_to_page(pfn); + +		for (j = 0; j < PAGES_PER_SECTION; j++) { +			if (PageReserved(page + j))  				continue;  			printk(KERN_WARNING "section number %ld page number %d " -				"not reserved, was it already online? \n", -				psection, i); -			return -EBUSY; +				"not reserved, was it already online?\n", +				pfn_to_section_nr(pfn), j); + +			return false;  		}  	} +	return true; +} + +/* + * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is + * OK to have direct references to sparsemem variables in here. + */ +static int +memory_block_action(unsigned long phys_index, unsigned long action, int online_type) +{ +	unsigned long start_pfn; +	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; +	struct page *first_page; +	int ret; + +	first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT); +	start_pfn = page_to_pfn(first_page); +  	switch (action) {  		case MEM_ONLINE: -			start_pfn = page_to_pfn(first_page); -			ret = online_pages(start_pfn, PAGES_PER_SECTION); +			if (!pages_correctly_reserved(start_pfn)) +				return -EBUSY; + +			ret = online_pages(start_pfn, nr_pages, online_type);  			break;  		case MEM_OFFLINE: -			mem->state = MEM_GOING_OFFLINE; -			start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; -			ret = remove_memory(start_paddr, -					    PAGES_PER_SECTION << PAGE_SHIFT); -			if (ret) { -				mem->state = old_state; -				break; -			} +			ret = offline_pages(start_pfn, nr_pages);  			break;  		default: -			WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", -					__func__, mem, action, action); +			WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " +			     "%ld\n", __func__, phys_index, action, action);  			ret = -EINVAL;  	} @@ -241,41 +254,104 @@ static int memory_block_change_state(struct memory_block *mem,  		unsigned long to_state, unsigned long from_state_req)  {  	int ret = 0; -	mutex_lock(&mem->state_mutex); -	if (mem->state != from_state_req) { -		ret = -EINVAL; -		goto out; -	} +	if (mem->state != from_state_req) +		return -EINVAL; -	ret = memory_block_action(mem, to_state); -	if (!ret) -		mem->state = to_state; +	if (to_state == MEM_OFFLINE) +		mem->state = MEM_GOING_OFFLINE; + +	ret = memory_block_action(mem->start_section_nr, to_state, +				mem->online_type); + +	mem->state = ret ? from_state_req : to_state; + +	return ret; +} + +/* The device lock serializes operations on memory_subsys_[online|offline] */ +static int memory_subsys_online(struct device *dev) +{ +	struct memory_block *mem = to_memory_block(dev); +	int ret; + +	if (mem->state == MEM_ONLINE) +		return 0; + +	/* +	 * If we are called from store_mem_state(), online_type will be +	 * set >= 0 Otherwise we were called from the device online +	 * attribute and need to set the online_type. +	 */ +	if (mem->online_type < 0) +		mem->online_type = ONLINE_KEEP; + +	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); + +	/* clear online_type */ +	mem->online_type = -1; -out: -	mutex_unlock(&mem->state_mutex);  	return ret;  } +static int memory_subsys_offline(struct device *dev) +{ +	struct memory_block *mem = to_memory_block(dev); + +	if (mem->state == MEM_OFFLINE) +		return 0; + +	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); +} +  static ssize_t -store_mem_state(struct sys_device *dev, -		struct sysdev_attribute *attr, const char *buf, size_t count) +store_mem_state(struct device *dev, +		struct device_attribute *attr, const char *buf, size_t count)  { -	struct memory_block *mem; -	unsigned int phys_section_nr; -	int ret = -EINVAL; +	struct memory_block *mem = to_memory_block(dev); +	int ret, online_type; + +	ret = lock_device_hotplug_sysfs(); +	if (ret) +		return ret; -	mem = container_of(dev, struct memory_block, sysdev); -	phys_section_nr = mem->phys_index; +	if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) +		online_type = ONLINE_KERNEL; +	else if (!strncmp(buf, "online_movable", min_t(int, count, 14))) +		online_type = ONLINE_MOVABLE; +	else if (!strncmp(buf, "online", min_t(int, count, 6))) +		online_type = ONLINE_KEEP; +	else if (!strncmp(buf, "offline", min_t(int, count, 7))) +		online_type = -1; +	else { +		ret = -EINVAL; +		goto err; +	} -	if (!present_section_nr(phys_section_nr)) -		goto out; +	switch (online_type) { +	case ONLINE_KERNEL: +	case ONLINE_MOVABLE: +	case ONLINE_KEEP: +		/* +		 * mem->online_type is not protected so there can be a +		 * race here.  However, when racing online, the first +		 * will succeed and the second will just return as the +		 * block will already be online.  The online type +		 * could be either one, but that is expected. +		 */ +		mem->online_type = online_type; +		ret = device_online(&mem->dev); +		break; +	case -1: +		ret = device_offline(&mem->dev); +		break; +	default: +		ret = -EINVAL; /* should never happen */ +	} + +err: +	unlock_device_hotplug(); -	if (!strncmp(buf, "online", min((int)count, 6))) -		ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); -	else if(!strncmp(buf, "offline", min((int)count, 7))) -		ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); -out:  	if (ret)  		return ret;  	return count; @@ -290,41 +366,29 @@ out:   * s.t. if I offline all of these sections I can then   * remove the physical device?   */ -static ssize_t show_phys_device(struct sys_device *dev, -				struct sysdev_attribute *attr, char *buf) +static ssize_t show_phys_device(struct device *dev, +				struct device_attribute *attr, char *buf)  { -	struct memory_block *mem = -		container_of(dev, struct memory_block, sysdev); +	struct memory_block *mem = to_memory_block(dev);  	return sprintf(buf, "%d\n", mem->phys_device);  } -static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); -static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); -static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); -static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL); - -#define mem_create_simple_file(mem, attr_name)	\ -	sysdev_create_file(&mem->sysdev, &attr_##attr_name) -#define mem_remove_simple_file(mem, attr_name)	\ -	sysdev_remove_file(&mem->sysdev, &attr_##attr_name) +static DEVICE_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL); +static DEVICE_ATTR(state, 0644, show_mem_state, store_mem_state); +static DEVICE_ATTR(phys_device, 0444, show_phys_device, NULL); +static DEVICE_ATTR(removable, 0444, show_mem_removable, NULL);  /*   * Block size attribute stuff   */  static ssize_t -print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr, +print_block_size(struct device *dev, struct device_attribute *attr,  		 char *buf)  { -	return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); +	return sprintf(buf, "%lx\n", get_memory_block_size());  } -static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); - -static int block_size_init(void) -{ -	return sysfs_create_file(&memory_sysdev_class.kset.kobj, -				&attr_block_size_bytes.attr); -} +static DEVICE_ATTR(block_size_bytes, 0444, print_block_size, NULL);  /*   * Some architectures will have custom drivers to do this, and @@ -334,35 +398,35 @@ static int block_size_init(void)   */  #ifdef CONFIG_ARCH_MEMORY_PROBE  static ssize_t -memory_probe_store(struct class *class, struct class_attribute *attr, +memory_probe_store(struct device *dev, struct device_attribute *attr,  		   const char *buf, size_t count)  {  	u64 phys_addr;  	int nid; -	int ret; +	int i, ret; +	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;  	phys_addr = simple_strtoull(buf, NULL, 0); -	nid = memory_add_physaddr_to_nid(phys_addr); -	ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); +	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) +		return -EINVAL; -	if (ret) -		count = ret; +	for (i = 0; i < sections_per_block; i++) { +		nid = memory_add_physaddr_to_nid(phys_addr); +		ret = add_memory(nid, phys_addr, +				 PAGES_PER_SECTION << PAGE_SHIFT); +		if (ret) +			goto out; -	return count; -} -static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store); +		phys_addr += MIN_MEMORY_BLOCK_SIZE; +	} -static int memory_probe_init(void) -{ -	return sysfs_create_file(&memory_sysdev_class.kset.kobj, -				&class_attr_probe.attr); -} -#else -static inline int memory_probe_init(void) -{ -	return 0; +	ret = count; +out: +	return ret;  } + +static DEVICE_ATTR(probe, S_IWUSR, NULL, memory_probe_store);  #endif  #ifdef CONFIG_MEMORY_FAILURE @@ -372,15 +436,15 @@ static inline int memory_probe_init(void)  /* Soft offline a page */  static ssize_t -store_soft_offline_page(struct class *class, -			struct class_attribute *attr, +store_soft_offline_page(struct device *dev, +			struct device_attribute *attr,  			const char *buf, size_t count)  {  	int ret;  	u64 pfn;  	if (!capable(CAP_SYS_ADMIN))  		return -EPERM; -	if (strict_strtoull(buf, 0, &pfn) < 0) +	if (kstrtoull(buf, 0, &pfn) < 0)  		return -EINVAL;  	pfn >>= PAGE_SHIFT;  	if (!pfn_valid(pfn)) @@ -391,40 +455,23 @@ store_soft_offline_page(struct class *class,  /* Forcibly offline a page, including killing processes. */  static ssize_t -store_hard_offline_page(struct class *class, -			struct class_attribute *attr, +store_hard_offline_page(struct device *dev, +			struct device_attribute *attr,  			const char *buf, size_t count)  {  	int ret;  	u64 pfn;  	if (!capable(CAP_SYS_ADMIN))  		return -EPERM; -	if (strict_strtoull(buf, 0, &pfn) < 0) +	if (kstrtoull(buf, 0, &pfn) < 0)  		return -EINVAL;  	pfn >>= PAGE_SHIFT; -	ret = __memory_failure(pfn, 0, 0); +	ret = memory_failure(pfn, 0, 0);  	return ret ? ret : count;  } -static CLASS_ATTR(soft_offline_page, 0644, NULL, store_soft_offline_page); -static CLASS_ATTR(hard_offline_page, 0644, NULL, store_hard_offline_page); - -static __init int memory_fail_init(void) -{ -	int err; - -	err = sysfs_create_file(&memory_sysdev_class.kset.kobj, -				&class_attr_soft_offline_page.attr); -	if (!err) -		err = sysfs_create_file(&memory_sysdev_class.kset.kobj, -				&class_attr_hard_offline_page.attr); -	return err; -} -#else -static inline int memory_fail_init(void) -{ -	return 0; -} +static DEVICE_ATTR(soft_offline_page, S_IWUSR, NULL, store_soft_offline_page); +static DEVICE_ATTR(hard_offline_page, S_IWUSR, NULL, store_hard_offline_page);  #endif  /* @@ -437,30 +484,23 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn)  	return 0;  } +/* + * A reference for the returned object is held and the reference for the + * hinted object is released. + */  struct memory_block *find_memory_block_hinted(struct mem_section *section,  					      struct memory_block *hint)  { -	struct kobject *kobj; -	struct sys_device *sysdev; -	struct memory_block *mem; -	char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1]; +	int block_id = base_memory_block_id(__section_nr(section)); +	struct device *hintdev = hint ? &hint->dev : NULL; +	struct device *dev; -	kobj = hint ? &hint->sysdev.kobj : NULL; - -	/* -	 * This only works because we know that section == sysdev->id -	 * slightly redundant with sysdev_register() -	 */ -	sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section)); - -	kobj = kset_find_obj_hinted(&memory_sysdev_class.kset, name, kobj); -	if (!kobj) +	dev = subsys_find_device_by_id(&memory_subsys, block_id, hintdev); +	if (hint) +		put_device(&hint->dev); +	if (!dev)  		return NULL; - -	sysdev = container_of(kobj, struct sys_device, kobj); -	mem = container_of(sysdev, struct memory_block, sysdev); - -	return mem; +	return to_memory_block(dev);  }  /* @@ -469,79 +509,153 @@ struct memory_block *find_memory_block_hinted(struct mem_section *section,   * this gets to be a real problem, we can always use a radix   * tree or something here.   * - * This could be made generic for all sysdev classes. + * This could be made generic for all device subsystems.   */  struct memory_block *find_memory_block(struct mem_section *section)  {  	return find_memory_block_hinted(section, NULL);  } -static int add_memory_block(int nid, struct mem_section *section, -			unsigned long state, enum mem_add_context context) +static struct attribute *memory_memblk_attrs[] = { +	&dev_attr_phys_index.attr, +	&dev_attr_state.attr, +	&dev_attr_phys_device.attr, +	&dev_attr_removable.attr, +	NULL +}; + +static struct attribute_group memory_memblk_attr_group = { +	.attrs = memory_memblk_attrs, +}; + +static const struct attribute_group *memory_memblk_attr_groups[] = { +	&memory_memblk_attr_group, +	NULL, +}; + +/* + * register_memory - Setup a sysfs device for a memory block + */ +static +int register_memory(struct memory_block *memory) +{ +	memory->dev.bus = &memory_subsys; +	memory->dev.id = memory->start_section_nr / sections_per_block; +	memory->dev.release = memory_block_release; +	memory->dev.groups = memory_memblk_attr_groups; +	memory->dev.offline = memory->state == MEM_OFFLINE; + +	return device_register(&memory->dev); +} + +static int init_memory_block(struct memory_block **memory, +			     struct mem_section *section, unsigned long state)  { -	struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); +	struct memory_block *mem;  	unsigned long start_pfn; +	int scn_nr;  	int ret = 0; +	mem = kzalloc(sizeof(*mem), GFP_KERNEL);  	if (!mem)  		return -ENOMEM; -	mutex_lock(&mem_sysfs_mutex); - -	mem->phys_index = __section_nr(section); +	scn_nr = __section_nr(section); +	mem->start_section_nr = +			base_memory_block_id(scn_nr) * sections_per_block; +	mem->end_section_nr = mem->start_section_nr + sections_per_block - 1;  	mem->state = state;  	mem->section_count++; -	mutex_init(&mem->state_mutex); -	start_pfn = section_nr_to_pfn(mem->phys_index); +	start_pfn = section_nr_to_pfn(mem->start_section_nr);  	mem->phys_device = arch_get_memory_phys_device(start_pfn); -	ret = register_memory(mem, section); -	if (!ret) -		ret = mem_create_simple_file(mem, phys_index); -	if (!ret) -		ret = mem_create_simple_file(mem, state); -	if (!ret) -		ret = mem_create_simple_file(mem, phys_device); -	if (!ret) -		ret = mem_create_simple_file(mem, removable); -	if (!ret) { -		if (context == HOTPLUG) -			ret = register_mem_sect_under_node(mem, nid); -	} +	ret = register_memory(mem); -	mutex_unlock(&mem_sysfs_mutex); +	*memory = mem;  	return ret;  } -int remove_memory_block(unsigned long node_id, struct mem_section *section, -		int phys_device) +static int add_memory_block(int base_section_nr)  {  	struct memory_block *mem; +	int i, ret, section_count = 0, section_nr; -	mutex_lock(&mem_sysfs_mutex); -	mem = find_memory_block(section); - -	mem->section_count--; -	if (mem->section_count == 0) { -		unregister_mem_sect_under_nodes(mem); -		mem_remove_simple_file(mem, phys_index); -		mem_remove_simple_file(mem, state); -		mem_remove_simple_file(mem, phys_device); -		mem_remove_simple_file(mem, removable); -		unregister_memory(mem, section); +	for (i = base_section_nr; +	     (i < base_section_nr + sections_per_block) && i < NR_MEM_SECTIONS; +	     i++) { +		if (!present_section_nr(i)) +			continue; +		if (section_count == 0) +			section_nr = i; +		section_count++;  	} -	mutex_unlock(&mem_sysfs_mutex); +	if (section_count == 0) +		return 0; +	ret = init_memory_block(&mem, __nr_to_section(section_nr), MEM_ONLINE); +	if (ret) +		return ret; +	mem->section_count = section_count;  	return 0;  } +  /*   * need an interface for the VM to add new memory regions,   * but without onlining it.   */  int register_new_memory(int nid, struct mem_section *section)  { -	return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG); +	int ret = 0; +	struct memory_block *mem; + +	mutex_lock(&mem_sysfs_mutex); + +	mem = find_memory_block(section); +	if (mem) { +		mem->section_count++; +		put_device(&mem->dev); +	} else { +		ret = init_memory_block(&mem, section, MEM_OFFLINE); +		if (ret) +			goto out; +	} + +	if (mem->section_count == sections_per_block) +		ret = register_mem_sect_under_node(mem, nid); +out: +	mutex_unlock(&mem_sysfs_mutex); +	return ret; +} + +#ifdef CONFIG_MEMORY_HOTREMOVE +static void +unregister_memory(struct memory_block *memory) +{ +	BUG_ON(memory->dev.bus != &memory_subsys); + +	/* drop the ref. we got in remove_memory_block() */ +	put_device(&memory->dev); +	device_unregister(&memory->dev); +} + +static int remove_memory_block(unsigned long node_id, +			       struct mem_section *section, int phys_device) +{ +	struct memory_block *mem; + +	mutex_lock(&mem_sysfs_mutex); +	mem = find_memory_block(section); +	unregister_mem_sect_under_nodes(mem, __section_nr(section)); + +	mem->section_count--; +	if (mem->section_count == 0) +		unregister_memory(mem); +	else +		put_device(&mem->dev); + +	mutex_unlock(&mem_sysfs_mutex); +	return 0;  }  int unregister_memory_section(struct mem_section *section) @@ -551,6 +665,36 @@ int unregister_memory_section(struct mem_section *section)  	return remove_memory_block(0, section, 0);  } +#endif /* CONFIG_MEMORY_HOTREMOVE */ + +/* return true if the memory block is offlined, otherwise, return false */ +bool is_memblock_offlined(struct memory_block *mem) +{ +	return mem->state == MEM_OFFLINE; +} + +static struct attribute *memory_root_attrs[] = { +#ifdef CONFIG_ARCH_MEMORY_PROBE +	&dev_attr_probe.attr, +#endif + +#ifdef CONFIG_MEMORY_FAILURE +	&dev_attr_soft_offline_page.attr, +	&dev_attr_hard_offline_page.attr, +#endif + +	&dev_attr_block_size_bytes.attr, +	NULL +}; + +static struct attribute_group memory_root_attr_group = { +	.attrs = memory_root_attrs, +}; + +static const struct attribute_group *memory_root_attr_groups[] = { +	&memory_root_attr_group, +	NULL, +};  /*   * Initialize the sysfs support for memory devices... @@ -560,34 +704,27 @@ int __init memory_dev_init(void)  	unsigned int i;  	int ret;  	int err; +	unsigned long block_sz; -	memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; -	ret = sysdev_class_register(&memory_sysdev_class); +	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);  	if (ret)  		goto out; +	block_sz = get_memory_block_size(); +	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; +  	/*  	 * Create entries for memory sections that were found  	 * during boot and have been initialized  	 */ -	for (i = 0; i < NR_MEM_SECTIONS; i++) { -		if (!present_section_nr(i)) -			continue; -		err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, -				       BOOT); +	mutex_lock(&mem_sysfs_mutex); +	for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block) { +		err = add_memory_block(i);  		if (!ret)  			ret = err;  	} +	mutex_unlock(&mem_sysfs_mutex); -	err = memory_probe_init(); -	if (!ret) -		ret = err; -	err = memory_fail_init(); -	if (!ret) -		ret = err; -	err = block_size_init(); -	if (!ret) -		ret = err;  out:  	if (ret)  		printk(KERN_ERR "%s() failed: %d\n", __func__, ret);  | 
