Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           |   6
-rw-r--r--  mm/Kconfig.debug     |   5
-rw-r--r--  mm/backing-dev.c     |  16
-rw-r--r--  mm/bootmem.c         |  24
-rw-r--r--  mm/compaction.c      |  14
-rw-r--r--  mm/fadvise.c         |   3
-rw-r--r--  mm/failslab.c        |   2
-rw-r--r--  mm/filemap.c         |  20
-rw-r--r--  mm/huge_memory.c     |  16
-rw-r--r--  mm/hugetlb.c         |  58
-rw-r--r--  mm/memblock.c        | 961
-rw-r--r--  mm/memcontrol.c      | 128
-rw-r--r--  mm/mempolicy.c       |  25
-rw-r--r--  mm/mempool.c         | 104
-rw-r--r--  mm/migrate.c         |  16
-rw-r--r--  mm/mmap.c            |  60
-rw-r--r--  mm/mremap.c          |   9
-rw-r--r--  mm/nobootmem.c       |  45
-rw-r--r--  mm/nommu.c           |   2
-rw-r--r--  mm/oom_kill.c        |  15
-rw-r--r--  mm/page-writeback.c  | 591
-rw-r--r--  mm/page_alloc.c      | 773
-rw-r--r--  mm/percpu-vm.c       |  17
-rw-r--r--  mm/percpu.c          |  68
-rw-r--r--  mm/rmap.c            |  45
-rw-r--r--  mm/shmem.c           |  17
-rw-r--r--  mm/slab.c            |  46
-rw-r--r--  mm/slub.c            | 128
-rw-r--r--  mm/swap.c            |  14
-rw-r--r--  mm/swap_state.c      |   1
-rw-r--r--  mm/swapfile.c        |   6
-rw-r--r--  mm/vmalloc.c         |  68
-rw-r--r--  mm/vmscan.c          |  82
33 files changed, 1905 insertions, 1480 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 011b110365c..e338407f122 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -131,6 +131,12 @@ config SPARSEMEM_VMEMMAP
config HAVE_MEMBLOCK
boolean
+config HAVE_MEMBLOCK_NODE_MAP
+ boolean
+
+config ARCH_DISCARD_MEMBLOCK
+ boolean
+
config NO_BOOTMEM
boolean
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 8b1a477162d..4b2443254de 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -4,6 +4,7 @@ config DEBUG_PAGEALLOC
depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC
depends on !KMEMCHECK
select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC
---help---
Unmap pages from the kernel linear mapping after free_pages().
This results in a large slowdown, but helps to find certain types
@@ -22,3 +23,7 @@ config WANT_PAGE_DEBUG_FLAGS
config PAGE_POISONING
bool
select WANT_PAGE_DEBUG_FLAGS
+
+config PAGE_GUARD
+ bool
+ select WANT_PAGE_DEBUG_FLAGS
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a0860640378..7ba8feae11b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -600,14 +600,10 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
/*
* Finally, kill the kernel thread. We don't need to be RCU
- * safe anymore, since the bdi is gone from visibility. Force
- * unfreeze of the thread before calling kthread_stop(), otherwise
- * it would never exit if it is currently stuck in the refrigerator.
+ * safe anymore, since the bdi is gone from visibility.
*/
- if (bdi->wb.task) {
- thaw_process(bdi->wb.task);
+ if (bdi->wb.task)
kthread_stop(bdi->wb.task);
- }
}
/*
@@ -724,6 +720,14 @@ void bdi_destroy(struct backing_dev_info *bdi)
bdi_unregister(bdi);
+ /*
+ * If bdi_unregister() had already been called earlier, the
+ * wakeup_timer could still be armed because bdi_prune_sb()
+ * can race with the bdi_wakeup_thread_delayed() calls from
+ * __mark_inode_dirty().
+ */
+ del_timer_sync(&bdi->wb.wakeup_timer);
+
for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
percpu_counter_destroy(&bdi->bdi_stat[i]);
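
A note on the bdi_destroy() hunk above: it disarms the delayed wakeup timer with del_timer_sync() before the per-cpu counters are torn down, since a racing __mark_inode_dirty() may have re-armed it. A minimal sketch of that teardown-ordering pattern, with hypothetical names (my_dev, my_dev_destroy), not the kernel's actual bdi code:

#include <linux/slab.h>
#include <linux/timer.h>

struct my_dev {
	struct timer_list wakeup_timer;
};

static void my_dev_destroy(struct my_dev *dev)
{
	/*
	 * A racing path may have re-armed the timer after unregister;
	 * del_timer_sync() waits for any running handler to finish and
	 * leaves the timer disarmed, so freeing the structure is safe.
	 */
	del_timer_sync(&dev->wakeup_timer);
	kfree(dev);
}
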
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 1a77012ecdb..668e94df8cf 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -56,7 +56,7 @@ early_param("bootmem_debug", bootmem_debug_setup);
static unsigned long __init bootmap_bytes(unsigned long pages)
{
- unsigned long bytes = (pages + 7) / 8;
+ unsigned long bytes = DIV_ROUND_UP(pages, 8);
return ALIGN(bytes, sizeof(long));
}
@@ -171,7 +171,6 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
- int aligned;
struct page *page;
unsigned long start, end, pages, count = 0;
@@ -181,14 +180,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
start = bdata->node_min_pfn;
end = bdata->node_low_pfn;
- /*
- * If the start is aligned to the machines wordsize, we might
- * be able to free pages in bulks of that order.
- */
- aligned = !(start & (BITS_PER_LONG - 1));
-
- bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
- bdata - bootmem_node_data, start, end, aligned);
+ bdebug("nid=%td start=%lx end=%lx\n",
+ bdata - bootmem_node_data, start, end);
while (start < end) {
unsigned long *map, idx, vec;
@@ -196,12 +189,17 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
map = bdata->node_bootmem_map;
idx = start - bdata->node_min_pfn;
vec = ~map[idx / BITS_PER_LONG];
-
- if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
+ /*
+ * If we have a properly aligned and fully unreserved
+ * BITS_PER_LONG block of pages in front of us, free
+ * it in one go.
+ */
+ if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
int order = ilog2(BITS_PER_LONG);
__free_pages_bootmem(pfn_to_page(start), order);
count += BITS_PER_LONG;
+ start += BITS_PER_LONG;
} else {
unsigned long off = 0;
@@ -214,8 +212,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
vec >>= 1;
off++;
}
+ start = ALIGN(start + 1, BITS_PER_LONG);
}
- start += BITS_PER_LONG;
}
page = virt_to_page(bdata->node_bootmem_map);
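
The rewritten free_all_bootmem_core() loop above releases a fully unreserved, BITS_PER_LONG-aligned bitmap word as one high-order block and otherwise falls back to page-by-page freeing, realigning start to the next word boundary afterwards. A self-contained userspace sketch of the same fast path (free_block() and free_unreserved() are hypothetical stand-ins):

#include <stdint.h>
#include <stdio.h>

#define BITS 64UL			/* stand-in for BITS_PER_LONG */

/* hypothetical callback: pretend to free @n pages starting at @pfn */
static void free_block(unsigned long pfn, unsigned long n)
{
	printf("free %lu..%lu\n", pfn, pfn + n - 1);
}

static void free_unreserved(const uint64_t *map,
			    unsigned long start, unsigned long end)
{
	while (start < end) {
		uint64_t vec = ~map[start / BITS];	/* 1 = free page */

		if (start % BITS == 0 && vec == ~0ULL && start + BITS <= end) {
			free_block(start, BITS);	/* whole word in one go */
			start += BITS;
		} else {
			unsigned long pfn = start;

			vec >>= start % BITS;
			while (vec && pfn < end) {
				if (vec & 1)
					free_block(pfn, 1);
				vec >>= 1;
				pfn++;
			}
			/* resume at the next word boundary */
			start = (start / BITS + 1) * BITS;
		}
	}
}

int main(void)
{
	uint64_t map[2] = { 0, 0x5 };	/* pages 64 and 66 reserved */

	free_unreserved(map, 0, 128);
	return 0;
}
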
diff --git a/mm/compaction.c b/mm/compaction.c
index 899d9563858..e6670c34eb4 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -365,8 +365,10 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
nr_isolated++;
/* Avoid isolating too much */
- if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
+ if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
+ ++low_pfn;
break;
+ }
}
acct_isolated(zone, cc);
@@ -721,23 +723,23 @@ int sysctl_extfrag_handler(struct ctl_table *table, int write,
}
#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
-ssize_t sysfs_compact_node(struct sys_device *dev,
- struct sysdev_attribute *attr,
+ssize_t sysfs_compact_node(struct device *dev,
+ struct device_attribute *attr,
const char *buf, size_t count)
{
compact_node(dev->id);
return count;
}
-static SYSDEV_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
+static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
int compaction_register_node(struct node *node)
{
- return sysdev_create_file(&node->sysdev, &attr_compact);
+ return device_create_file(&node->dev, &dev_attr_compact);
}
void compaction_unregister_node(struct node *node)
{
- return sysdev_remove_file(&node->sysdev, &attr_compact);
+ return device_remove_file(&node->dev, &dev_attr_compact);
}
#endif /* CONFIG_SYSFS && CONFIG_NUMA */
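
The sysdev-to-device conversion above follows the standard driver-core pattern: DEVICE_ATTR(name, mode, show, store) defines a struct device_attribute named dev_attr_<name>, which is then attached with device_create_file(). A sketch with a hypothetical write-only attribute (demo_store is illustrative, not the kernel's compaction handler):

#include <linux/device.h>
#include <linux/stat.h>

static ssize_t demo_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t count)
{
	/* act on the write, e.g. kick off a per-device operation */
	return count;
}
static DEVICE_ATTR(demo, S_IWUSR, NULL, demo_store);

static int demo_register(struct device *dev)
{
	return device_create_file(dev, &dev_attr_demo);
}

static void demo_unregister(struct device *dev)
{
	device_remove_file(dev, &dev_attr_demo);
}
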
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 8d723c9e8b7..469491e0af7 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -117,7 +117,8 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
break;
case POSIX_FADV_DONTNEED:
if (!bdi_write_congested(mapping->backing_dev_info))
- filemap_flush(mapping);
+ __filemap_fdatawrite_range(mapping, offset, endbyte,
+ WB_SYNC_NONE);
/* First and last FULL page! */
start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT;
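
With the fadvise hunk above, POSIX_FADV_DONTNEED starts asynchronous writeback for just the advised byte range instead of flushing the whole file; the userspace interface is unchanged. A minimal usage example (the path is hypothetical):

#define _POSIX_C_SOURCE 200112L
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/tmp/example.dat", O_RDONLY);	/* hypothetical file */
	int err;

	if (fd < 0)
		return 1;
	/* hint that the first 1 MiB will not be reused soon; dirty pages
	   in that range are queued for writeback before being dropped */
	err = posix_fadvise(fd, 0, 1 << 20, POSIX_FADV_DONTNEED);
	if (err)
		fprintf(stderr, "posix_fadvise: %d\n", err);	/* returns an errno value */
	close(fd);
	return 0;
}
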
diff --git a/mm/failslab.c b/mm/failslab.c
index 0dd7b8fec71..fefaabaab76 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -35,7 +35,7 @@ __setup("failslab=", setup_failslab);
static int __init failslab_debugfs_init(void)
{
struct dentry *dir;
- mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+ umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr);
if (IS_ERR(dir))
diff --git a/mm/filemap.c b/mm/filemap.c
index c0018f2d50e..c4ee2e918be 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1828,7 +1828,7 @@ repeat:
page = __page_cache_alloc(gfp | __GFP_COLD);
if (!page)
return ERR_PTR(-ENOMEM);
- err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+ err = add_to_page_cache_lru(page, mapping, index, gfp);
if (unlikely(err)) {
page_cache_release(page);
if (err == -EEXIST)
@@ -1925,10 +1925,7 @@ static struct page *wait_on_page_read(struct page *page)
* @gfp: the page allocator flags to use if allocating
*
* This is the same as "read_mapping_page(mapping, index, NULL)", but with
- * any new page allocations done using the specified allocation flags. Note
- * that the Radix tree operations will still use GFP_KERNEL, so you can't
- * expect to do this atomically or anything like that - but you can pass in
- * other page requirements.
+ * any new page allocations done using the specified allocation flags.
*
* If the page does not get brought uptodate, return -EIO.
*/
@@ -1971,7 +1968,7 @@ EXPORT_SYMBOL(read_cache_page);
*/
int should_remove_suid(struct dentry *dentry)
{
- mode_t mode = dentry->d_inode->i_mode;
+ umode_t mode = dentry->d_inode->i_mode;
int kill = 0;
/* suid always must be killed */
@@ -2354,8 +2351,11 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags)
{
int status;
+ gfp_t gfp_mask;
struct page *page;
gfp_t gfp_notmask = 0;
+
+ gfp_mask = mapping_gfp_mask(mapping) | __GFP_WRITE;
if (flags & AOP_FLAG_NOFS)
gfp_notmask = __GFP_FS;
repeat:
@@ -2363,7 +2363,7 @@ repeat:
if (page)
goto found;
- page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
+ page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
if (!page)
return NULL;
status = add_to_page_cache_lru(page, mapping, index,
@@ -2407,7 +2407,6 @@ static ssize_t generic_perform_write(struct file *file,
iov_iter_count(i));
again:
-
/*
* Bring in the user page that we will copy from _first_.
* Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2462,10 @@ again:
written += copied;
balance_dirty_pages_ratelimited(mapping);
-
+ if (fatal_signal_pending(current)) {
+ status = -EINTR;
+ break;
+ }
} while (iov_iter_count(i));
return written ? written : status;
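
The last filemap.c hunk lets a fatally signalled writer leave the copy loop with -EINTR right after dirty-page balancing, where it could otherwise stall indefinitely under writeback throttling. A self-contained userspace analog of that loop shape (all names hypothetical, with a signal flag standing in for fatal_signal_pending()):

#include <errno.h>
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t fatal_pending;

static void on_sigterm(int sig)
{
	(void)sig;
	fatal_pending = 1;
}

/* mirror of the loop shape: check for a fatal signal once per chunk,
 * right after the step that may throttle for a long time */
static long copy_chunks(long nchunks)
{
	long written = 0, status = 0;

	while (nchunks--) {
		written++;		/* stand-in for copying one chunk */
		/* kernel: balance_dirty_pages_ratelimited() can block here */
		if (fatal_pending) {	/* kernel: fatal_signal_pending(current) */
			status = -EINTR;
			break;
		}
	}
	return written ? written : status;
}

int main(void)
{
	signal(SIGTERM, on_sigterm);
	printf("%ld\n", copy_chunks(1000000));
	return 0;
}
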
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4298abaae15..36b3d988b4e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage)
static void khugepaged_alloc_sleep(void)
{
- DEFINE_WAIT(wait);
- add_wait_queue(&khugepaged_wait, &wait);
- schedule_timeout_interruptible(
- msecs_to_jiffies(
- khugepaged_alloc_sleep_millisecs));
- remove_wait_queue(&khugepaged_wait, &wait);
+ wait_event_freezable_timeout(khugepaged_wait, false,
+ msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
}
#ifndef CONFIG_NUMA
@@ -2313,14 +2309,10 @@ static void khugepaged_loop(void)
if (unlikely(kthread_should_stop()))
break;
if (khugepaged_has_work()) {
- DEFINE_WAIT(wait);
if (!khugepaged_scan_sleep_millisecs)
continue;
- add_wait_queue(&khugepaged_wait, &wait);
- schedule_timeout_interruptible(
- msecs_to_jiffies(
- khugepaged_scan_sleep_millisecs));
- remove_wait_queue(&khugepaged_wait, &wait);
+ wait_event_freezable_timeout(khugepaged_wait, false,
+ msecs_to_jiffies(khugepaged_scan_sleep_millisecs));
} else if (khugepaged_enabled())
wait_event_freezable(khugepaged_wait,
khugepaged_wait_event());
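
Both khugepaged hunks above replace an open-coded add_wait_queue()/schedule_timeout_interruptible()/remove_wait_queue() sequence with wait_event_freezable_timeout(), so the daemon's naps also cooperate with the freezer during suspend. The idiom in isolation (demo_wait and demo_nap are hypothetical):

#include <linux/freezer.h>
#include <linux/jiffies.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wait);

static void demo_nap(unsigned int msecs)
{
	/*
	 * A literal "false" condition means: sleep for the full timeout
	 * (wakeups just re-check the condition), while staying freezable
	 * so suspend never blocks on this task.
	 */
	wait_event_freezable_timeout(demo_wait, false,
				     msecs_to_jiffies(msecs));
}
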
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dae27ba3be2..ea8c3a4cd2a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
__SetPageHead(page);
for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
__SetPageTail(p);
+ set_page_count(p, 0);
p->first_page = page;
}
}
@@ -799,7 +800,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
if (page && arch_prepare_hugepage(page)) {
__free_pages(page, huge_page_order(h));
- return NULL;
+ page = NULL;
}
spin_lock(&hugetlb_lock);
@@ -900,7 +901,6 @@ retry:
h->resv_huge_pages += delta;
ret = 0;
- spin_unlock(&hugetlb_lock);
/* Free the needed pages to the hugetlb pool */
list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
if ((--needed) < 0)
@@ -914,6 +914,7 @@ retry:
VM_BUG_ON(page_count(page));
enqueue_huge_page(h, page);
}
+ spin_unlock(&hugetlb_lock);
/* Free unnecessary surplus pages to the buddy allocator */
free:
@@ -1591,9 +1592,9 @@ static void __init hugetlb_sysfs_init(void)
/*
* node_hstate/s - associate per node hstate attributes, via their kobjects,
- * with node sysdevs in node_devices[] using a parallel array. The array
- * index of a node sysdev or _hstate == node id.
- * This is here to avoid any static dependency of the node sysdev driver, in
+ * with node devices in node_devices[] using a parallel array. The array
+ * index of a node device or _hstate == node id.
+ * This is here to avoid any static dependency of the node device driver, in
* the base kernel, on the hugetlb module.
*/
struct node_hstate {
@@ -1603,7 +1604,7 @@ struct node_hstate {
struct node_hstate node_hstates[MAX_NUMNODES];
/*
- * A subset of global hstate attributes for node sysdevs
+ * A subset of global hstate attributes for node devices
*/
static struct attribute *per_node_hstate_attrs[] = {
&nr_hugepages_attr.attr,
@@ -1617,7 +1618,7 @@ static struct attribute_group per_node_hstate_attr_group = {
};
/*
- * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
+ * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj.
* Returns node id via non-NULL nidp.
*/
static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
@@ -1640,13 +1641,13 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
}
/*
- * Unregister hstate attributes from a single node sysdev.
+ * Unregister hstate attributes from a single node device.
* No-op if no hstate attributes attached.
*/
void hugetlb_unregister_node(struct node *node)
{
struct hstate *h;
- struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+ struct node_hstate *nhs = &node_hstates[node->dev.id];
if (!nhs->hugepages_kobj)
return; /* no hstate attributes */
@@ -1662,7 +1663,7 @@ void hugetlb_unregister_node(struct node *node)
}
/*
- * hugetlb module exit: unregister hstate attributes from node sysdevs
+ * hugetlb module exit: unregister hstate attributes from node devices
* that have them.
*/
static void hugetlb_unregister_all_nodes(void)
@@ -1670,7 +1671,7 @@ static void hugetlb_unregister_all_nodes(void)
int nid;
/*
- * disable node sysdev registrations.
+ * disable node device registrations.
*/
register_hugetlbfs_with_node(NULL, NULL);
@@ -1682,20 +1683,20 @@ static void hugetlb_unregister_all_nodes(void)
}
/*
- * Register hstate attributes for a single node sysdev.
+ * Register hstate attributes for a single node device.
* No-op if attributes already registered.
*/
void hugetlb_register_node(struct node *node)
{
struct hstate *h;
- struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+ struct node_hstate *nhs = &node_hstates[node->dev.id];
int err;
if (nhs->hugepages_kobj)
return; /* already allocated */
nhs->hugepages_kobj = kobject_create_and_add("hugepages",
- &node->sysdev.kobj);
+ &node->dev.kobj);
if (!nhs->hugepages_kobj)
return;
@@ -1706,7 +1707,7 @@ void hugetlb_register_node(struct node *node)
if (err) {
printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
" for node %d\n",
- h->name, node->sysdev.id);
+ h->name, node->dev.id);
hugetlb_unregister_node(node);
break;
}
@@ -1715,8 +1716,8 @@ void hugetlb_register_node(struct node *node)
/*
* hugetlb init time: register hstate attributes for all registered node
- * sysdevs of nodes that have memory. All on-line nodes should have
- * registered their associated sysdev by this time.
+ * devices of nodes that have memory. All on-line nodes should have
+ * registered their associated device by this time.
*/
static void hugetlb_register_all_nodes(void)
{
@@ -1724,12 +1725,12 @@ static void hugetlb_register_all_nodes(void)
for_each_node_state(nid, N_HIGH_MEMORY) {
struct node *node = &node_devices[nid];
- if (node->sysdev.id == nid)
+ if (node->dev.id == nid)
hugetlb_register_node(node);
}
/*
- * Let the node sysdev driver know we're here so it can
+ * Let the node device driver know we're here so it can
* [un]register hstate attributes on node hotplug.
*/
register_hugetlbfs_with_node(hugetlb_register_node,
@@ -2314,8 +2315,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
* from page cache lookup which is in HPAGE_SIZE units.
*/
address = address & huge_page_mask(h);
- pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
- + (vma->vm_pgoff >> PAGE_SHIFT);
+ pgoff = vma_hugecache_offset(h, vma, address);
mapping = (struct address_space *)page_private(page);
/*
@@ -2348,6 +2348,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
/*
* Hugetlb_cow() should be called with page lock of the original hugepage held.
+ * Called with hugetlb_instantiation_mutex held and pte_page locked so we
+ * cannot race with other handlers or page migration.
+ * Keep the pte_same checks anyway to make transition from the mutex easier.
*/
static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *ptep, pte_t pte,
@@ -2407,7 +2410,14 @@ retry_avoidcopy:
BUG_ON(page_count(old_page) != 1);
BUG_ON(huge_pte_none(pte));
spin_lock(&mm->page_table_lock);
- goto retry_avoidcopy;
+ ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+ if (likely(pte_same(huge_ptep_get(ptep), pte)))
+ goto retry_avoidcopy;
+ /*
+ * race occurs while re-acquiring page_table_lock, and
+ * our job is done.
+ */
+ return 0;
}
WARN_ON_ONCE(1);
}
@@ -2422,6 +2432,8 @@ retry_avoidcopy:
* anon_vma prepared.
*/
if (unlikely(anon_vma_prepare(vma))) {
+ page_cache_release(new_page);
+ page_cache_release(old_page);
/* Caller expects lock to be held */
spin_lock(&mm->page_table_lock);
return VM_FAULT_OOM;
@@ -2627,6 +2639,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
static DEFINE_MUTEX(hugetlb_instantiation_mutex);
struct hstate *h = hstate_vma(vma);
+ address &= huge_page_mask(h);
+
ptep = huge_pte_offset(mm, address);
if (ptep) {
entry = huge_ptep_get(ptep);
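
The retry_avoidcopy change in the hugetlb diff above applies a general rule: state observed before dropping a lock must be revalidated once the lock is retaken, here via pte_same() on a freshly looked-up pte. A minimal userspace illustration of the same revalidate-after-relock pattern (all names hypothetical):

#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int generation;	/* bumped whenever the protected state changes */

/*
 * Returns 1 if the state is unchanged since @seen and we claimed the
 * work; 0 if another thread resolved it while the lock was dropped
 * (the analog of the pte_same() check failing).
 */
static int relock_and_revalidate(int seen)
{
	pthread_mutex_lock(&lock);
	if (generation != seen) {
		pthread_mutex_unlock(&lock);
		return 0;	/* someone else did our job */
	}
	generation++;		/* claim the work under the lock */
	pthread_mutex_unlock(&lock);
	return 1;
}
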
diff --git a/mm/memblock.c b/mm/memblock.c
index 84bec4969ed..2f55f19b7c8 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,12 +20,23 @@
#include <linux/seq_file.h>
#include <linux/memblock.h>
-struct memblock memblock __initdata_memblock;
+static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+
+struct memblock memblock __initdata_memblock = {
+ .memory.regions = memblock_memory_init_regions,
+ .memory.cnt = 1, /* empty dummy entry */
+ .memory.max = INIT_MEMBLOCK_REGIONS,
+
+ .reserved.regions = memblock_reserved_init_regions,
+ .reserved.cnt = 1, /* empty dummy entry */
+ .reserved.max = INIT_MEMBLOCK_REGIONS,
+
+ .current_limit = MEMBLOCK_ALLOC_ANYWHERE,
+};
int memblock_debug __initdata_memblock;
-int memblock_can_resize __initdata_memblock;
-static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
-static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock;
+static int memblock_can_resize __initdata_memblock;
/* inline so we don't get a warning when pr_debug is compiled out */
static inline const char *memblock_type_name(struct memblock_type *type)
@@ -38,20 +49,15 @@ static inline const char *memblock_type_name(struct memblock_type *type)
return "unknown";
}
-/*
- * Address comparison utilities
- */
-
-static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size)
-{
- return addr & ~(size - 1);
-}
-
-static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size)
+/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */
+static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
- return (addr + (size - 1)) & ~(size - 1);
+ return *size = min(*size, (phys_addr_t)ULLONG_MAX - base);
}
+/*
+ * Address comparison utilities
+ */
static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
phys_addr_t base2, phys_addr_t size2)
{
@@ -73,83 +79,66 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
return (i < type->cnt) ? i : -1;
}
-/*
- * Find, allocate, deallocate or reserve unreserved regions. All allocations
- * are top-down.
+/**
+ * memblock_find_in_range_node - find free area in given range and node
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @size: size of free area to find
+ * @align: alignment of free area to find
+ * @nid: nid of the free area to find, %MAX_NUMNODES for any node
+ *
+ * Find @size free area aligned to @align in the specified range and node.
+ *
+ * RETURNS:
+ * Found address on success, %0 on failure.
*/
-
-static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end,
- phys_addr_t size, phys_addr_t align)
+phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start,
+ phys_addr_t end, phys_addr_t size,
+ phys_addr_t align, int nid)
{
- phys_addr_t base, res_base;
- long j;
-
- /* In case, huge size is requested */
- if (end < size)
- return MEMBLOCK_ERROR;
-
- base = memblock_align_down((end - size), align);
+ phys_addr_t this_start, this_end, cand;
+ u64 i;
- /* Prevent allocations returning 0 as it's also used to
- * indicate an allocation failure
- */
- if (start == 0)
- start = PAGE_SIZE;
-
- while (start <= base) {
- j = memblock_overlaps_region(&memblock.reserved, base, size);
- if (j < 0)
- return base;
- res_base = memblock.reserved.regions[j].base;
- if (res_base < size)
- break;
- base = memblock_align_down(res_base - size, align);
- }
+ /* align @size to avoid excessive fragmentation on reserved array */
+ size = round_up(size, align);
- return MEMBLOCK_ERROR;
-}
-
-static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
- phys_addr_t align, phys_addr_t start, phys_addr_t end)
-{
- long i;
-
- BUG_ON(0 == size);
-
- /* Pump up max_addr */
+ /* pump up @end */
if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
end = memblock.current_limit;
- /* We do a top-down search, this tends to limit memory
- * fragmentation by keeping early boot allocs near the
- * top of memory
- */
- for (i = memblock.memory.cnt - 1; i >= 0; i--) {
- phys_addr_t memblockbase = memblock.memory.regions[i].base;
- phys_addr_t memblocksize = memblock.memory.regions[i].size;
- phys_addr_t bottom, top, found;
+ /* adjust @start to avoid underflow and allocating the first page */
+ start = max3(start, size, (phys_addr_t)PAGE_SIZE);
+ end = max(start, end);
- if (memblocksize < size)
- continue;
- if ((memblockbase + memblocksize) <= start)
- break;
- bottom = max(memblockbase, start);
- top = min(memblockbase + memblocksize, end);
- if (bottom >= top)
- continue;
- found = memblock_find_region(bottom, top, size, align);
- if (found != MEMBLOCK_ERROR)
- return found;
+ for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
+ this_start = clamp(this_start, start, end);
+ this_end = clamp(this_end, start, end);
+
+ cand = round_down(this_end - size, align);
+ if (cand >= this_start)
+ return cand;
}
- return MEMBLOCK_ERROR;
+ return 0;
}
-/*
- * Find a free area with specified alignment in a specific range.
+/**
+ * memblock_find_in_range - find free area in given range
+ * @start: start of candidate range
+ * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
+ * @size: size of free area to find
+ * @align: alignment of free area to find
+ *
+ * Find @size free area aligned to @align in the specified range.
+ *
+ * RETURNS:
+ * Found address on success, %0 on failure.
*/
-u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align)
+phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
+ phys_addr_t end, phys_addr_t size,
+ phys_addr_t align)
{
- return memblock_find_base(size, align, start, end);
+ return memblock_find_in_range_node(start, end, size, align,
+ MAX_NUMNODES);
}
/*
@@ -178,25 +167,21 @@ int __init_memblock memblock_reserve_reserved_regions(void)
static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
- unsigned long i;
-
- for (i = r; i < type->cnt - 1; i++) {
- type->regions[i].base = type->regions[i + 1].base;
- type->regions[i].size = type->regions[i + 1].size;
- }
+ type->total_size -= type->regions[r].size;
+ memmove(&type->regions[r], &type->regions[r + 1],
+ (type->cnt - (r + 1)) * sizeof(type->regions[r]));
type->cnt--;
/* Special case for empty arrays */
if (type->cnt == 0) {
+ WARN_ON(type->total_size != 0);
type->cnt = 1;
type->regions[0].base = 0;
type->regions[0].size = 0;
+ memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
}
}
-/* Defined below but needed now */
-static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size);
-
static int __init_memblock memblock_double_array(struct memblock_type *type)
{
struct memblock_region *new_array, *old_array;
@@ -226,10 +211,10 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
*/
if (use_slab) {
new_array = kmalloc(new_size, GFP_KERNEL);
- addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array);
+ addr = new_array ? __pa(new_array) : 0;
} else
- addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE);
- if (addr == MEMBLOCK_ERROR) {
+ addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t));
+ if (!addr) {
pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
memblock_type_name(type), type->max, type->max * 2);
return -1;
@@ -254,7 +239,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
return 0;
/* Add the new reserved region now. Should not fail ! */
- BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size));
+ BUG_ON(memblock_reserve(addr, new_size));
/* If the array wasn't our static init one, then free it. We only do
* that before SLAB is available as later on, we don't know whether
@@ -268,343 +253,514 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)
return 0;
}
-int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1,
- phys_addr_t addr2, phys_addr_t size2)
-{
- return 1;
-}
-
-static long __init_memblock memblock_add_region(struct memblock_type *type,
- phys_addr_t base, phys_addr_t size)
+/**
+ * memblock_merge_regions - merge neighboring compatible regions
+ * @type: memblock type to scan
+ *
+ * Scan @type and merge neighboring compatible regions.
+ */
+static void __init_memblock memblock_merge_regions(struct memblock_type *type)
{
- phys_addr_t end = base + size;
- int i, slot = -1;
-
- /* First try and coalesce this MEMBLOCK with others */
- for (i = 0; i < type->cnt; i++) {
- struct memblock_region *rgn = &type->regions[i];
- phys_addr_t rend = rgn->base + rgn->size;
+ int i = 0;
- /* Exit if there's no possible hits */
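
The heart of the new memblock_find_in_range_node() above is the per-range arithmetic: clamp each free range to the caller's window, then take the highest candidate round_down(this_end - size, align) that still fits. That arithmetic in isolation, as a standalone sketch (clamp_addr() and pick_top_down() are hypothetical; align must be a power of two):

#include <stdint.h>

typedef uint64_t phys_addr_t;

static phys_addr_t clamp_addr(phys_addr_t v, phys_addr_t lo, phys_addr_t hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/*
 * Highest align-aligned block of @size inside one free range
 * [this_start, this_end), restricted to the caller's [start, end)
 * window; returns 0 on failure, matching the kernel convention.
 */
static phys_addr_t pick_top_down(phys_addr_t this_start, phys_addr_t this_end,
				 phys_addr_t start, phys_addr_t end,
				 phys_addr_t size, phys_addr_t align)
{
	phys_addr_t cand;

	this_start = clamp_addr(this_start, start, end);
	this_end = clamp_addr(this_end, start, end);

	if (this_end < size)	/* would underflow; the kernel instead
				 * raises start to at least size */
		return 0;
	cand = (this_end - size) & ~(align - 1);	/* round_down */
	return cand >= this_start ? cand : 0;
}
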