Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig           |   6
-rw-r--r--  mm/Kconfig.debug     |   5
-rw-r--r--  mm/backing-dev.c     |  16
-rw-r--r--  mm/bootmem.c         |  24
-rw-r--r--  mm/compaction.c      |  14
-rw-r--r--  mm/fadvise.c         |   3
-rw-r--r--  mm/failslab.c        |   2
-rw-r--r--  mm/filemap.c         |  20
-rw-r--r--  mm/huge_memory.c     |  16
-rw-r--r--  mm/hugetlb.c         |  58
-rw-r--r--  mm/memblock.c        | 961
-rw-r--r--  mm/memcontrol.c      | 128
-rw-r--r--  mm/mempolicy.c       |  25
-rw-r--r--  mm/mempool.c         | 104
-rw-r--r--  mm/migrate.c         |  16
-rw-r--r--  mm/mmap.c            |  60
-rw-r--r--  mm/mremap.c          |   9
-rw-r--r--  mm/nobootmem.c       |  45
-rw-r--r--  mm/nommu.c           |   2
-rw-r--r--  mm/oom_kill.c        |  15
-rw-r--r--  mm/page-writeback.c  | 591
-rw-r--r--  mm/page_alloc.c      | 773
-rw-r--r--  mm/percpu-vm.c       |  17
-rw-r--r--  mm/percpu.c          |  68
-rw-r--r--  mm/rmap.c            |  45
-rw-r--r--  mm/shmem.c           |  17
-rw-r--r--  mm/slab.c            |  46
-rw-r--r--  mm/slub.c            | 128
-rw-r--r--  mm/swap.c            |  14
-rw-r--r--  mm/swap_state.c      |   1
-rw-r--r--  mm/swapfile.c        |   6
-rw-r--r--  mm/vmalloc.c         |  68
-rw-r--r--  mm/vmscan.c          |  82
33 files changed, 1905 insertions, 1480 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 011b110365c..e338407f122 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -131,6 +131,12 @@ config SPARSEMEM_VMEMMAP config HAVE_MEMBLOCK boolean +config HAVE_MEMBLOCK_NODE_MAP + boolean + +config ARCH_DISCARD_MEMBLOCK + boolean + config NO_BOOTMEM boolean diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 8b1a477162d..4b2443254de 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -4,6 +4,7 @@ config DEBUG_PAGEALLOC depends on !HIBERNATION || ARCH_SUPPORTS_DEBUG_PAGEALLOC && !PPC && !SPARC depends on !KMEMCHECK select PAGE_POISONING if !ARCH_SUPPORTS_DEBUG_PAGEALLOC + select PAGE_GUARD if ARCH_SUPPORTS_DEBUG_PAGEALLOC ---help--- Unmap pages from the kernel linear mapping after free_pages(). This results in a large slowdown, but helps to find certain types @@ -22,3 +23,7 @@ config WANT_PAGE_DEBUG_FLAGS config PAGE_POISONING bool select WANT_PAGE_DEBUG_FLAGS + +config PAGE_GUARD + bool + select WANT_PAGE_DEBUG_FLAGS diff --git a/mm/backing-dev.c b/mm/backing-dev.c index a0860640378..7ba8feae11b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -600,14 +600,10 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) /* * Finally, kill the kernel thread. We don't need to be RCU - * safe anymore, since the bdi is gone from visibility. Force - * unfreeze of the thread before calling kthread_stop(), otherwise - * it would never exet if it is currently stuck in the refrigerator. + * safe anymore, since the bdi is gone from visibility. */ - if (bdi->wb.task) { - thaw_process(bdi->wb.task); + if (bdi->wb.task) kthread_stop(bdi->wb.task); - } } /* @@ -724,6 +720,14 @@ void bdi_destroy(struct backing_dev_info *bdi) bdi_unregister(bdi); + /* + * If bdi_unregister() had already been called earlier, the + * wakeup_timer could still be armed because bdi_prune_sb() + * can race with the bdi_wakeup_thread_delayed() calls from + * __mark_inode_dirty(). + */ + del_timer_sync(&bdi->wb.wakeup_timer); + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); diff --git a/mm/bootmem.c b/mm/bootmem.c index 1a77012ecdb..668e94df8cf 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -56,7 +56,7 @@ early_param("bootmem_debug", bootmem_debug_setup); static unsigned long __init bootmap_bytes(unsigned long pages) { - unsigned long bytes = (pages + 7) / 8; + unsigned long bytes = DIV_ROUND_UP(pages, 8); return ALIGN(bytes, sizeof(long)); } @@ -171,7 +171,6 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size) static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { - int aligned; struct page *page; unsigned long start, end, pages, count = 0; @@ -181,14 +180,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) start = bdata->node_min_pfn; end = bdata->node_low_pfn; - /* - * If the start is aligned to the machines wordsize, we might - * be able to free pages in bulks of that order. 
- */ - aligned = !(start & (BITS_PER_LONG - 1)); - - bdebug("nid=%td start=%lx end=%lx aligned=%d\n", - bdata - bootmem_node_data, start, end, aligned); + bdebug("nid=%td start=%lx end=%lx\n", + bdata - bootmem_node_data, start, end); while (start < end) { unsigned long *map, idx, vec; @@ -196,12 +189,17 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) map = bdata->node_bootmem_map; idx = start - bdata->node_min_pfn; vec = ~map[idx / BITS_PER_LONG]; - - if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) { + /* + * If we have a properly aligned and fully unreserved + * BITS_PER_LONG block of pages in front of us, free + * it in one go. + */ + if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) { int order = ilog2(BITS_PER_LONG); __free_pages_bootmem(pfn_to_page(start), order); count += BITS_PER_LONG; + start += BITS_PER_LONG; } else { unsigned long off = 0; @@ -214,8 +212,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) vec >>= 1; off++; } + start = ALIGN(start + 1, BITS_PER_LONG); } - start += BITS_PER_LONG; } page = virt_to_page(bdata->node_bootmem_map); diff --git a/mm/compaction.c b/mm/compaction.c index 899d9563858..e6670c34eb4 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -365,8 +365,10 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, nr_isolated++; /* Avoid isolating too much */ - if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) + if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) { + ++low_pfn; break; + } } acct_isolated(zone, cc); @@ -721,23 +723,23 @@ int sysctl_extfrag_handler(struct ctl_table *table, int write, } #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA) -ssize_t sysfs_compact_node(struct sys_device *dev, - struct sysdev_attribute *attr, +ssize_t sysfs_compact_node(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) { compact_node(dev->id); return count; } -static SYSDEV_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node); +static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node); int compaction_register_node(struct node *node) { - return sysdev_create_file(&node->sysdev, &attr_compact); + return device_create_file(&node->dev, &dev_attr_compact); } void compaction_unregister_node(struct node *node) { - return sysdev_remove_file(&node->sysdev, &attr_compact); + return device_remove_file(&node->dev, &dev_attr_compact); } #endif /* CONFIG_SYSFS && CONFIG_NUMA */ diff --git a/mm/fadvise.c b/mm/fadvise.c index 8d723c9e8b7..469491e0af7 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -117,7 +117,8 @@ SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) break; case POSIX_FADV_DONTNEED: if (!bdi_write_congested(mapping->backing_dev_info)) - filemap_flush(mapping); + __filemap_fdatawrite_range(mapping, offset, endbyte, + WB_SYNC_NONE); /* First and last FULL page! 
*/ start_index = (offset+(PAGE_CACHE_SIZE-1)) >> PAGE_CACHE_SHIFT; diff --git a/mm/failslab.c b/mm/failslab.c index 0dd7b8fec71..fefaabaab76 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -35,7 +35,7 @@ __setup("failslab=", setup_failslab); static int __init failslab_debugfs_init(void) { struct dentry *dir; - mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; dir = fault_create_debugfs_attr("failslab", NULL, &failslab.attr); if (IS_ERR(dir)) diff --git a/mm/filemap.c b/mm/filemap.c index c0018f2d50e..c4ee2e918be 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1828,7 +1828,7 @@ repeat: page = __page_cache_alloc(gfp | __GFP_COLD); if (!page) return ERR_PTR(-ENOMEM); - err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); + err = add_to_page_cache_lru(page, mapping, index, gfp); if (unlikely(err)) { page_cache_release(page); if (err == -EEXIST) @@ -1925,10 +1925,7 @@ static struct page *wait_on_page_read(struct page *page) * @gfp: the page allocator flags to use if allocating * * This is the same as "read_mapping_page(mapping, index, NULL)", but with - * any new page allocations done using the specified allocation flags. Note - * that the Radix tree operations will still use GFP_KERNEL, so you can't - * expect to do this atomically or anything like that - but you can pass in - * other page requirements. + * any new page allocations done using the specified allocation flags. * * If the page does not get brought uptodate, return -EIO. */ @@ -1971,7 +1968,7 @@ EXPORT_SYMBOL(read_cache_page); */ int should_remove_suid(struct dentry *dentry) { - mode_t mode = dentry->d_inode->i_mode; + umode_t mode = dentry->d_inode->i_mode; int kill = 0; /* suid always must be killed */ @@ -2354,8 +2351,11 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index, unsigned flags) { int status; + gfp_t gfp_mask; struct page *page; gfp_t gfp_notmask = 0; + + gfp_mask = mapping_gfp_mask(mapping) | __GFP_WRITE; if (flags & AOP_FLAG_NOFS) gfp_notmask = __GFP_FS; repeat: @@ -2363,7 +2363,7 @@ repeat: if (page) goto found; - page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask); + page = __page_cache_alloc(gfp_mask & ~gfp_notmask); if (!page) return NULL; status = add_to_page_cache_lru(page, mapping, index, @@ -2407,7 +2407,6 @@ static ssize_t generic_perform_write(struct file *file, iov_iter_count(i)); again: - /* * Bring in the user page that we will copy from _first_. * Otherwise there's a nasty deadlock on copying from the @@ -2463,7 +2462,10 @@ again: written += copied; balance_dirty_pages_ratelimited(mapping); - + if (fatal_signal_pending(current)) { + status = -EINTR; + break; + } } while (iov_iter_count(i)); return written ? 
written : status; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4298abaae15..36b3d988b4e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage) static void khugepaged_alloc_sleep(void) { - DEFINE_WAIT(wait); - add_wait_queue(&khugepaged_wait, &wait); - schedule_timeout_interruptible( - msecs_to_jiffies( - khugepaged_alloc_sleep_millisecs)); - remove_wait_queue(&khugepaged_wait, &wait); + wait_event_freezable_timeout(khugepaged_wait, false, + msecs_to_jiffies(khugepaged_alloc_sleep_millisecs)); } #ifndef CONFIG_NUMA @@ -2313,14 +2309,10 @@ static void khugepaged_loop(void) if (unlikely(kthread_should_stop())) break; if (khugepaged_has_work()) { - DEFINE_WAIT(wait); if (!khugepaged_scan_sleep_millisecs) continue; - add_wait_queue(&khugepaged_wait, &wait); - schedule_timeout_interruptible( - msecs_to_jiffies( - khugepaged_scan_sleep_millisecs)); - remove_wait_queue(&khugepaged_wait, &wait); + wait_event_freezable_timeout(khugepaged_wait, false, + msecs_to_jiffies(khugepaged_scan_sleep_millisecs)); } else if (khugepaged_enabled()) wait_event_freezable(khugepaged_wait, khugepaged_wait_event()); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dae27ba3be2..ea8c3a4cd2a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order) __SetPageHead(page); for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) { __SetPageTail(p); + set_page_count(p, 0); p->first_page = page; } } @@ -799,7 +800,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) if (page && arch_prepare_hugepage(page)) { __free_pages(page, huge_page_order(h)); - return NULL; + page = NULL; } spin_lock(&hugetlb_lock); @@ -900,7 +901,6 @@ retry: h->resv_huge_pages += delta; ret = 0; - spin_unlock(&hugetlb_lock); /* Free the needed pages to the hugetlb pool */ list_for_each_entry_safe(page, tmp, &surplus_list, lru) { if ((--needed) < 0) @@ -914,6 +914,7 @@ retry: VM_BUG_ON(page_count(page)); enqueue_huge_page(h, page); } + spin_unlock(&hugetlb_lock); /* Free unnecessary surplus pages to the buddy allocator */ free: @@ -1591,9 +1592,9 @@ static void __init hugetlb_sysfs_init(void) /* * node_hstate/s - associate per node hstate attributes, via their kobjects, - * with node sysdevs in node_devices[] using a parallel array. The array - * index of a node sysdev or _hstate == node id. - * This is here to avoid any static dependency of the node sysdev driver, in + * with node devices in node_devices[] using a parallel array. The array + * index of a node device or _hstate == node id. + * This is here to avoid any static dependency of the node device driver, in * the base kernel, on the hugetlb module. */ struct node_hstate { @@ -1603,7 +1604,7 @@ struct node_hstate { struct node_hstate node_hstates[MAX_NUMNODES]; /* - * A subset of global hstate attributes for node sysdevs + * A subset of global hstate attributes for node devices */ static struct attribute *per_node_hstate_attrs[] = { &nr_hugepages_attr.attr, @@ -1617,7 +1618,7 @@ static struct attribute_group per_node_hstate_attr_group = { }; /* - * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj. + * kobj_to_node_hstate - lookup global hstate for node device hstate attr kobj. * Returns node id via non-NULL nidp. 
*/ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp) @@ -1640,13 +1641,13 @@ static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp) } /* - * Unregister hstate attributes from a single node sysdev. + * Unregister hstate attributes from a single node device. * No-op if no hstate attributes attached. */ void hugetlb_unregister_node(struct node *node) { struct hstate *h; - struct node_hstate *nhs = &node_hstates[node->sysdev.id]; + struct node_hstate *nhs = &node_hstates[node->dev.id]; if (!nhs->hugepages_kobj) return; /* no hstate attributes */ @@ -1662,7 +1663,7 @@ void hugetlb_unregister_node(struct node *node) } /* - * hugetlb module exit: unregister hstate attributes from node sysdevs + * hugetlb module exit: unregister hstate attributes from node devices * that have them. */ static void hugetlb_unregister_all_nodes(void) @@ -1670,7 +1671,7 @@ static void hugetlb_unregister_all_nodes(void) int nid; /* - * disable node sysdev registrations. + * disable node device registrations. */ register_hugetlbfs_with_node(NULL, NULL); @@ -1682,20 +1683,20 @@ static void hugetlb_unregister_all_nodes(void) } /* - * Register hstate attributes for a single node sysdev. + * Register hstate attributes for a single node device. * No-op if attributes already registered. */ void hugetlb_register_node(struct node *node) { struct hstate *h; - struct node_hstate *nhs = &node_hstates[node->sysdev.id]; + struct node_hstate *nhs = &node_hstates[node->dev.id]; int err; if (nhs->hugepages_kobj) return; /* already allocated */ nhs->hugepages_kobj = kobject_create_and_add("hugepages", - &node->sysdev.kobj); + &node->dev.kobj); if (!nhs->hugepages_kobj) return; @@ -1706,7 +1707,7 @@ void hugetlb_register_node(struct node *node) if (err) { printk(KERN_ERR "Hugetlb: Unable to add hstate %s" " for node %d\n", - h->name, node->sysdev.id); + h->name, node->dev.id); hugetlb_unregister_node(node); break; } @@ -1715,8 +1716,8 @@ void hugetlb_register_node(struct node *node) /* * hugetlb init time: register hstate attributes for all registered node - * sysdevs of nodes that have memory. All on-line nodes should have - * registered their associated sysdev by this time. + * devices of nodes that have memory. All on-line nodes should have + * registered their associated device by this time. */ static void hugetlb_register_all_nodes(void) { @@ -1724,12 +1725,12 @@ static void hugetlb_register_all_nodes(void) for_each_node_state(nid, N_HIGH_MEMORY) { struct node *node = &node_devices[nid]; - if (node->sysdev.id == nid) + if (node->dev.id == nid) hugetlb_register_node(node); } /* - * Let the node sysdev driver know we're here so it can + * Let the node device driver know we're here so it can * [un]register hstate attributes on node hotplug. */ register_hugetlbfs_with_node(hugetlb_register_node, @@ -2314,8 +2315,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, * from page cache lookup which is in HPAGE_SIZE units. */ address = address & huge_page_mask(h); - pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) - + (vma->vm_pgoff >> PAGE_SHIFT); + pgoff = vma_hugecache_offset(h, vma, address); mapping = (struct address_space *)page_private(page); /* @@ -2348,6 +2348,9 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, /* * Hugetlb_cow() should be called with page lock of the original hugepage held. + * Called with hugetlb_instantiation_mutex held and pte_page locked so we + * cannot race with other handlers or page migration. 
+ * Keep the pte_same checks anyway to make transition from the mutex easier. */ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t pte, @@ -2407,7 +2410,14 @@ retry_avoidcopy: BUG_ON(page_count(old_page) != 1); BUG_ON(huge_pte_none(pte)); spin_lock(&mm->page_table_lock); - goto retry_avoidcopy; + ptep = huge_pte_offset(mm, address & huge_page_mask(h)); + if (likely(pte_same(huge_ptep_get(ptep), pte))) + goto retry_avoidcopy; + /* + * race occurs while re-acquiring page_table_lock, and + * our job is done. + */ + return 0; } WARN_ON_ONCE(1); } @@ -2422,6 +2432,8 @@ retry_avoidcopy: * anon_vma prepared. */ if (unlikely(anon_vma_prepare(vma))) { + page_cache_release(new_page); + page_cache_release(old_page); /* Caller expects lock to be held */ spin_lock(&mm->page_table_lock); return VM_FAULT_OOM; @@ -2627,6 +2639,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, static DEFINE_MUTEX(hugetlb_instantiation_mutex); struct hstate *h = hstate_vma(vma); + address &= huge_page_mask(h); + ptep = huge_pte_offset(mm, address); if (ptep) { entry = huge_ptep_get(ptep); diff --git a/mm/memblock.c b/mm/memblock.c index 84bec4969ed..2f55f19b7c8 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,12 +20,23 @@ #include <linux/seq_file.h> #include <linux/memblock.h> -struct memblock memblock __initdata_memblock; +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; + +struct memblock memblock __initdata_memblock = { + .memory.regions = memblock_memory_init_regions, + .memory.cnt = 1, /* empty dummy entry */ + .memory.max = INIT_MEMBLOCK_REGIONS, + + .reserved.regions = memblock_reserved_init_regions, + .reserved.cnt = 1, /* empty dummy entry */ + .reserved.max = INIT_MEMBLOCK_REGIONS, + + .current_limit = MEMBLOCK_ALLOC_ANYWHERE, +}; int memblock_debug __initdata_memblock; -int memblock_can_resize __initdata_memblock; -static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; +static int memblock_can_resize __initdata_memblock; /* inline so we don't get a warning when pr_debug is compiled out */ static inline const char *memblock_type_name(struct memblock_type *type) @@ -38,20 +49,15 @@ static inline const char *memblock_type_name(struct memblock_type *type) return "unknown"; } -/* - * Address comparison utilities - */ - -static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size) -{ - return addr & ~(size - 1); -} - -static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size) +/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ +static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) { - return (addr + (size - 1)) & ~(size - 1); + return *size = min(*size, (phys_addr_t)ULLONG_MAX - base); } +/* + * Address comparison utilities + */ static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2) { @@ -73,83 +79,66 @@ static long __init_memblock memblock_overlaps_region(struct memblock_type *type, return (i < type->cnt) ? i : -1; } -/* - * Find, allocate, deallocate or reserve unreserved regions. All allocations - * are top-down. 
+/** + * memblock_find_in_range_node - find free area in given range and node + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @size: size of free area to find + * @align: alignment of free area to find + * @nid: nid of the free area to find, %MAX_NUMNODES for any node + * + * Find @size free area aligned to @align in the specified range and node. + * + * RETURNS: + * Found address on success, %0 on failure. */ - -static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end, - phys_addr_t size, phys_addr_t align) +phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t start, + phys_addr_t end, phys_addr_t size, + phys_addr_t align, int nid) { - phys_addr_t base, res_base; - long j; - - /* In case, huge size is requested */ - if (end < size) - return MEMBLOCK_ERROR; - - base = memblock_align_down((end - size), align); + phys_addr_t this_start, this_end, cand; + u64 i; - /* Prevent allocations returning 0 as it's also used to - * indicate an allocation failure - */ - if (start == 0) - start = PAGE_SIZE; - - while (start <= base) { - j = memblock_overlaps_region(&memblock.reserved, base, size); - if (j < 0) - return base; - res_base = memblock.reserved.regions[j].base; - if (res_base < size) - break; - base = memblock_align_down(res_base - size, align); - } + /* align @size to avoid excessive fragmentation on reserved array */ + size = round_up(size, align); - return MEMBLOCK_ERROR; -} - -static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, - phys_addr_t align, phys_addr_t start, phys_addr_t end) -{ - long i; - - BUG_ON(0 == size); - - /* Pump up max_addr */ + /* pump up @end */ if (end == MEMBLOCK_ALLOC_ACCESSIBLE) end = memblock.current_limit; - /* We do a top-down search, this tends to limit memory - * fragmentation by keeping early boot allocs near the - * top of memory - */ - for (i = memblock.memory.cnt - 1; i >= 0; i--) { - phys_addr_t memblockbase = memblock.memory.regions[i].base; - phys_addr_t memblocksize = memblock.memory.regions[i].size; - phys_addr_t bottom, top, found; + /* adjust @start to avoid underflow and allocating the first page */ + start = max3(start, size, (phys_addr_t)PAGE_SIZE); + end = max(start, end); - if (memblocksize < size) - continue; - if ((memblockbase + memblocksize) <= start) - break; - bottom = max(memblockbase, start); - top = min(memblockbase + memblocksize, end); - if (bottom >= top) - continue; - found = memblock_find_region(bottom, top, size, align); - if (found != MEMBLOCK_ERROR) - return found; + for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) { + this_start = clamp(this_start, start, end); + this_end = clamp(this_end, start, end); + + cand = round_down(this_end - size, align); + if (cand >= this_start) + return cand; } - return MEMBLOCK_ERROR; + return 0; } -/* - * Find a free area with specified alignment in a specific range. +/** + * memblock_find_in_range - find free area in given range + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @size: size of free area to find + * @align: alignment of free area to find + * + * Find @size free area aligned to @align in the specified range. + * + * RETURNS: + * Found address on success, %0 on failure. 
*/ -u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align) +phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, + phys_addr_t end, phys_addr_t size, + phys_addr_t align) { - return memblock_find_base(size, align, start, end); + return memblock_find_in_range_node(start, end, size, align, + MAX_NUMNODES); } /* @@ -178,25 +167,21 @@ int __init_memblock memblock_reserve_reserved_regions(void) static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { - unsigned long i; - - for (i = r; i < type->cnt - 1; i++) { - type->regions[i].base = type->regions[i + 1].base; - type->regions[i].size = type->regions[i + 1].size; - } + type->total_size -= type->regions[r].size; + memmove(&type->regions[r], &type->regions[r + 1], + (type->cnt - (r + 1)) * sizeof(type->regions[r])); type->cnt--; /* Special case for empty arrays */ if (type->cnt == 0) { + WARN_ON(type->total_size != 0); type->cnt = 1; type->regions[0].base = 0; type->regions[0].size = 0; + memblock_set_region_node(&type->regions[0], MAX_NUMNODES); } } -/* Defined below but needed now */ -static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); - static int __init_memblock memblock_double_array(struct memblock_type *type) { struct memblock_region *new_array, *old_array; @@ -226,10 +211,10 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) */ if (use_slab) { new_array = kmalloc(new_size, GFP_KERNEL); - addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array); + addr = new_array ? __pa(new_array) : 0; } else - addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE); - if (addr == MEMBLOCK_ERROR) { + addr = memblock_find_in_range(0, MEMBLOCK_ALLOC_ACCESSIBLE, new_size, sizeof(phys_addr_t)); + if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", memblock_type_name(type), type->max, type->max * 2); return -1; @@ -254,7 +239,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) return 0; /* Add the new reserved region now. Should not fail ! */ - BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size)); + BUG_ON(memblock_reserve(addr, new_size)); /* If the array wasn't our static init one, then free it. We only do * that before SLAB is available as later on, we don't know whether @@ -268,343 +253,514 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) return 0; } -int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, - phys_addr_t addr2, phys_addr_t size2) -{ - return 1; -} - -static long __init_memblock memblock_add_region(struct memblock_type *type, - phys_addr_t base, phys_addr_t size) +/** + * memblock_merge_regions - merge neighboring compatible regions + * @type: memblock type to scan + * + * Scan @type and merge neighboring compatible regions. + */ +static void __init_memblock memblock_merge_regions(struct memblock_type *type) { - phys_addr_t end = base + size; - int i, slot = -1; - - /* First try and coalesce this MEMBLOCK with others */ - for (i = 0; i < type->cnt; i++) { - struct memblock_region *rgn = &type->regions[i]; - phys_addr_t rend = rgn->base + rgn->size; + int i = 0; - /* Exit if there's no possible hits */ |
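For readers following the memblock rework above: the new memblock_find_in_range_node() replaces the old memblock_find_region()/memblock_find_base() pair with a single top-down walk over the free ranges, clamping each candidate window to [start, end) and rounding the candidate address down to the requested alignment. Below is a minimal, self-contained C sketch of that search pattern; the struct range array, the CLAMP/ROUND_DOWN helpers and the find_in_range() name are illustrative stand-ins, not the kernel's memblock API.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

typedef uint64_t phys_addr_t;

struct range { phys_addr_t start, end; };	/* [start, end), known-free memory */

/* lo/hi clamp and power-of-two round-down, analogous to the kernel helpers */
#define CLAMP(x, lo, hi)  ((x) < (lo) ? (lo) : ((x) > (hi) ? (hi) : (x)))
#define ROUND_DOWN(x, a)  ((x) & ~((phys_addr_t)(a) - 1))

/*
 * Walk the free ranges from highest to lowest and return the highest
 * @align-aligned address at which @size bytes fit inside [@start, @end),
 * or 0 on failure.  @align must be a power of two.  The reverse walk
 * mirrors memblock's top-down policy of keeping early allocations near
 * the top of memory.
 */
static phys_addr_t find_in_range(const struct range *free, size_t nr,
				 phys_addr_t start, phys_addr_t end,
				 phys_addr_t size, phys_addr_t align)
{
	for (size_t i = nr; i-- > 0; ) {	/* reverse order == top-down */
		phys_addr_t lo = CLAMP(free[i].start, start, end);
		phys_addr_t hi = CLAMP(free[i].end, start, end);

		if (hi - lo < size)		/* window too small (or empty) */
			continue;

		phys_addr_t cand = ROUND_DOWN(hi - size, align);
		if (cand >= lo)
			return cand;
	}
	return 0;				/* 0 doubles as "not found" */
}

int main(void)
{
	struct range free_mem[] = {
		{ 0x00100000, 0x20000000 },	/* 1 MiB .. 512 MiB */
		{ 0x30000000, 0x40000000 },	/* 768 MiB .. 1 GiB */
	};
	/* highest 1 MiB-aligned MiB below 1 GiB -> 0x3ff00000 */
	phys_addr_t a = find_in_range(free_mem, 2, 0x100000, 0x40000000,
				      0x100000, 0x100000);
	printf("allocated at %#llx\n", (unsigned long long)a);
	return 0;
}

The real memblock_find_in_range_node() in the diff additionally rounds @size up to @align and bumps @start above page zero (start = max3(start, size, PAGE_SIZE)) so that 0 can safely serve as the failure value; those details are omitted from the sketch for brevity.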