diff options
96 files changed, 2792 insertions, 1697 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 71c4da41344..a25cb3fafeb 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -144,9 +144,9 @@ Figure 1 shows the important aspects of the controller 3. Each page has a pointer to the page_cgroup, which in turn knows the cgroup it belongs to -The accounting is done as follows: mem_cgroup_charge() is invoked to set up -the necessary data structures and check if the cgroup that is being charged -is over its limit. If it is, then reclaim is invoked on the cgroup. +The accounting is done as follows: mem_cgroup_charge_common() is invoked to +set up the necessary data structures and check if the cgroup that is being +charged is over its limit. If it is, then reclaim is invoked on the cgroup. More details can be found in the reclaim section of this document. If everything goes well, a page meta-data-structure called page_cgroup is updated. page_cgroup has its own LRU on cgroup. diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt index 6d0c2519cf4..c6f993d491b 100644 --- a/Documentation/memory-hotplug.txt +++ b/Documentation/memory-hotplug.txt @@ -161,7 +161,8 @@ a recent addition and not present on older kernels. in the memory block. 'state' : read-write at read: contains online/offline state of memory. - at write: user can specify "online", "offline" command + at write: user can specify "online_kernel", + "online_movable", "online", "offline" command which will be performed on al sections in the block. 'phys_device' : read-only: designed to show the name of physical memory device. This is not well implemented now. @@ -255,6 +256,17 @@ For onlining, you have to write "online" to the section's state file as: % echo online > /sys/devices/system/memory/memoryXXX/state +This onlining will not change the ZONE type of the target memory section, +If the memory section is in ZONE_NORMAL, you can change it to ZONE_MOVABLE: + +% echo online_movable > /sys/devices/system/memory/memoryXXX/state +(NOTE: current limit: this memory section must be adjacent to ZONE_MOVABLE) + +And if the memory section is in ZONE_MOVABLE, you can change it to ZONE_NORMAL: + +% echo online_kernel > /sys/devices/system/memory/memoryXXX/state +(NOTE: current limit: this memory section must be adjacent to ZONE_NORMAL) + After this, section memoryXXX's state will be 'online' and the amount of available memory will be increased. @@ -377,15 +389,18 @@ The third argument is passed by pointer of struct memory_notify. struct memory_notify { unsigned long start_pfn; unsigned long nr_pages; + int status_change_nid_normal; int status_change_nid; } start_pfn is start_pfn of online/offline memory. nr_pages is # of pages of online/offline memory. +status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask +is (will be) set/clear, if this is -1, then nodemask status is not changed. status_change_nid is set node id when N_HIGH_MEMORY of nodemask is (will be) set/clear. It means a new(memoryless) node gets new memory by online and a node loses all memory. If this is -1, then nodemask status is not changed. -If status_changed_nid >= 0, callback should create/discard structures for the +If status_changed_nid* >= 0, callback should create/discard structures for the node if necessary. -------------- diff --git a/arch/alpha/include/asm/mman.h b/arch/alpha/include/asm/mman.h index cbeb3616a28..0086b472bc2 100644 --- a/arch/alpha/include/asm/mman.h +++ b/arch/alpha/include/asm/mman.h @@ -63,4 +63,15 @@ /* compatibility flags */ #define MAP_FILE 0 +/* + * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f + #endif /* __ALPHA_MMAN_H__ */ diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 89f2b7f7b04..10062ceadd1 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -11,18 +11,6 @@ #include <linux/random.h> #include <asm/cachetype.h> -static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, - unsigned long pgoff) -{ - unsigned long base = addr & ~(SHMLBA-1); - unsigned long off = (pgoff << PAGE_SHIFT) & (SHMLBA-1); - - if (base + off <= addr) - return base + off; - - return base - off; -} - #define COLOUR_ALIGN(addr,pgoff) \ ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) @@ -69,9 +57,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; - unsigned long start_addr; int do_align = 0; int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; /* * We only need to do colour alignment if either the I or D @@ -104,46 +92,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, (!vma || addr + len <= vma->vm_start)) return addr; } - if (len > mm->cached_hole_size) { - start_addr = addr = mm->free_area_cache; - } else { - start_addr = addr = mm->mmap_base; - mm->cached_hole_size = 0; - } -full_search: - if (do_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); - - for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) { - /* - * Start a new search - just in case we missed - * some holes. - */ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; - mm->cached_hole_size = 0; - goto full_search; - } - return -ENOMEM; - } - if (!vma || addr + len <= vma->vm_start) { - /* - * Remember the place where we stopped the search: - */ - mm->free_area_cache = addr + len; - return addr; - } - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - addr = vma->vm_end; - if (do_align) - addr = COLOUR_ALIGN(addr, pgoff); - } + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); } unsigned long @@ -156,6 +112,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, unsigned long addr = addr0; int do_align = 0; int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; /* * We only need to do colour alignment if either the I or D @@ -187,70 +144,27 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, return addr; } - /* check if free_area_cache is useful for us */ - if (len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; - mm->free_area_cache = mm->mmap_base; - } - - /* either no address requested or can't fit in requested address hole */ - addr = mm->free_area_cache; - if (do_align) { - unsigned long base = COLOUR_ALIGN_DOWN(addr - len, pgoff); - addr = base + len; - } - - /* make sure it can fit in the remaining address space */ - if (addr > len) { - vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) - /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); - } - - if (mm->mmap_base < len) - goto bottomup; - - addr = mm->mmap_base - len; - if (do_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); - - do { - /* - * Lookup failure means no vma is above this address, - * else if new region fits below vma->vm_start, - * return with success: - */ - vma = find_vma(mm, addr); - if (!vma || addr+len <= vma->vm_start) - /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr); + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + addr = vm_unmapped_area(&info); - /* remember the largest hole we saw so far */ - if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; - - /* try just below the current vma->vm_start */ - addr = vma->vm_start - len; - if (do_align) - addr = COLOUR_ALIGN_DOWN(addr, pgoff); - } while (len < vma->vm_start); - -bottomup: /* * A failed mmap() very likely causes application failure, * so fall back to the bottom-up function here. This scenario * can happen with large stack limits and large mmap() * allocations. */ - mm->cached_hole_size = ~0UL; - mm->free_area_cache = TASK_UNMAPPED_BASE; - addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); - /* - * Restore the topdown base: - */ - mm->free_area_cache = mm->mmap_base; - mm->cached_hole_size = ~0UL; + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } return addr; } diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig index 06e73bf665e..c2bbc9a7222 100644 --- a/arch/avr32/Kconfig +++ b/arch/avr32/Kconfig @@ -193,9 +193,6 @@ source "kernel/Kconfig.preempt" config QUICKLIST def_bool y -config HAVE_ARCH_BOOTMEM - def_bool n - config ARCH_HAVE_MEMORY_PRESENT def_bool n diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 46d3da0d4b9..9a936ac9a94 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -87,4 +87,15 @@ /* compatibility flags */ #define MAP_FILE 0 +/* + * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. + * This gives us 6 bits, which is enough until someone invents 128 bit address + * spaces. + * + * Assume these are all power of twos. + * When 0 use the default page size. + */ +#define MAP_HUGE_SHIFT 26 +#define MAP_HUGE_MASK 0x3f + #endif /* _ASM_MMAN_H */ diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 302d779d5b0..d9be7540a6b 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -45,18 +45,6 @@ static unsigned long mmap_base(unsigned long rnd) return PAGE_ALIGN(TASK_SIZE - gap - rnd); } -static inline unsigned long COLOUR_ALIGN_DOWN(unsigned long addr, - unsigned long pgoff) -{ - unsigned long base = addr & ~shm_align_mask; - unsigned long off = (pgoff << PAGE_SHIFT) & shm_align_mask; - - if (base + off <= addr) - return base + off; - - return base - off; -} - #define COLOUR_ALIGN(addr, pgoff) \ ((((addr) + shm_align_mask) & ~shm_align_mask) + \ (((pgoff) << PAGE_SHIFT) & shm_align_mask)) @@ -71,6 +59,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, struct vm_area_struct *vma; unsigned long addr = addr0; int do_color_align; + struct vm_unmapped_area_info info; if (unlikely(len > TASK_SIZE)) return -ENOMEM; @@ -107,97 +96,31 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, return addr; } - if (dir == UP) { - addr = mm->mmap_base; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - else - addr = PAGE_ALIGN(addr); + info.length = len; + info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; + info.align_offset = pgoff << PAGE_SHIFT; - for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { - /* At this point: (!vma || addr < vma->vm_end). */ - if (TASK_SIZE - len < addr) - return -ENOMEM; - if (!vma || addr + len <= vma->vm_start) - return addr; - addr = vma->vm_end; - if (do_color_align) - addr = COLOUR_ALIGN(addr, pgoff); - } - } else { - /* check if free_area_cache i |