diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-22 09:04:48 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-22 09:04:48 -0700 |
commit | 95211279c5ad00a317c98221d7e4365e02f20836 (patch) | |
tree | 2ddc8625378d2915b8c96392f3cf6663b705ed55 | |
parent | 5375871d432ae9fc581014ac117b96aaee3cd0c7 (diff) | |
parent | 12724850e8064f64b6223d26d78c0597c742c65a (diff) |
Merge branch 'akpm' (Andrew's patch-bomb)
Merge first batch of patches from Andrew Morton:
"A few misc things and all the MM queue"
* emailed from Andrew Morton <akpm@linux-foundation.org>: (92 commits)
memcg: avoid THP split in task migration
thp: add HPAGE_PMD_* definitions for !CONFIG_TRANSPARENT_HUGEPAGE
memcg: clean up existing move charge code
mm/memcontrol.c: remove unnecessary 'break' in mem_cgroup_read()
mm/memcontrol.c: remove redundant BUG_ON() in mem_cgroup_usage_unregister_event()
mm/memcontrol.c: s/stealed/stolen/
memcg: fix performance of mem_cgroup_begin_update_page_stat()
memcg: remove PCG_FILE_MAPPED
memcg: use new logic for page stat accounting
memcg: remove PCG_MOVE_LOCK flag from page_cgroup
memcg: simplify move_account() check
memcg: remove EXPORT_SYMBOL(mem_cgroup_update_page_stat)
memcg: kill dead prev_priority stubs
memcg: remove PCG_CACHE page_cgroup flag
memcg: let css_get_next() rely upon rcu_read_lock()
cgroup: revert ss_id_lock to spinlock
idr: make idr_get_next() good for rcu_read_lock()
memcg: remove unnecessary thp check in page stat accounting
memcg: remove redundant returns
memcg: enum lru_list lru
...
77 files changed, 1902 insertions, 1235 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a76a26a1db8..b7413cb46dc 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -290,7 +290,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7) rsslim current limit in bytes on the rss start_code address above which program text can run end_code address below which program text can run - start_stack address of the start of the stack + start_stack address of the start of the main process stack esp current value of ESP eip current value of EIP pending bitmap of pending signals @@ -325,7 +325,7 @@ address perms offset dev inode pathname a7cb1000-a7cb2000 ---p 00000000 00:00 0 a7cb2000-a7eb2000 rw-p 00000000 00:00 0 a7eb2000-a7eb3000 ---p 00000000 00:00 0 -a7eb3000-a7ed5000 rw-p 00000000 00:00 0 +a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack:1001] a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 @@ -357,11 +357,39 @@ is not associated with a file: [heap] = the heap of the program [stack] = the stack of the main process + [stack:1001] = the stack of the thread with tid 1001 [vdso] = the "virtual dynamic shared object", the kernel system call handler or if empty, the mapping is anonymous. +The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint +of the individual tasks of a process. In this file you will see a mapping marked +as [stack] if that task sees it as a stack. This is a key difference from the +content of /proc/PID/maps, where you will see all mappings that are being used +as stack by all of those tasks. Hence, for the example above, the task-level +map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this: + +08048000-08049000 r-xp 00000000 03:00 8312 /opt/test +08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test +0804a000-0806b000 rw-p 00000000 00:00 0 [heap] +a7cb1000-a7cb2000 ---p 00000000 00:00 0 +a7cb2000-a7eb2000 rw-p 00000000 00:00 0 +a7eb2000-a7eb3000 ---p 00000000 00:00 0 +a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack] +a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 +a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 +a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 +a800b000-a800e000 rw-p 00000000 00:00 0 +a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0 +a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0 +a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0 +a8024000-a8027000 rw-p 00000000 00:00 0 +a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2 +a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2 +a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2 +aff35000-aff4a000 rw-p 00000000 00:00 0 +ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso] The /proc/PID/smaps is an extension based on maps, showing the memory consumption for each of the process's mappings. For each of mappings there diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8cadb7551fc..7986d79d9d1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2635,6 +2635,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. to facilitate early boot debugging. See also Documentation/trace/events.txt + transparent_hugepage= + [KNL] + Format: [always|madvise|never] + Can be used to control the default behavior of the system + with respect to transparent hugepages. + See Documentation/vm/transhuge.txt for more details. + tsc= Disable clocksource stability checks for TSC. Format: <string> [x86] reliable: mark tsc clocksource as reliable, this diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c index 7445caa26d0..0b13f02d405 100644 --- a/Documentation/vm/page-types.c +++ b/Documentation/vm/page-types.c @@ -98,6 +98,7 @@ #define KPF_HWPOISON 19 #define KPF_NOPAGE 20 #define KPF_KSM 21 +#define KPF_THP 22 /* [32-] kernel hacking assistances */ #define KPF_RESERVED 32 @@ -147,6 +148,7 @@ static const char *page_flag_names[] = { [KPF_HWPOISON] = "X:hwpoison", [KPF_NOPAGE] = "n:nopage", [KPF_KSM] = "x:ksm", + [KPF_THP] = "t:thp", [KPF_RESERVED] = "r:reserved", [KPF_MLOCKED] = "m:mlocked", diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt index df09b9650a8..4600cbe3d6b 100644 --- a/Documentation/vm/pagemap.txt +++ b/Documentation/vm/pagemap.txt @@ -60,6 +60,7 @@ There are three components to pagemap: 19. HWPOISON 20. NOPAGE 21. KSM + 22. THP Short descriptions to the page flags: @@ -97,6 +98,9 @@ Short descriptions to the page flags: 21. KSM identical memory pages dynamically shared between one or more processes +22. THP + contiguous pages which construct transparent hugepages + [IO related page flags] 1. ERROR IO error occurred 3. UPTODATE page has up-to-date data diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 023b8860dc9..c8f5b50db89 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -776,7 +776,6 @@ static inline int handle_signal32(unsigned long signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { - sigset_t blocked; int err; if (ka->sa.sa_flags & SA_SIGINFO) @@ -787,11 +786,7 @@ static inline int handle_signal32(unsigned long signr, struct k_sigaction *ka, if (err) return err; - sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NOMASK)) - sigaddset(&blocked, signr); - set_current_blocked(&blocked); - + block_sigmask(ka, signr); tracehook_signal_handler(signr, info, ka, regs, 0); return 0; diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index d54c6e53aba..7bb71b6fbd2 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -465,7 +465,6 @@ static inline int handle_signal(unsigned long signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { - sigset_t blocked; int err; if (ka->sa.sa_flags & SA_SIGINFO) @@ -476,11 +475,7 @@ handle_signal(unsigned long signr, struct k_sigaction *ka, if (err) return err; - sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NOMASK)) - sigaddset(&blocked, signr); - set_current_blocked(&blocked); - + block_sigmask(ka, signr); tracehook_signal_handler(signr, info, ka, regs, 0); return 0; diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index f0836cd0e2f..d8a67e60be8 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -479,18 +479,14 @@ static inline int handle_signal(unsigned long signr, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { - sigset_t blocked; int err; err = setup_rt_frame(ka, regs, signr, oldset, (ka->sa.sa_flags & SA_SIGINFO) ? info : NULL); if (err) return err; - sigorsets(&blocked, ¤t->blocked, &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NOMASK)) - sigaddset(&blocked, signr); - set_current_blocked(&blocked); + block_sigmask(ka, signr); tracehook_signal_handler(signr, info, ka, regs, 0); return 0; diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 051489082d5..ef59642ff1b 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -195,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - unsigned long addr = addr0; + unsigned long addr = addr0, start_addr; /* requested length too big for entire address space */ if (len > TASK_SIZE) @@ -223,25 +223,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, mm->free_area_cache = mm->mmap_base; } +try_again: /* either no address requested or can't fit in requested address hole */ - addr = mm->free_area_cache; - - /* make sure it can fit in the remaining address space */ - if (addr > len) { - unsigned long tmp_addr = align_addr(addr - len, filp, - ALIGN_TOPDOWN); - - vma = find_vma(mm, tmp_addr); - if (!vma || tmp_addr + len <= vma->vm_start) - /* remember the address as a hint for next time */ - return mm->free_area_cache = tmp_addr; - } - - if (mm->mmap_base < len) - goto bottomup; + start_addr = addr = mm->free_area_cache; - addr = mm->mmap_base-len; + if (addr < len) + goto fail; + addr -= len; do { addr = align_addr(addr, filp, ALIGN_TOPDOWN); @@ -263,6 +252,17 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = vma->vm_start-len; } while (len < vma->vm_start); +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + */ + if (start_addr != mm->mmap_base) { + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = 0; + goto try_again; + } + bottomup: /* * A failed mmap() very likely causes application failure, diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index b466cab5ba1..328cb37bb82 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) spinlock_t *ptl; int i; + down_write(&mm->mmap_sem); pgd = pgd_offset(mm, 0xA0000); if (pgd_none_or_clear_bad(pgd)) goto out; @@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) } pte_unmap_unlock(pte, ptl); out: + up_write(&mm->mmap_sem); flush_tlb(); } diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 8ecbb4bba4b..f6679a7fb8c 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -308,10 +308,11 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, { struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; - struct vm_area_struct *vma, *prev_vma; - unsigned long base = mm->mmap_base, addr = addr0; + struct vm_area_struct *vma; + unsigned long base = mm->mmap_base; + unsigned long addr = addr0; unsigned long largest_hole = mm->cached_hole_size; - int first_time = 1; + unsigned long start_addr; /* don't allow allocations above current base */ if (mm->free_area_cache > base) @@ -322,6 +323,8 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, mm->free_area_cache = base; } try_again: + start_addr = mm->free_area_cache; + /* make sure it can fit in the remaining address space */ if (mm->free_area_cache < len) goto fail; @@ -337,22 +340,14 @@ try_again: if (!vma) return addr; - /* - * new region fits between prev_vma->vm_end and - * vma->vm_start, use it: - */ - prev_vma = vma->vm_prev; - if (addr + len <= vma->vm_start && - (!prev_vma || (addr >= prev_vma->vm_end))) { + if (addr + len <= vma->vm_start) { /* remember the address as a hint for next time */ mm->cached_hole_size = largest_hole; return (mm->free_area_cache = addr); - } else { + } else if (mm->free_area_cache == vma->vm_end) { /* pull free_area_cache down to the first hole */ - if (mm->free_area_cache == vma->vm_end) { - mm->free_area_cache = vma->vm_start; - mm->cached_hole_size = largest_hole; - } + mm->free_area_cache = vma->vm_start; + mm->cached_hole_size = largest_hole; } /* remember the largest hole we saw so far */ @@ -368,10 +363,9 @@ fail: * if hint left us with no space for the requested * mapping then try again: */ - if (first_time) { + if (start_addr != base) { mm->free_area_cache = base; largest_hole = 0; - first_time = 0; goto try_again; } /* diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index 46db56845f1..740b0a35543 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -60,7 +60,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei, eb->nid = nid; if (emu_nid_to_phys[nid] == NUMA_NO_NODE) - emu_nid_to_phys[nid] = pb->nid; + emu_nid_to_phys[nid] = nid; pb->start += size; if (pb->start >= pb->end) { diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index f2220b5bdce..b69b000349f 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -260,10 +260,7 @@ asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3, goto badframe; sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + set_current_blocked(&set); if (restore_sigcontext(regs, frame)) goto badframe; @@ -336,8 +333,8 @@ gen_return_code(unsigned char *codemem) } -static void setup_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) +static int setup_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) { struct rt_sigframe *frame; int err = 0; @@ -422,12 +419,11 @@ static void setup_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, signal, frame, regs->pc); #endif - return; + return 0; give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); + force_sigsegv(sig, current); + return -EFAULT; } /* @@ -449,11 +445,8 @@ asmlinkage long xtensa_rt_sigsuspend(sigset_t __user *unewset, return -EFAULT; sigdelsetmask(&newset, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); saveset = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + set_current_blocked(&newset); regs->areg[2] = -EINTR; while (1) { @@ -536,17 +529,11 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) /* Whee! Actually deliver the signal. */ /* Set up the stack frame */ - setup_frame(signr, &ka, &info, oldset, regs); - - if (ka.sa.sa_flags & SA_ONESHOT) - ka.sa.sa_handler = SIG_DFL; + ret = setup_frame(signr, &ka, &info, oldset, regs); + if (ret) + return ret; - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, ¤t->blocked, &ka.sa.sa_mask); - if (!(ka.sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked, signr); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); |