From 8fe6929cfd43c44834858a53e129ffdc7c166298 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 4 Jun 2014 16:05:36 -0700 Subject: kthread: fix return value of kthread_create() upon SIGKILL. Commit 786235eeba0e ("kthread: make kthread_create() killable") meant for allowing kthread_create() to abort as soon as killed by the OOM-killer. But returning -ENOMEM is wrong if killed by SIGKILL from userspace. Change kthread_create() to return -EINTR upon SIGKILL. Signed-off-by: Tetsuo Handa Cc: Oleg Nesterov Acked-by: David Rientjes Cc: [3.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kthread.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/kthread.c b/kernel/kthread.c index 9a130ec06f7..c2390f41307 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -262,7 +262,7 @@ static void create_kthread(struct kthread_create_info *create) * kthread_stop() has been called). The return value should be zero * or a negative error number; it will be passed to kthread_stop(). * - * Returns a task_struct or ERR_PTR(-ENOMEM). + * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR). */ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, int node, @@ -298,7 +298,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), * that thread. */ if (xchg(&create->done, NULL)) - return ERR_PTR(-ENOMEM); + return ERR_PTR(-EINTR); /* * kthreadd (or new kernel thread) will call complete() * shortly. -- cgit v1.2.3-70-g09d2 From 52383431b37cdbec63944e953ffc2698a7ad9722 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 4 Jun 2014 16:06:39 -0700 Subject: mm: get rid of __GFP_KMEMCG Currently to allocate a page that should be charged to kmemcg (e.g. threadinfo), we pass __GFP_KMEMCG flag to the page allocator. The page allocated is then to be freed by free_memcg_kmem_pages. 
Apart from looking asymmetrical, this also requires intrusion to the general allocation path. So let's introduce separate functions that will alloc/free pages charged to kmemcg. The new functions are called alloc_kmem_pages and free_kmem_pages. They should be used when the caller actually would like to use kmalloc, but has to fall back to the page allocator for the allocation is large. They only differ from alloc_pages and free_pages in that besides allocating or freeing pages they also charge them to the kmem resource counter of the current memory cgroup. [sfr@canb.auug.org.au: export kmalloc_order() to modules] Signed-off-by: Vladimir Davydov Acked-by: Greg Thelen Cc: Johannes Weiner Acked-by: Michal Hocko Cc: Glauber Costa Cc: Christoph Lameter Cc: Pekka Enberg Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 10 +++++--- include/linux/memcontrol.h | 2 +- include/linux/slab.h | 11 +------- include/linux/thread_info.h | 2 -- include/trace/events/gfpflags.h | 1 - kernel/fork.c | 6 ++--- mm/memcontrol.c | 11 ++++---- mm/page_alloc.c | 56 +++++++++++++++++++++++++---------------- mm/slab_common.c | 13 ++++++++++ mm/slub.c | 6 ++--- 10 files changed, 68 insertions(+), 50 deletions(-) (limited to 'kernel') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 39b81dc7d01..d382db71e30 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -31,7 +31,6 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_RECLAIMABLE 0x80000u -#define ___GFP_KMEMCG 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_NO_KSWAPD 0x400000u #define ___GFP_OTHER_NODE 0x800000u @@ -91,7 +90,6 @@ struct vm_area_struct; #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ -#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted 
resource */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ /* @@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ alloc_pages_vma(gfp_mask, 0, vma, addr, node) +extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order); +extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, + unsigned int order); + extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); @@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order); extern void free_hot_cold_page(struct page *page, int cold); extern void free_hot_cold_page_list(struct list_head *list, int cold); -extern void __free_memcg_kmem_pages(struct page *page, unsigned int order); -extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order); +extern void __free_kmem_pages(struct page *page, unsigned int order); +extern void free_kmem_pages(unsigned long addr, unsigned int order); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 96e5d2573eb..5155d09e749 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -537,7 +537,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) * res_counter_charge_nofail, but we hope those allocations are rare, * and won't be worth the trouble. 
*/ - if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL)) + if (gfp & __GFP_NOFAIL) return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; diff --git a/include/linux/slab.h b/include/linux/slab.h index 307bfbe6238..a6aab2c0dfc 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -369,16 +369,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, #include #endif -static __always_inline void * -kmalloc_order(size_t size, gfp_t flags, unsigned int order) -{ - void *ret; - - flags |= (__GFP_COMP | __GFP_KMEMCG); - ret = (void *) __get_free_pages(flags, order); - kmemleak_alloc(ret, size, 1, flags); - return ret; -} +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order); #ifdef CONFIG_TRACING extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index cb0cec94fda..ff307b548ed 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm); # define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK) #endif -#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG) - /* * flag set/clear/test wrappers * - pass TIF_xxxx constants to these functions diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index 1eddbf1557f..d6fd8e5b14b 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -34,7 +34,6 @@ {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ - {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ diff --git a/kernel/fork.c b/kernel/fork.c index 54a8d26f612..59e3dcc5b8f 100644 --- a/kernel/fork.c +++ 
b/kernel/fork.c @@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti) static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node) { - struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED, - THREAD_SIZE_ORDER); + struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, + THREAD_SIZE_ORDER); return page ? page_address(page) : NULL; } static inline void free_thread_info(struct thread_info *ti) { - free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); + free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); } # else static struct kmem_cache *thread_info_cache; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 56a768b3d5a..7bab1de50f4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3540,11 +3540,12 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order) /* * Disabling accounting is only relevant for some specific memcg * internal allocations. Therefore we would initially not have such - * check here, since direct calls to the page allocator that are marked - * with GFP_KMEMCG only happen outside memcg core. We are mostly - * concerned with cache allocations, and by having this test at - * memcg_kmem_get_cache, we are already able to relay the allocation to - * the root cache and bypass the memcg cache altogether. + * check here, since direct calls to the page allocator that are + * accounted to kmemcg (alloc_kmem_pages and friends) only happen + * outside memcg core. We are mostly concerned with cache allocations, + * and by having this test at memcg_kmem_get_cache, we are already able + * to relay the allocation to the root cache and bypass the memcg cache + * altogether. 
* * There is one exception, though: the SLUB allocator does not create * large order caches, but rather service large kmallocs directly from diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5dba2933c9c..7cfdcd808f5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2697,7 +2697,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int migratetype = allocflags_to_migratetype(gfp_mask); unsigned int cpuset_mems_cookie; int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR; - struct mem_cgroup *memcg = NULL; gfp_mask &= gfp_allowed_mask; @@ -2716,13 +2715,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (unlikely(!zonelist->_zonerefs->zone)) return NULL; - /* - * Will only have any effect when __GFP_KMEMCG is set. This is - * verified in the (always inline) callee - */ - if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order)) - return NULL; - retry_cpuset: cpuset_mems_cookie = read_mems_allowed_begin(); @@ -2782,8 +2774,6 @@ out: if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; - memcg_kmem_commit_charge(page, memcg, order); - return page; } EXPORT_SYMBOL(__alloc_pages_nodemask); @@ -2837,27 +2827,51 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); /* - * __free_memcg_kmem_pages and free_memcg_kmem_pages will free - * pages allocated with __GFP_KMEMCG. + * alloc_kmem_pages charges newly allocated pages to the kmem resource counter + * of the current memory cgroup. * - * Those pages are accounted to a particular memcg, embedded in the - * corresponding page_cgroup. To avoid adding a hit in the allocator to search - * for that information only to find out that it is NULL for users who have no - * interest in that whatsoever, we provide these functions. - * - * The caller knows better which flags it relies on. + * It should be used when the caller would like to use kmalloc, but since the + * allocation is large, it has to fall back to the page allocator. 
+ */ +struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order) +{ + struct page *page; + struct mem_cgroup *memcg = NULL; + + if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order)) + return NULL; + page = alloc_pages(gfp_mask, order); + memcg_kmem_commit_charge(page, memcg, order); + return page; +} + +struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order) +{ + struct page *page; + struct mem_cgroup *memcg = NULL; + + if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order)) + return NULL; + page = alloc_pages_node(nid, gfp_mask, order); + memcg_kmem_commit_charge(page, memcg, order); + return page; +} + +/* + * __free_kmem_pages and free_kmem_pages will free pages allocated with + * alloc_kmem_pages. */ -void __free_memcg_kmem_pages(struct page *page, unsigned int order) +void __free_kmem_pages(struct page *page, unsigned int order) { memcg_kmem_uncharge_pages(page, order); __free_pages(page, order); } -void free_memcg_kmem_pages(unsigned long addr, unsigned int order) +void free_kmem_pages(unsigned long addr, unsigned int order) { if (addr != 0) { VM_BUG_ON(!virt_addr_valid((void *)addr)); - __free_memcg_kmem_pages(virt_to_page((void *)addr), order); + __free_kmem_pages(virt_to_page((void *)addr), order); } } diff --git a/mm/slab_common.c b/mm/slab_common.c index 06f0c612563..1950c8f4d1a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -582,6 +582,19 @@ void __init create_kmalloc_caches(unsigned long flags) } #endif /* !CONFIG_SLOB */ +void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) +{ + void *ret; + struct page *page; + + flags |= __GFP_COMP; + page = alloc_kmem_pages(flags, order); + ret = page ? 
page_address(page) : NULL; + kmemleak_alloc(ret, size, 1, flags); + return ret; +} +EXPORT_SYMBOL(kmalloc_order); + #ifdef CONFIG_TRACING void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) { diff --git a/mm/slub.c b/mm/slub.c index fc9831851be..ddb60795f37 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3311,8 +3311,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) struct page *page; void *ptr = NULL; - flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG; - page = alloc_pages_node(node, flags, get_order(size)); + flags |= __GFP_COMP | __GFP_NOTRACK; + page = alloc_kmem_pages_node(node, flags, get_order(size)); if (page) ptr = page_address(page); @@ -3381,7 +3381,7 @@ void kfree(const void *x) if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); kfree_hook(x); - __free_memcg_kmem_pages(page, compound_order(page)); + __free_kmem_pages(page, compound_order(page)); return; } slab_free(page->slab_cache, page, object, _RET_IP_); -- cgit v1.2.3-70-g09d2 From f98bafa06a28fdfdd5c49f820f4d6560f636fc46 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jun 2014 16:07:34 -0700 Subject: memcg: kill CONFIG_MM_OWNER CONFIG_MM_OWNER makes no sense. It is not user-selectable, it is only selected by CONFIG_MEMCG automatically. So we can kill this option in init/Kconfig and do s/CONFIG_MM_OWNER/CONFIG_MEMCG/ globally. 
Signed-off-by: Oleg Nesterov Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- include/linux/sched.h | 4 ++-- init/Kconfig | 7 ------- kernel/exit.c | 4 ++-- kernel/fork.c | 4 ++-- 5 files changed, 7 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8967e20cbe5..de1627232af 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -406,7 +406,7 @@ struct mm_struct { spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; #endif -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG /* * "owner" points to a task that is regarded as the canonical * user/owner of this mm. All of the following must be true in diff --git a/include/linux/sched.h b/include/linux/sched.h index 70f67e4e615..2f2dd7d932a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2967,7 +2967,7 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); #else @@ -2978,7 +2978,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm) static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { } -#endif /* CONFIG_MM_OWNER */ +#endif /* CONFIG_MEMCG */ static inline unsigned long task_rlimit(const struct task_struct *tsk, unsigned int limit) diff --git a/init/Kconfig b/init/Kconfig index 4a1822a1a68..0a2f09a80e9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -933,7 +933,6 @@ config RESOURCE_COUNTERS config MEMCG bool "Memory Resource Controller for Control Groups" depends on RESOURCE_COUNTERS - select MM_OWNER select EVENTFD help Provides a memory resource controller that manages both anonymous @@ -951,9 +950,6 @@ config MEMCG disable memory resource controller and you can 
avoid overheads. (and lose benefits of memory resource controller) - This config option also selects MM_OWNER config option, which - could in turn add some fork/exit overhead. - config MEMCG_SWAP bool "Memory Resource Controller Swap Extension" depends on MEMCG && SWAP @@ -1179,9 +1175,6 @@ config SCHED_AUTOGROUP desktop applications. Task group autogeneration is currently based upon task session. -config MM_OWNER - bool - config SYSFS_DEPRECATED bool "Enable deprecated sysfs features to support old userspace tools" depends on SYSFS diff --git a/kernel/exit.c b/kernel/exit.c index 6ed6a1d552b..da1b838de8a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -352,7 +352,7 @@ int disallow_signal(int sig) EXPORT_SYMBOL(disallow_signal); -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG /* * A task is exiting. If it owned this mm, find a new owner for the mm. */ @@ -434,7 +434,7 @@ assign_new_owner: task_unlock(c); put_task_struct(c); } -#endif /* CONFIG_MM_OWNER */ +#endif /* CONFIG_MEMCG */ /* * Turn us into a lazy TLB process if we diff --git a/kernel/fork.c b/kernel/fork.c index 59e3dcc5b8f..0d53eb0dfb6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1099,12 +1099,12 @@ static void rt_mutex_init_task(struct task_struct *p) #endif } -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { mm->owner = p; } -#endif /* CONFIG_MM_OWNER */ +#endif /* CONFIG_MEMCG */ /* * Initialize POSIX timer handling for a single task. -- cgit v1.2.3-70-g09d2 From f87fb599ae4d2a152a93f9821b94f3158146d097 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jun 2014 16:07:52 -0700 Subject: memcg: mm_update_next_owner() should skip kthreads "Search through everything else" in mm_update_next_owner() can hit a kthread which adopted this "mm" via use_mm(), it should not be used as mm->owner. Add the PF_KTHREAD check. While at it, change this code to use for_each_process_thread() instead of deprecated do_each_thread/while_each_thread. 
Signed-off-by: Oleg Nesterov Reviewed-by: Michal Hocko Cc: Balbir Singh Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Michal Hocko Cc: Peter Chiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index da1b838de8a..5ac3c19c245 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -395,14 +395,12 @@ retry: } /* - * Search through everything else. We should not get - * here often + * Search through everything else, we should not get here often. */ - do_each_thread(g, c) { - if (c->mm == mm) + for_each_process_thread(g, c) { + if (!(c->flags & PF_KTHREAD) && c->mm == mm) goto assign_new_owner; - } while_each_thread(g, c); - + } read_unlock(&tasklist_lock); /* * We found no owner yet mm_users > 1: this implies that we are -- cgit v1.2.3-70-g09d2 From 39af1765f1255b2bbadc3064e16270781abf24a1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 4 Jun 2014 16:07:54 -0700 Subject: memcg: optimize the "Search everything else" loop in mm_update_next_owner() for_each_process_thread() is sub-optimal. All threads share the same ->mm, we can switch to the next process once we found a thread with ->mm != NULL and ->mm != mm. Signed-off-by: Oleg Nesterov Reviewed-by: Michal Hocko Cc: Balbir Singh Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Michal Hocko Cc: Peter Chiang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/exit.c b/kernel/exit.c index 5ac3c19c245..750c2e59461 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -397,9 +397,15 @@ retry: /* * Search through everything else, we should not get here often. 
*/ - for_each_process_thread(g, c) { - if (!(c->flags & PF_KTHREAD) && c->mm == mm) - goto assign_new_owner; + for_each_process(g) { + if (g->flags & PF_KTHREAD) + continue; + for_each_thread(g, c) { + if (c->mm == mm) + goto assign_new_owner; + if (c->mm) + break; + } } read_unlock(&tasklist_lock); /* -- cgit v1.2.3-70-g09d2 From 664eeddeef6539247691197c1ac124d4aa872ab6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 4 Jun 2014 16:10:08 -0700 Subject: mm: page_alloc: use jump labels to avoid checking number_of_cpusets If cpusets are not in use then we still check a global variable on every page allocation. Use jump labels to avoid the overhead. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Jan Kara Cc: Michal Hocko Cc: Hugh Dickins Cc: Dave Hansen Cc: Theodore Ts'o Cc: "Paul E. McKenney" Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 29 ++++++++++++++++++++++++++--- kernel/cpuset.c | 14 ++++---------- mm/page_alloc.c | 3 ++- 3 files changed, 32 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index b19d3dc2e65..ade2390ffe9 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -12,10 +12,31 @@ #include #include #include +#include #ifdef CONFIG_CPUSETS -extern int number_of_cpusets; /* How many cpusets are defined in system? 
*/ +extern struct static_key cpusets_enabled_key; +static inline bool cpusets_enabled(void) +{ + return static_key_false(&cpusets_enabled_key); +} + +static inline int nr_cpusets(void) +{ + /* jump label reference count + the top-level cpuset */ + return static_key_count(&cpusets_enabled_key) + 1; +} + +static inline void cpuset_inc(void) +{ + static_key_slow_inc(&cpusets_enabled_key); +} + +static inline void cpuset_dec(void) +{ + static_key_slow_dec(&cpusets_enabled_key); +} extern int cpuset_init(void); extern void cpuset_init_smp(void); @@ -32,13 +53,13 @@ extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask); static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) { - return number_of_cpusets <= 1 || + return nr_cpusets() <= 1 || __cpuset_node_allowed_softwall(node, gfp_mask); } static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) { - return number_of_cpusets <= 1 || + return nr_cpusets() <= 1 || __cpuset_node_allowed_hardwall(node, gfp_mask); } @@ -124,6 +145,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) #else /* !CONFIG_CPUSETS */ +static inline bool cpusets_enabled(void) { return false; } + static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3d54c418bd0..13001784389 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -61,12 +61,7 @@ #include #include -/* - * Tracks how many cpusets are currently defined in system. - * When there is only one cpuset (the root cpuset) we can - * short circuit some hooks. - */ -int number_of_cpusets __read_mostly; +struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE; /* See "Frequency meter" comments, below. 
*/ @@ -611,7 +606,7 @@ static int generate_sched_domains(cpumask_var_t **domains, goto done; } - csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL); + csa = kmalloc(nr_cpusets() * sizeof(cp), GFP_KERNEL); if (!csa) goto done; csn = 0; @@ -1888,7 +1883,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) if (is_spread_slab(parent)) set_bit(CS_SPREAD_SLAB, &cs->flags); - number_of_cpusets++; + cpuset_inc(); if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; @@ -1939,7 +1934,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) if (is_sched_load_balance(cs)) update_flag(CS_SCHED_LOAD_BALANCE, cs, 0); - number_of_cpusets--; + cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); mutex_unlock(&cpuset_mutex); @@ -1992,7 +1987,6 @@ int __init cpuset_init(void) if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL)) BUG(); - number_of_cpusets = 1; return 0; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b4381eaee71..a2955e10171 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1921,7 +1921,8 @@ zonelist_scan: if (IS_ENABLED(CONFIG_NUMA) && zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; - if ((alloc_flags & ALLOC_CPUSET) && + if (cpusets_enabled() && + (alloc_flags & ALLOC_CPUSET) && !cpuset_zone_allowed_softwall(zone, gfp_mask)) continue; BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); -- cgit v1.2.3-70-g09d2 From f6187769dae48234f3877df3c4d99294cc2254fa Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:12 -0700 Subject: sys_sgetmask/sys_ssetmask: add CONFIG_SGETMASK_SYSCALL sys_sgetmask and sys_ssetmask are obsolete system calls no longer supported in libc. This patch replaces architecture related __ARCH_WANT_SYS_SGETMASK by expert mode configuration. That option is enabled by default for those architectures. 
Signed-off-by: Fabian Frederick Cc: Steven Miao Cc: Mikael Starvik Cc: Jesper Nilsson Cc: David Howells Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Ralf Baechle Cc: Koichi Yasutake Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "David S. Miller" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Greg Ungerer Cc: Heiko Carstens Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/blackfin/include/asm/unistd.h | 1 - arch/cris/include/asm/unistd.h | 1 - arch/frv/include/asm/unistd.h | 1 - arch/m68k/include/asm/unistd.h | 1 - arch/microblaze/include/asm/unistd.h | 1 - arch/mips/include/asm/unistd.h | 1 - arch/mn10300/include/asm/unistd.h | 1 - arch/parisc/include/asm/unistd.h | 1 - arch/powerpc/include/asm/unistd.h | 1 - arch/sh/include/asm/unistd.h | 1 - arch/sparc/include/asm/unistd.h | 1 - arch/x86/include/asm/unistd.h | 1 - init/Kconfig | 10 ++++++++++ kernel/signal.c | 4 ++-- kernel/sys_ni.c | 2 ++ 15 files changed, 14 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index c35414bdf7b..c8c8ff9eff6 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -12,7 +12,6 @@ #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_FADVISE64 #define __ARCH_WANT_SYS_GETPGRP diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h index 5cc7d1991e4..0f40fed1ba2 100644 --- a/arch/cris/include/asm/unistd.h +++ b/arch/cris/include/asm/unistd.h @@ -15,7 +15,6 @@ #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_UTIME diff --git a/arch/frv/include/asm/unistd.h 
b/arch/frv/include/asm/unistd.h index 70ec7293dce..17b5df8fc28 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -13,7 +13,6 @@ /* #define __ARCH_WANT_SYS_GETHOSTNAME */ #define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE -/* #define __ARCH_WANT_SYS_SGETMASK */ /* #define __ARCH_WANT_SYS_SIGNAL */ #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_UTIME diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 33afa56ad47..1fcdd344c7a 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -13,7 +13,6 @@ #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_UTIME diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index b14232b6878..fd56a8f6648 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -19,7 +19,6 @@ #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_UTIME diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index 413d6c612be..e55813029d5 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -29,7 +29,6 @@ #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_WAITPID #define __ARCH_WANT_SYS_SOCKETCALL diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index 9d4e2d1ef90..0522468f488 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -26,7 +26,6 @@ #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_IPC #define 
__ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_UTIME diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 74d835820ee..5f4c68daa26 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -145,7 +145,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_TIME diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 9b892bbd9d8..5ce5552ab9f 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -29,7 +29,6 @@ #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_IPC #define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_UTIME diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h index e77816c4b9b..126fe8340b2 100644 --- a/arch/sh/include/asm/unistd.h +++ b/arch/sh/include/asm/unistd.h @@ -11,7 +11,6 @@ # define __ARCH_WANT_SYS_GETHOSTNAME # define __ARCH_WANT_SYS_IPC # define __ARCH_WANT_SYS_PAUSE -# define __ARCH_WANT_SYS_SGETMASK # define __ARCH_WANT_SYS_SIGNAL # define __ARCH_WANT_SYS_TIME # define __ARCH_WANT_SYS_UTIME diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index dfa53fdd5cb..0aac1e8f296 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -25,7 +25,6 @@ #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE -#define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_UTIME diff --git a/arch/x86/include/asm/unistd.h 
b/arch/x86/include/asm/unistd.h index 3f556c6a015..2b19caa4081 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -41,7 +41,6 @@ # define __ARCH_WANT_SYS_OLD_GETRLIMIT # define __ARCH_WANT_SYS_OLD_UNAME # define __ARCH_WANT_SYS_PAUSE -# define __ARCH_WANT_SYS_SGETMASK # define __ARCH_WANT_SYS_SIGNAL # define __ARCH_WANT_SYS_SIGPENDING # define __ARCH_WANT_SYS_SIGPROCMASK diff --git a/init/Kconfig b/init/Kconfig index ce034ad4a16..9d76b99af1b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1313,6 +1313,16 @@ config UID16 help This enables the legacy 16-bit UID syscall wrappers. +config SGETMASK_SYSCALL + bool "sgetmask/ssetmask syscalls support" if EXPERT + def_bool PARISC || MN10300 || BLACKFIN || M68K || PPC || MIPS || X86 || SPARC || CRIS || MICROBLAZE || SUPERH + ---help--- + sys_sgetmask and sys_ssetmask are obsolete system calls + no longer supported in libc but still enabled by default in some + architectures. + + If unsure, leave the default option here. + config SYSFS_SYSCALL bool "Sysfs syscall support" if EXPERT default y diff --git a/kernel/signal.c b/kernel/signal.c index 6ea13c09ae5..6e600aaa2af 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -3496,7 +3496,7 @@ COMPAT_SYSCALL_DEFINE3(sigaction, int, sig, } #endif -#ifdef __ARCH_WANT_SYS_SGETMASK +#ifdef CONFIG_SGETMASK_SYSCALL /* * For backwards compatibility. Functionality superseded by sigprocmask. 
@@ -3517,7 +3517,7 @@ SYSCALL_DEFINE1(ssetmask, int, newmask) return old; } -#endif /* __ARCH_WANT_SGETMASK */ +#endif /* CONFIG_SGETMASK_SYSCALL */ #ifdef __ARCH_WANT_SYS_SIGNAL /* diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index bc8d1b74a6b..36441b51b5d 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -135,6 +135,8 @@ cond_syscall(sys_setresgid16); cond_syscall(sys_setresuid16); cond_syscall(sys_setreuid16); cond_syscall(sys_setuid16); +cond_syscall(sys_sgetmask); +cond_syscall(sys_ssetmask); cond_syscall(sys_vm86old); cond_syscall(sys_vm86); cond_syscall(sys_ipc); -- cgit v1.2.3-70-g09d2 From 84117da5b79ffb4077bb05d64c86dfa4d746115c Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:17 -0700 Subject: kernel/cpu.c: convert printk to pr_foo() no level printk converted to pr_warn (if err) no level printk converted to pr_info (disabling non-boot cpus) Other printk converted to respective level. Signed-off-by: Fabian Frederick Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpu.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/cpu.c b/kernel/cpu.c index 247979a1b81..acf791c55b7 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -283,8 +283,7 @@ static inline void check_for_tasks(int cpu) task_cputime(p, &utime, &stime); if (task_cpu(p) == cpu && p->state == TASK_RUNNING && (utime || stime)) - printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d " - "(state = %ld, flags = %x)\n", + pr_warn("Task %s (pid = %d) is on cpu %d (state = %ld, flags = %x)\n", p->comm, task_pid_nr(p), cpu, p->state, p->flags); } @@ -336,8 +335,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) if (err) { nr_calls--; __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL); - printk("%s: attempt to take down CPU %u failed\n", - __func__, cpu); + pr_warn("%s: attempt to take down CPU %u 
failed\n", + __func__, cpu); goto out_release; } @@ -444,8 +443,8 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen) ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); if (ret) { nr_calls--; - printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n", - __func__, cpu); + pr_warn("%s: attempt to bring up CPU %u failed\n", + __func__, cpu); goto out_notify; } @@ -475,11 +474,10 @@ int cpu_up(unsigned int cpu) int err = 0; if (!cpu_possible(cpu)) { - printk(KERN_ERR "can't online cpu %d because it is not " - "configured as may-hotadd at boot time\n", cpu); + pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", + cpu); #if defined(CONFIG_IA64) - printk(KERN_ERR "please check additional_cpus= boot " - "parameter\n"); + pr_err("please check additional_cpus= boot parameter\n"); #endif return -EINVAL; } @@ -518,7 +516,7 @@ int disable_nonboot_cpus(void) */ cpumask_clear(frozen_cpus); - printk("Disabling non-boot CPUs ...\n"); + pr_info("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { if (cpu == first_cpu) continue; @@ -526,8 +524,7 @@ int disable_nonboot_cpus(void) if (!error) cpumask_set_cpu(cpu, frozen_cpus); else { - printk(KERN_ERR "Error taking CPU%d down: %d\n", - cpu, error); + pr_err("Error taking CPU%d down: %d\n", cpu, error); break; } } @@ -537,7 +534,7 @@ int disable_nonboot_cpus(void) /* Make sure the CPUs won't be enabled by someone else */ cpu_hotplug_disabled = 1; } else { - printk(KERN_ERR "Non-boot CPUs are not disabled\n"); + pr_err("Non-boot CPUs are not disabled\n"); } cpu_maps_update_done(); return error; @@ -561,17 +558,17 @@ void __ref enable_nonboot_cpus(void) if (cpumask_empty(frozen_cpus)) goto out; - printk(KERN_INFO "Enabling non-boot CPUs ...\n"); + pr_info("Enabling non-boot CPUs ...\n"); arch_enable_nonboot_cpus_begin(); for_each_cpu(cpu, frozen_cpus) { error = _cpu_up(cpu, 1); if (!error) { - printk(KERN_INFO "CPU%d is up\n", cpu); + pr_info("CPU%d is up\n", cpu); 
continue; } - printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); + pr_warn("Error taking CPU%d up: %d\n", cpu, error); } arch_enable_nonboot_cpus_end(); -- cgit v1.2.3-70-g09d2 From 462b29b8564c489e0aa3f5a3a505fd2776af5e55 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:18 -0700 Subject: kernel/backtracetest.c: replace no level printk by pr_info() Signed-off-by: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/backtracetest.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c index a5e026bc45c..1323360d90e 100644 --- a/kernel/backtracetest.c +++ b/kernel/backtracetest.c @@ -19,8 +19,8 @@ static void backtrace_test_normal(void) { - printk("Testing a backtrace from process context.\n"); - printk("The following trace is a kernel self test and not a bug!\n"); + pr_info("Testing a backtrace from process context.\n"); + pr_info("The following trace is a kernel self test and not a bug!\n"); dump_stack(); } @@ -37,8 +37,8 @@ static DECLARE_TASKLET(backtrace_tasklet, &backtrace_test_irq_callback, 0); static void backtrace_test_irq(void) { - printk("Testing a backtrace from irq context.\n"); - printk("The following trace is a kernel self test and not a bug!\n"); + pr_info("Testing a backtrace from irq context.\n"); + pr_info("The following trace is a kernel self test and not a bug!\n"); init_completion(&backtrace_work); tasklet_schedule(&backtrace_tasklet); @@ -51,8 +51,8 @@ static void backtrace_test_saved(void) struct stack_trace trace; unsigned long entries[8]; - printk("Testing a saved backtrace.\n"); - printk("The following trace is a kernel self test and not a bug!\n"); + pr_info("Testing a saved backtrace.\n"); + pr_info("The following trace is a kernel self test and not a bug!\n"); trace.nr_entries = 0; trace.max_entries = ARRAY_SIZE(entries); @@ -65,19 +65,19 @@ static void 
backtrace_test_saved(void) #else static void backtrace_test_saved(void) { - printk("Saved backtrace test skipped.\n"); + pr_info("Saved backtrace test skipped.\n"); } #endif static int backtrace_regression_test(void) { - printk("====[ backtrace testing ]===========\n"); + pr_info("====[ backtrace testing ]===========\n"); backtrace_test_normal(); backtrace_test_irq(); backtrace_test_saved(); - printk("====[ end of backtrace testing ]====\n"); + pr_info("====[ end of backtrace testing ]====\n"); return 0; } -- cgit v1.2.3-70-g09d2 From a6c8c6902c53e620e607e83f520e9ae424e2a424 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:19 -0700 Subject: kernel/capability.c: code clean-up - EXPORT_SYMBOL - typo: unexpectidly->unexpectedly - function prototype over 80 characters Signed-off-by: Fabian Frederick Cc: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/capability.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/capability.c b/kernel/capability.c index a8d63df0c32..84b2bbf443e 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -24,7 +24,6 @@ */ const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; - EXPORT_SYMBOL(__cap_empty_set); int file_caps_enabled = 1; @@ -189,7 +188,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) * * An alternative would be to return an error here * (-ERANGE), but that causes legacy applications to - * unexpectidly fail; the capget/modify/capset aborts + * unexpectedly fail; the capget/modify/capset aborts * before modification is attempted and the application * fails. */ @@ -395,7 +394,8 @@ EXPORT_SYMBOL(ns_capable); * This does not set PF_SUPERPRIV because the caller may not * actually be privileged. 
*/ -bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap) +bool file_ns_capable(const struct file *file, struct user_namespace *ns, + int cap) { if (WARN_ON_ONCE(!cap_valid(cap))) return false; -- cgit v1.2.3-70-g09d2 From b9e5db6d2bbe4416cd1c30c2d1891ef39d6bd0b7 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:20 -0700 Subject: kernel/exec_domain.c: code clean-up Fix checkpatch warnings about EXPORT_SYMBOL and return() Signed-off-by: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exec_domain.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 0dbeae37422..83d4382f569 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -37,7 +37,7 @@ static unsigned long ident_map[32] = { struct exec_domain default_exec_domain = { .name = "Linux", /* name */ .handler = default_handler, /* lcall7 causes a seg fault. */ - .pers_low = 0, /* PER_LINUX personality. */ + .pers_low = 0, /* PER_LINUX personality. */ .pers_high = 0, /* PER_LINUX personality. */ .signal_map = ident_map, /* Identity map signals. */ .signal_invmap = ident_map, /* - both ways. 
*/ @@ -83,7 +83,7 @@ lookup_exec_domain(unsigned int personality) ep = &default_exec_domain; out: read_unlock(&exec_domains_lock); - return (ep); + return ep; } int @@ -110,8 +110,9 @@ register_exec_domain(struct exec_domain *ep) out: write_unlock(&exec_domains_lock); - return (err); + return err; } +EXPORT_SYMBOL(register_exec_domain); int unregister_exec_domain(struct exec_domain *ep) @@ -133,6 +134,7 @@ unregister: write_unlock(&exec_domains_lock); return 0; } +EXPORT_SYMBOL(unregister_exec_domain); int __set_personality(unsigned int personality) { @@ -144,6 +146,7 @@ int __set_personality(unsigned int personality) return 0; } +EXPORT_SYMBOL(__set_personality); #ifdef CONFIG_PROC_FS static int execdomains_proc_show(struct seq_file *m, void *v) @@ -188,8 +191,3 @@ SYSCALL_DEFINE1(personality, unsigned int, personality) return old; } - - -EXPORT_SYMBOL(register_exec_domain); -EXPORT_SYMBOL(unregister_exec_domain); -EXPORT_SYMBOL(__set_personality); -- cgit v1.2.3-70-g09d2 From eaa1809b900c460a020bff1f4030f4f6a237b2b2 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:21 -0700 Subject: kernel/latencytop.c: convert seq_printf to seq_puts This patch also fixes one function declaration over 80 characters. 
Signed-off-by: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/latencytop.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/latencytop.c b/kernel/latencytop.c index a462b317f9a..a02812743a7 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -88,7 +88,8 @@ static void clear_global_latency_tracing(void) } static void __sched -account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat) +account_global_scheduler_latency(struct task_struct *tsk, + struct latency_record *lat) { int firstnonnull = MAXLR + 1; int i; @@ -255,7 +256,7 @@ static int lstats_show(struct seq_file *m, void *v) break; seq_printf(m, " %ps", (void *)bt); } - seq_printf(m, "\n"); + seq_puts(m, "\n"); } } return 0; -- cgit v1.2.3-70-g09d2 From cf25004069d3ccd6aae607d8175bdff67c1dd319 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:22 -0700 Subject: kernel/stop_machine.c: kernel-doc warning fix Signed-off-by: Fabian Frederick Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/stop_machine.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 01fbae5b97b..695f0c6cd16 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -307,6 +307,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * * @cpu: cpu to stop * @fn: function to execute * @arg: argument to @fn + * @work_buf: pointer to cpu_stop_work structure * * Similar to stop_one_cpu() but doesn't wait for completion. 
The * caller is responsible for ensuring @work_buf is currently unused -- cgit v1.2.3-70-g09d2 From cac92ba74f19fd58a28976f753f9327f27cf1669 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:23 -0700 Subject: kernel/tracepoint.c: kernel-doc fixes Signed-off-by: Fabian Frederick Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/tracepoint.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 6620e5837ce..33cbd8c203f 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -239,6 +239,7 @@ static int tracepoint_remove_func(struct tracepoint *tp, * tracepoint_probe_register - Connect a probe to a tracepoint * @tp: tracepoint * @probe: probe handler + * @data: tracepoint data * * Returns 0 if ok, error value on error. * Note: if @tp is within a module, the caller is responsible for @@ -264,6 +265,7 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register); * tracepoint_probe_unregister - Disconnect a probe from a tracepoint * @tp: tracepoint * @probe: probe function pointer + * @data: tracepoint data * * Returns 0 if ok, error value on error. 
*/ -- cgit v1.2.3-70-g09d2 From 6c5a53c67057bddf7f8e26c93a8e045215f61539 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:24 -0700 Subject: kernel/res_counter.c: replace simple_strtoull by kstrtoull [akpm@linux-foundation.org: don't overwrite kstrtoull()'s errno] Signed-off-by: Fabian Frederick Cc: Michal Hocko Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/res_counter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/res_counter.c b/kernel/res_counter.c index 51dbac6a363..e791130f85a 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -186,8 +186,11 @@ int res_counter_memparse_write_strategy(const char *buf, /* return RES_COUNTER_MAX(unlimited) if "-1" is specified */ if (*buf == '-') { - res = simple_strtoull(buf + 1, &end, 10); - if (res != 1 || *end != '\0') + int rc = kstrtoull(buf + 1, 10, &res); + + if (rc) + return rc; + if (res != 1) return -EINVAL; *resp = RES_COUNTER_MAX; return 0; -- cgit v1.2.3-70-g09d2 From 616feab753972b9751308f3cd2a68fc57eae8edb Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:25 -0700 Subject: kernel/reboot.c: convert simple_strtoul to kstrtoint Replace obsolete function. kstrtoint is used as reboot_cpu is an integer. 
Signed-off-by: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/reboot.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/reboot.c b/kernel/reboot.c index 662c83fc16b..a3a9e240fcd 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -388,15 +388,22 @@ static int __init reboot_setup(char *str) break; case 's': - if (isdigit(*(str+1))) - reboot_cpu = simple_strtoul(str+1, NULL, 0); - else if (str[1] == 'm' && str[2] == 'p' && - isdigit(*(str+3))) - reboot_cpu = simple_strtoul(str+3, NULL, 0); - else + { + int rc; + + if (isdigit(*(str+1))) { + rc = kstrtoint(str+1, 0, &reboot_cpu); + if (rc) + return rc; + } else if (str[1] == 'm' && str[2] == 'p' && + isdigit(*(str+3))) { + rc = kstrtoint(str+3, 0, &reboot_cpu); + if (rc) + return rc; + } else reboot_mode = REBOOT_SOFT; break; - + } case 'g': reboot_mode = REBOOT_GPIO; break; -- cgit v1.2.3-70-g09d2 From 95583e4ab5745218373add88ffddb70faff2d0c8 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:26 -0700 Subject: kernel/utsname_sysctl.c: replace obsolete __initcall by device_initcall Also fixes checkpatch warnings on proc_dostring function parameters Signed-off-by: Fabian Frederick Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/utsname_sysctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 4f69f9a5e22..6fbe811c7ad 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -51,7 +51,7 @@ static int proc_do_uts_string(ctl_table *table, int write, int r; memcpy(&uts_table, table, sizeof(uts_table)); uts_table.data = get_uts(table, write); - r = proc_dostring(&uts_table,write,buffer,lenp, ppos); + r = proc_dostring(&uts_table, write, buffer, lenp, ppos); put_uts(table, write, uts_table.data); if (write) @@ -135,4 +135,4 @@ static int __init 
utsname_sysctl_init(void) return 0; } -__initcall(utsname_sysctl_init); +device_initcall(utsname_sysctl_init); -- cgit v1.2.3-70-g09d2 From b51dbec68c8732caac2495f558659556523e8322 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:11:26 -0700 Subject: kernel/hung_task.c: convert simple_strtoul to kstrtouint sysctl_hung_task_panic has been changed to unsigned int. use kstrtouint instead of obsolete simple_strtoul Signed-off-by: Fabian Frederick Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/hung_task.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 06bb1417b06..06db12434d7 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -52,8 +52,10 @@ unsigned int __read_mostly sysctl_hung_task_panic = static int __init hung_task_panic_setup(char *str) { - sysctl_hung_task_panic = simple_strtoul(str, NULL, 0); + int rc = kstrtouint(str, 0, &sysctl_hung_task_panic); + if (rc) + return rc; return 1; } __setup("hung_task_panic=", hung_task_panic_setup); -- cgit v1.2.3-70-g09d2 From b300a4ea665f7fa44f015616ac1874deca891c5e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 4 Jun 2014 16:11:27 -0700 Subject: kernel/user.c: drop unused field 'files' from user_struct Nobody seems to have used it for a long time. Let's drop it. Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/user.c | 1 - 2 files changed, 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2f2dd7d932a..611676fd4c2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -745,7 +745,6 @@ static inline int signal_group_exit(const struct signal_struct *sig) struct user_struct { atomic_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have?
*/ - atomic_t files; /* How many open files does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_INOTIFY_USER atomic_t inotify_watches; /* How many inotify watches does this user have? */ diff --git a/kernel/user.c b/kernel/user.c index 294fc6a9416..4efa39350e4 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -87,7 +87,6 @@ static DEFINE_SPINLOCK(uidhash_lock); struct user_struct root_user = { .__count = ATOMIC_INIT(1), .processes = ATOMIC_INIT(1), - .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, .uid = GLOBAL_ROOT_UID, -- cgit v1.2.3-70-g09d2 From 0a581694ab7a5bc083d710df8a552a6a055b005f Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 4 Jun 2014 16:11:28 -0700 Subject: printk: split code for making free space in the log buffer The check for free space in the log buffer always passes when "first_seq" and "next_seq" are equal. In theory, it might cause writing outside of the log buffer. Fortunately, the current usage looks safe because the used "text" and "dict" buffers are quite limited. See the second patch for more details. Anyway, it is better to be on the safe side and add a check. An easy solution is done in the 2nd patch and it is improved in the 4th patch. 5th patch fixes the computation of the printed message length. 1st and 3rd patches just do some code refactoring to make the other patches easier. This patch (of 5): There will be needed some fixes in the check for free space. They will be easier if the code is moved outside of the quite long log_store() function. This patch does not change the existing behavior. 
Signed-off-by: Petr Mladek Cc: Jan Kara Cc: Jiri Kosina Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 221229cf019..99b7a2d87b6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -297,6 +297,34 @@ static u32 log_next(u32 idx) return idx + msg->len; } +/* check whether there is enough free space for the given message */ +static int logbuf_has_space(u32 msg_size) +{ + u32 free; + + if (log_next_idx > log_first_idx) + free = max(log_buf_len - log_next_idx, log_first_idx); + else + free = log_first_idx - log_next_idx; + + /* + * We need space also for an empty header that signalizes wrapping + * of the buffer. + */ + return free >= msg_size + sizeof(struct printk_log); +} + +static void log_make_free_space(u32 msg_size) +{ + while (log_first_seq < log_next_seq) { + if (logbuf_has_space(msg_size)) + return; + /* drop old messages until we have enough continuous space */ + log_first_idx = log_next(log_first_idx); + log_first_seq++; + } +} + /* insert record into the buffer, discard old ones, update heads */ static void log_store(int facility, int level, enum log_flags flags, u64 ts_nsec, @@ -311,21 +339,7 @@ static void log_store(int facility, int level, pad_len = (-size) & (LOG_ALIGN - 1); size += pad_len; - while (log_first_seq < log_next_seq) { - u32 free; - - if (log_next_idx > log_first_idx) - free = max(log_buf_len - log_next_idx, log_first_idx); - else - free = log_first_idx - log_next_idx; - - if (free >= size + sizeof(struct printk_log)) - break; - - /* drop old messages until we have enough contiuous space */ - log_first_idx = log_next(log_first_idx); - log_first_seq++; - } + log_make_free_space(size); if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { /* -- cgit v1.2.3-70-g09d2 From 
f40e4b9f70d48eb08f443642283fdd9d05b27c6d Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 4 Jun 2014 16:11:30 -0700 Subject: printk: ignore too long messages There was no check for too long messages. The check for free space always passed when first_seq and next_seq were equal. Enough free space was not guaranteed, though. log_store() might be called to store messages up to 64kB + 64kB + 16B. This is the sum of the maximal text_len and dict_len values, and the size of the structure printk_log. On the other hand, the minimal size for the main log buffer currently is 4kB and it is enforced only by Kconfig. The good news is that the usage looks safe right now. log_store() is called only from vprintk_emit() and cont_flush(). Here the "text" part is always passed via a static buffer and the length is limited to LOG_LINE_MAX which is 1024. The "dict" part is NULL in most cases. The only exception is when vprintk_emit() is called from printk_emit() and dev_vprintk_emit(). But printk_emit() is currently used only in devkmsg_writev() and here "dict" is NULL as well. In dev_vprintk_emit(), "dict" is limited by the static buffer "hdr" of the size 128 bytes. It means that the current maximal printed text is 1024B + 128B + 16B and it always fits the log buffer. But it is only a matter of time before someone calls printk_emit() with unsafe parameters, especially the "dict" one. This patch adds a check for the free space when the buffer is empty. It reuses the already existing logbuf_has_space() function but it has to add an extra parameter. It defines whether the buffer is empty. Note that the same values of "first_idx" and "next_idx" might also mean that the buffer is full. If the buffer is empty, we must respect the current position of the indexes. We cannot reset them to the beginning of the buffer. Otherwise, the functions reading the buffer would get confused. The question is what to do when the message is too long.
This patch uses the easiest solution and just ignores the problematic message. Let's do something better in a followup patch. Signed-off-by: Petr Mladek Cc: Jan Kara Cc: Jiri Kosina Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 99b7a2d87b6..8fbbab1771e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -297,12 +297,20 @@ static u32 log_next(u32 idx) return idx + msg->len; } -/* check whether there is enough free space for the given message */ -static int logbuf_has_space(u32 msg_size) +/* + * Check whether there is enough free space for the given message. + * + * The same values of first_idx and next_idx mean that the buffer + * is either empty or full. + * + * If the buffer is empty, we must respect the position of the indexes. + * They cannot be reset to the beginning of the buffer. 
+ */ +static int logbuf_has_space(u32 msg_size, bool empty) { u32 free; - if (log_next_idx > log_first_idx) + if (log_next_idx > log_first_idx || empty) free = max(log_buf_len - log_next_idx, log_first_idx); else free = log_first_idx - log_next_idx; @@ -314,15 +322,21 @@ static int logbuf_has_space(u32 msg_size) return free >= msg_size + sizeof(struct printk_log); } -static void log_make_free_space(u32 msg_size) +static int log_make_free_space(u32 msg_size) { while (log_first_seq < log_next_seq) { - if (logbuf_has_space(msg_size)) - return; + if (logbuf_has_space(msg_size, false)) + return 0; /* drop old messages until we have enough continuous space */ log_first_idx = log_next(log_first_idx); log_first_seq++; } + + /* sequence numbers are equal, so the log buffer is empty */ + if (logbuf_has_space(msg_size, true)) + return 0; + + return -ENOMEM; } /* insert record into the buffer, discard old ones, update heads */ @@ -339,7 +353,9 @@ static void log_store(int facility, int level, pad_len = (-size) & (LOG_ALIGN - 1); size += pad_len; - log_make_free_space(size); + /* if message does not fit empty log buffer, ignore it */ + if (log_make_free_space(size)) + return; if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { /* -- cgit v1.2.3-70-g09d2 From 85c87043023b7e5535f975bbee12a4f5399df520 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 4 Jun 2014 16:11:31 -0700 Subject: printk: split message size computation We will want to recompute the message size when shrinking too long messages. Let's put the code into separate function. The side effect of setting "pad_len" is not nice but it is worth removing the code duplication. Note that I will probably have one more usage for this function when handling messages safe way in NMI context. This patch does not change the existing behavior. 
Signed-off-by: Petr Mladek Cc: Jan Kara Cc: Jiri Kosina Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9f088ed8404..7131dd4d0e3 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -351,6 +351,32 @@ static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len) return size; } +/* + * Define how much of the log buffer we could take at maximum. The value + * must be greater than two. Note that only half of the buffer is available + * when the index points to the middle. + */ +#define MAX_LOG_TAKE_PART 4 +static const char trunc_msg[] = "<truncated>"; + +static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len, + u16 *dict_len, u32 *pad_len) +{ + /* + * The message should not take the whole buffer. Otherwise, it might + * get removed too soon. + */ + u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; + if (*text_len > max_text_len) + *text_len = max_text_len; + /* enable the warning message */ + *trunc_msg_len = strlen(trunc_msg); + /* disable the "dict" completely */ + *dict_len = 0; + /* compute the size again, count also the warning message */ + return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len); +} + /* insert record into the buffer, discard old ones, update heads */ static void log_store(int facility, int level, enum log_flags flags, u64 ts_nsec, @@ -359,13 +385,19 @@ static void log_store(int facility, int level, { struct printk_log *msg; u32 size, pad_len; + u16 trunc_msg_len = 0; /* number of '\0' padding bytes to next message */ size = msg_used_size(text_len, dict_len, &pad_len); - /* if message does not fit empty log buffer, ignore it */ - if (log_make_free_space(size)) - return; + if (log_make_free_space(size)) { + /* truncate the message if it is too long for empty buffer */ + size =
Signed-off-by: Petr Mladek Cc: Jan Kara Cc: Jiri Kosina Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9f088ed8404..7131dd4d0e3 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -351,6 +351,32 @@ static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len) return size; } +/* + * Define how much of the log buffer we could take at maximum. The value + * must be greater than two. Note that only half of the buffer is available + * when the index points to the middle. + */ +#define MAX_LOG_TAKE_PART 4 +static const char trunc_msg[] = ""; + +static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len, + u16 *dict_len, u32 *pad_len) +{ + /* + * The message should not take the whole buffer. Otherwise, it might + * get removed too soon. + */ + u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; + if (*text_len > max_text_len) + *text_len = max_text_len; + /* enable the warning message */ + *trunc_msg_len = strlen(trunc_msg); + /* disable the "dict" completely */ + *dict_len = 0; + /* compute the size again, count also the warning message */ + return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len); +} + /* insert record into the buffer, discard old ones, update heads */ static void log_store(int facility, int level, enum log_flags flags, u64 ts_nsec, @@ -359,13 +385,19 @@ static void log_store(int facility, int level, { struct printk_log *msg; u32 size, pad_len; + u16 trunc_msg_len = 0; /* number of '\0' padding bytes to next message */ size = msg_used_size(text_len, dict_len, &pad_len); - /* if message does not fit empty log buffer, ignore it */ - if (log_make_free_space(size)) - return; + if (log_make_free_space(size)) { + /* truncate the message if it is too long for empty buffer */ + size = 
truncate_msg(&text_len, &trunc_msg_len, + &dict_len, &pad_len); + /* survive when the log buffer is too small for trunc_msg */ + if (log_make_free_space(size)) + return; + } if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { /* @@ -381,6 +413,10 @@ static void log_store(int facility, int level, msg = (struct printk_log *)(log_buf + log_next_idx); memcpy(log_text(msg), text, text_len); msg->text_len = text_len; + if (trunc_msg_len) { + memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len); + msg->text_len += trunc_msg_len; + } memcpy(log_dict(msg), dict, dict_len); msg->dict_len = dict_len; msg->facility = facility; -- cgit v1.2.3-70-g09d2 From 034633ccb24d675850f99bf85c1c5880c831e4b6 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 4 Jun 2014 16:11:33 -0700 Subject: printk: return really stored message length I wonder if anyone uses the printk return value but it is there and should be counted correctly. This patch modifies log_store() to return the number of really stored bytes from the 'text' part. Also it handles the return value in vprintk_emit(). Note that log_store() is used also in cont_flush() but we could ignore the return value there. The function works with characters that were already counted earlier. In addition, the store could never fail here because the length of the printed text is limited by the "cont" buffer and "dict" is NULL.
Signed-off-by: Petr Mladek Cc: Jan Kara Cc: Jiri Kosina Cc: Kay Sievers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7131dd4d0e3..7476a53bc37 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -378,10 +378,10 @@ static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len, } /* insert record into the buffer, discard old ones, update heads */ -static void log_store(int facility, int level, - enum log_flags flags, u64 ts_nsec, - const char *dict, u16 dict_len, - const char *text, u16 text_len) +static int log_store(int facility, int level, + enum log_flags flags, u64 ts_nsec, + const char *dict, u16 dict_len, + const char *text, u16 text_len) { struct printk_log *msg; u32 size, pad_len; @@ -396,7 +396,7 @@ static void log_store(int facility, int level, &dict_len, &pad_len); /* survive when the log buffer is too small for trunc_msg */ if (log_make_free_space(size)) - return; + return 0; } if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { @@ -432,6 +432,8 @@ static void log_store(int facility, int level, /* insert message */ log_next_idx += msg->len; log_next_seq++; + + return msg->text_len; } #ifdef CONFIG_SECURITY_DMESG_RESTRICT @@ -1606,10 +1608,10 @@ asmlinkage int vprintk_emit(int facility, int level, "BUG: recent printk recursion!"; recursion_bug = 0; - printed_len += strlen(recursion_msg); + text_len = strlen(recursion_msg); /* emit KERN_CRIT message */ - log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, - NULL, 0, recursion_msg, printed_len); + printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, + NULL, 0, recursion_msg, text_len); } /* @@ -1662,9 +1664,12 @@ asmlinkage int vprintk_emit(int facility, int level, cont_flush(LOG_NEWLINE); /* buffer line if possible, otherwise store it right away */ - if 
(!cont_add(facility, level, text, text_len)) - log_store(facility, level, lflags | LOG_CONT, 0, - dict, dictlen, text, text_len); + if (cont_add(facility, level, text, text_len)) + printed_len += text_len; + else + printed_len += log_store(facility, level, + lflags | LOG_CONT, 0, + dict, dictlen, text, text_len); } else { bool stored = false; @@ -1683,11 +1688,12 @@ asmlinkage int vprintk_emit(int facility, int level, cont_flush(LOG_NEWLINE); } - if (!stored) - log_store(facility, level, lflags, 0, - dict, dictlen, text, text_len); + if (stored) + printed_len += text_len; + else + printed_len += log_store(facility, level, lflags, 0, + dict, dictlen, text, text_len); } - printed_len += text_len; /* * Try to acquire and then immediately release the console semaphore. -- cgit v1.2.3-70-g09d2 From ca1d432ad8a527fabc5c7ceed8526e3a28de121c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Jun 2014 16:11:34 -0700 Subject: printk: remove outdated comment Comment about interesting interlocking between lockbuf_lock and console_sem is outdated. It was added in 2002 by commit a880f45a48be during conversion of console_lock to console_sem + lockbuf_lock. At that time release_console_sem() (today's equivalent is console_unlock()) was indeed using lockbuf_lock to avoid races between trylock on console_sem in printk() and unlock of console_sem. However these days the interlocking is gone and the races are avoided by rechecking logbuf state after releasing console_sem. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 7476a53bc37..5bc54478c96 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -206,8 +206,7 @@ struct printk_log { }; /* - * The logbuf_lock protects kmsg buffer, indices, counters. 
It is also - * used in interesting ways to provide interlocking in console_unlock(); + * The logbuf_lock protects kmsg buffer, indices, counters. */ static DEFINE_RAW_SPINLOCK(logbuf_lock); -- cgit v1.2.3-70-g09d2 From 608873cacb9d0d2811586fcc79a38b64eabd6d32 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Jun 2014 16:11:35 -0700 Subject: printk: release lockbuf_lock before calling console_trylock_for_printk() There's no reason to hold lockbuf_lock when entering console_trylock_for_printk(). The first thing this function does is to call down_trylock(console_sem) and if that fails it immediately unlocks lockbuf_lock. So lockbuf_lock isn't needed for that branch. When down_trylock() succeeds, the rest of console_trylock() is OK without lockbuf_lock (it is called without it from other places), and the only remaining thing in console_trylock_for_printk() is can_use_console() call. For that call console_sem is enough (it iterates all consoles and checks CON_ANYTIME flag). So we drop logbuf_lock before entering console_trylock_for_printk() which simplifies the code. 
[akpm@linux-foundation.org: fix have_callable_console() comment] Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 54 ++++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 33 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5bc54478c96..6e1b21a8a49 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -249,9 +249,6 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; -/* cpu currently holding logbuf_lock */ -static volatile unsigned int logbuf_cpu = UINT_MAX; - /* human readable text of the record */ static char *log_text(const struct printk_log *msg) { @@ -1380,7 +1377,10 @@ static void zap_locks(void) sema_init(&console_sem, 1); } -/* Check if we have any console registered that can be called early in boot. */ +/* + * Check if we have any console that is capable of printing while cpu is + * booting or shutting down. Requires console_sem. + */ static int have_callable_console(void) { struct console *con; @@ -1410,36 +1410,22 @@ static inline int can_use_console(unsigned int cpu) * messages from a 'printk'. Return true (and with the * console_lock held, and 'console_locked' set) if it * is successful, false otherwise. - * - * This gets called with the 'logbuf_lock' spinlock held and - * interrupts disabled. It should return with 'lockbuf_lock' - * released but interrupts still disabled. */ static int console_trylock_for_printk(unsigned int cpu) - __releases(&logbuf_lock) { - int retval = 0, wake = 0; - - if (console_trylock()) { - retval = 1; - - /* - * If we can't use the console, we need to release - * the console semaphore by hand to avoid flushing - * the buffer. We need to hold the console semaphore - * in order to do this test safely. 
- */ - if (!can_use_console(cpu)) { - console_locked = 0; - wake = 1; - retval = 0; - } - } - logbuf_cpu = UINT_MAX; - raw_spin_unlock(&logbuf_lock); - if (wake) + if (!console_trylock()) + return 0; + /* + * If we can't use the console, we need to release the console + * semaphore by hand to avoid flushing the buffer. We need to hold the + * console semaphore in order to do this test safely. + */ + if (!can_use_console(cpu)) { + console_locked = 0; up(&console_sem); - return retval; + return 0; + } + return 1; } int printk_delay_msec __read_mostly; @@ -1572,6 +1558,9 @@ asmlinkage int vprintk_emit(int facility, int level, unsigned long flags; int this_cpu; int printed_len = 0; + /* cpu currently holding logbuf_lock in this function */ + static volatile unsigned int logbuf_cpu = UINT_MAX; + boot_delay_msec(level); printk_delay(); @@ -1694,13 +1683,12 @@ asmlinkage int vprintk_emit(int facility, int level, dict, dictlen, text, text_len); } + logbuf_cpu = UINT_MAX; + raw_spin_unlock(&logbuf_lock); /* * Try to acquire and then immediately release the console semaphore. * The release will print out buffers and wake up /dev/kmsg and syslog() * users. - * - * The console_trylock_for_printk() function will release 'logbuf_lock' - * regardless of whether it actually gets the console semaphore or not. */ if (console_trylock_for_printk(this_cpu)) console_unlock(); -- cgit v1.2.3-70-g09d2 From bd8d7cf5b8410fe98eba06a9aaa90efe88815d8a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Jun 2014 16:11:36 -0700 Subject: printk: fix lockdep instrumentation of console_sem Printk calls mutex_acquire() / mutex_release() by hand to instrument lockdep about console_sem. However in some corner cases the instrumentation is missing. Fix the problem by creating helper functions for locking / unlocking console_sem which take care of lockdep instrumentation as well. 
Signed-off-by: Jan Kara Reported-by: Fabio Estevam Reported-by: Andy Shevchenko Tested-by: Fabio Estevam Tested-By: Valdis Kletnieks Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 6e1b21a8a49..5ba37f81372 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -90,6 +90,29 @@ static struct lockdep_map console_lock_dep_map = { }; #endif +/* + * Helper macros to handle lockdep when locking/unlocking console_sem. We use + * macros instead of functions so that _RET_IP_ contains useful information. + */ +#define down_console_sem() do { \ + down(&console_sem);\ + mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\ +} while (0) + +static int __down_trylock_console_sem(unsigned long ip) +{ + if (down_trylock(&console_sem)) + return 1; + mutex_acquire(&console_lock_dep_map, 0, 1, ip); + return 0; +} +#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) + +#define up_console_sem() do { \ + mutex_release(&console_lock_dep_map, 1, _RET_IP_);\ + up(&console_sem);\ +} while (0) + /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. 
It's @@ -1422,7 +1445,7 @@ static int console_trylock_for_printk(unsigned int cpu) */ if (!can_use_console(cpu)) { console_locked = 0; - up(&console_sem); + up_console_sem(); return 0; } return 1; @@ -1951,16 +1974,14 @@ void suspend_console(void) printk("Suspending console(s) (use no_console_suspend to debug)\n"); console_lock(); console_suspended = 1; - up(&console_sem); - mutex_release(&console_lock_dep_map, 1, _RET_IP_); + up_console_sem(); } void resume_console(void) { if (!console_suspend_enabled) return; - down(&console_sem); - mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_); + down_console_sem(); console_suspended = 0; console_unlock(); } @@ -2002,12 +2023,11 @@ void console_lock(void) { might_sleep(); - down(&console_sem); + down_console_sem(); if (console_suspended) return; console_locked = 1; console_may_schedule = 1; - mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_); } EXPORT_SYMBOL(console_lock); @@ -2021,15 +2041,14 @@ EXPORT_SYMBOL(console_lock); */ int console_trylock(void) { - if (down_trylock(&console_sem)) + if (down_trylock_console_sem()) return 0; if (console_suspended) { - up(&console_sem); + up_console_sem(); return 0; } console_locked = 1; console_may_schedule = 0; - mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_); return 1; } EXPORT_SYMBOL(console_trylock); @@ -2091,7 +2110,7 @@ void console_unlock(void) bool retry; if (console_suspended) { - up(&console_sem); + up_console_sem(); return; } @@ -2153,7 +2172,6 @@ skip: local_irq_restore(flags); } console_locked = 0; - mutex_release(&console_lock_dep_map, 1, _RET_IP_); /* Release the exclusive_console once it is used */ if (unlikely(exclusive_console)) @@ -2161,7 +2179,7 @@ skip: raw_spin_unlock(&logbuf_lock); - up(&console_sem); + up_console_sem(); /* * Someone could have filled up the buffer again, so re-check if there's @@ -2206,7 +2224,7 @@ void console_unblank(void) * oops_in_progress is set to 1.. 
*/ if (oops_in_progress) { - if (down_trylock(&console_sem) != 0) + if (down_trylock_console_sem() != 0) return; } else console_lock(); -- cgit v1.2.3-70-g09d2 From 939f04bec1a4ef6ba4370b0f34b01decc844b1b1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Jun 2014 16:11:37 -0700 Subject: printk: enable interrupts before calling console_trylock_for_printk() We need interrupts disabled when calling console_trylock_for_printk() only so that cpu id we pass to can_use_console() remains valid (for other things console_sem provides all the exclusion we need and deadlocks on console_sem due to interrupts are impossible because we use down_trylock()). However if we are rescheduled, we are guaranteed to run on an online cpu so we can easily just get the cpu id in can_use_console(). We can lose a bit of performance when we enable interrupts in vprintk_emit() and then disable them again in console_unlock() but OTOH it can somewhat reduce interrupt latency caused by console_unlock() especially since later in the patch series we will want to spin on console_sem in console_trylock_for_printk(). Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5ba37f81372..4e22230f1f6 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1418,10 +1418,9 @@ static int have_callable_console(void) /* * Can we actually use the console at this time on this cpu? * - * Console drivers may assume that per-cpu resources have - * been allocated. So unless they're explicitly marked as - * being able to cope (CON_ANYTIME) don't call them until - * this CPU is officially up. + * Console drivers may assume that per-cpu resources have been allocated. 
So + * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't + * call them until this CPU is officially up. */ static inline int can_use_console(unsigned int cpu) { @@ -1434,8 +1433,10 @@ static inline int can_use_console(unsigned int cpu) * console_lock held, and 'console_locked' set) if it * is successful, false otherwise. */ -static int console_trylock_for_printk(unsigned int cpu) +static int console_trylock_for_printk(void) { + unsigned int cpu = smp_processor_id(); + if (!console_trylock()) return 0; /* @@ -1605,7 +1606,8 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (!oops_in_progress && !lockdep_recursing(current)) { recursion_bug = 1; - goto out_restore_irqs; + local_irq_restore(flags); + return 0; } zap_locks(); } @@ -1708,17 +1710,22 @@ asmlinkage int vprintk_emit(int facility, int level, logbuf_cpu = UINT_MAX; raw_spin_unlock(&logbuf_lock); + lockdep_on(); + local_irq_restore(flags); + + /* + * Disable preemption to avoid being preempted while holding + * console_sem which would prevent anyone from printing to console + */ + preempt_disable(); /* * Try to acquire and then immediately release the console semaphore. * The release will print out buffers and wake up /dev/kmsg and syslog() * users. */ - if (console_trylock_for_printk(this_cpu)) + if (console_trylock_for_printk()) console_unlock(); - - lockdep_on(); -out_restore_irqs: - local_irq_restore(flags); + preempt_enable(); return printed_len; } -- cgit v1.2.3-70-g09d2 From 458df9fd4815b47809875d57f42e16401674b621 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Jun 2014 16:11:38 -0700 Subject: printk: remove separate printk_sched buffers and use printk buf instead To prevent deadlocks with doing a printk inside the scheduler, printk_sched() was created. The issue is that printk has a console_sem that it can grab and release. The release does a wake up if there's a task pending on the sem, and this wake up grabs the rq locks that is held in the scheduler. 
This leads to a possible deadlock if the wake up uses the same rq as the one with the rq lock held already. What printk_sched() does is to save the printk write in a per cpu buffer and sets the PRINTK_PENDING_SCHED flag. On a timer tick, if this flag is set, the printk() is done against the buffer. There's a couple of issues with this approach. 1) If two printk_sched()s are called before the tick, the second one will overwrite the first one. 2) The temporary buffer is 512 bytes and is per cpu. This is a quite a bit of space wasted for something that is seldom used. In order to remove this, the printk_sched() can use the printk buffer instead, and delay the console_trylock()/console_unlock() to the queued work. Because printk_sched() would then be taking the logbuf_lock, the logbuf_lock must not be held while doing anything that may call into the scheduler functions, which includes wake ups. Unfortunately, printk() also has a console_sem that it uses, and on release, the up(&console_sem) may do a wake up of any pending waiters. This must be avoided while holding the logbuf_lock. Signed-off-by: Steven Rostedt Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 4e22230f1f6..247b0c1fadf 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -68,6 +68,9 @@ int console_printk[4] = { DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ }; +/* Deferred messaged from sched code are marked by this special level */ +#define SCHED_MESSAGE_LOGLEVEL -2 + /* * Low level drivers may need that to know if they can schedule in * their unblank() callback or not. So let's export it. @@ -229,7 +232,9 @@ struct printk_log { }; /* - * The logbuf_lock protects kmsg buffer, indices, counters. 
+ * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken + * within the scheduler's rq lock. It must be released before calling + * console_unlock() or anything else that might wake up a process. */ static DEFINE_RAW_SPINLOCK(logbuf_lock); @@ -1577,14 +1582,19 @@ asmlinkage int vprintk_emit(int facility, int level, static int recursion_bug; static char textbuf[LOG_LINE_MAX]; char *text = textbuf; - size_t text_len; + size_t text_len = 0; enum log_flags lflags = 0; unsigned long flags; int this_cpu; int printed_len = 0; + bool in_sched = false; /* cpu currently holding logbuf_lock in this function */ static volatile unsigned int logbuf_cpu = UINT_MAX; + if (level == SCHED_MESSAGE_LOGLEVEL) { + level = -1; + in_sched = true; + } boot_delay_msec(level); printk_delay(); @@ -1631,7 +1641,12 @@ asmlinkage int vprintk_emit(int facility, int level, * The printf needs to come first; we need the syslog * prefix which might be passed-in as a parameter. */ - text_len = vscnprintf(text, sizeof(textbuf), fmt, args); + if (in_sched) + text_len = scnprintf(text, sizeof(textbuf), + KERN_WARNING "[sched_delayed] "); + + text_len += vscnprintf(text + text_len, + sizeof(textbuf) - text_len, fmt, args); /* mark and strip a trailing newline */ if (text_len && text[text_len-1] == '\n') { @@ -1713,6 +1728,10 @@ asmlinkage int vprintk_emit(int facility, int level, lockdep_on(); local_irq_restore(flags); + /* If called from the scheduler, we can not call up(). 
*/ + if (in_sched) + return printed_len; + /* * Disable preemption to avoid being preempted while holding * console_sem which would prevent anyone from printing to console @@ -2532,21 +2551,19 @@ late_initcall(printk_late_init); /* * Delayed printk version, for scheduler-internal messages: */ -#define PRINTK_BUF_SIZE 512 - #define PRINTK_PENDING_WAKEUP 0x01 -#define PRINTK_PENDING_SCHED 0x02 +#define PRINTK_PENDING_OUTPUT 0x02 static DEFINE_PER_CPU(int, printk_pending); -static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf); static void wake_up_klogd_work_func(struct irq_work *irq_work) { int pending = __this_cpu_xchg(printk_pending, 0); - if (pending & PRINTK_PENDING_SCHED) { - char *buf = __get_cpu_var(printk_sched_buf); - pr_warn("[sched_delayed] %s", buf); + if (pending & PRINTK_PENDING_OUTPUT) { + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); } if (pending & PRINTK_PENDING_WAKEUP) @@ -2570,21 +2587,15 @@ void wake_up_klogd(void) int printk_sched(const char *fmt, ...) { - unsigned long flags; va_list args; - char *buf; int r; - local_irq_save(flags); - buf = __get_cpu_var(printk_sched_buf); - va_start(args, fmt); - r = vsnprintf(buf, PRINTK_BUF_SIZE, fmt, args); + r = vprintk_emit(0, SCHED_MESSAGE_LOGLEVEL, NULL, 0, fmt, args); va_end(args); - __this_cpu_or(printk_pending, PRINTK_PENDING_SCHED); + __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); - local_irq_restore(flags); return r; } -- cgit v1.2.3-70-g09d2 From 81954606265ab8f04b41154bd00576013affcf5b Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 4 Jun 2014 16:11:39 -0700 Subject: printk: disable preemption for printk_sched An earlier change in -mm (printk: remove separate printk_sched buffers...), removed the printk_sched irqsave/restore lines since it was safe for current users. 
Since we may be expanding usage of printk_sched(), disable preemption for this function to make it more generally safe to call. Signed-off-by: John Stultz Reviewed-by: Jan Kara Cc: Peter Zijlstra Cc: Jiri Bohac Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 247b0c1fadf..dc2b8bd9bc1 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2590,12 +2590,14 @@ int printk_sched(const char *fmt, ...) va_list args; int r; + preempt_disable(); va_start(args, fmt); r = vprintk_emit(0, SCHED_MESSAGE_LOGLEVEL, NULL, 0, fmt, args); va_end(args); __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); irq_work_queue(&__get_cpu_var(wake_up_klogd_work)); + preempt_enable(); return r; } -- cgit v1.2.3-70-g09d2 From aac74dc495456412c4130a1167ce4beb6c1f0b38 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 4 Jun 2014 16:11:40 -0700 Subject: printk: rename printk_sched to printk_deferred After learning we'll need some sort of deferred printk functionality in the timekeeping core, Peter suggested we rename the printk_sched function so it can be reused by needed subsystems. This only changes the function name. No logic changes. 
Signed-off-by: John Stultz Reviewed-by: Steven Rostedt Cc: Jan Kara Cc: Peter Zijlstra Cc: Jiri Bohac Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 6 +++--- kernel/printk/printk.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/deadline.c | 2 +- kernel/sched/rt.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/include/linux/printk.h b/include/linux/printk.h index 8752f7595b2..7847301e283 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -128,9 +128,9 @@ asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); /* - * Special printk facility for scheduler use only, _DO_NOT_USE_ ! + * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! */ -__printf(1, 2) __cold int printk_sched(const char *fmt, ...); +__printf(1, 2) __cold int printk_deferred(const char *fmt, ...); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -165,7 +165,7 @@ int printk(const char *s, ...) return 0; } static inline __printf(1, 2) __cold -int printk_sched(const char *s, ...) +int printk_deferred(const char *s, ...) { return 0; } diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index dc2b8bd9bc1..35d9db25190 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2585,7 +2585,7 @@ void wake_up_klogd(void) preempt_enable(); } -int printk_sched(const char *fmt, ...) +int printk_deferred(const char *fmt, ...) { va_list args; int r; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 913c6d6cc2c..caf03e89a06 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1367,7 +1367,7 @@ out: * leave kernel. 
*/ if (p->mm && printk_ratelimit()) { - printk_sched("process %d (%s) no longer affine to cpu%d\n", + printk_deferred("process %d (%s) no longer affine to cpu%d\n", task_pid_nr(p), p->comm, cpu); } } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index f9ca7d19781..d17e1c48a79 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -352,7 +352,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, if (!lag_once) { lag_once = true; - printk_sched("sched: DL replenish lagged to much\n"); + printk_deferred("sched: DL replenish lagged to much\n"); } dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; dl_se->runtime = pi_se->dl_runtime; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 0ebfd7a2947..5d7667b37c2 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -896,7 +896,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) if (!once) { once = true; - printk_sched("sched: RT throttling activated\n"); + printk_deferred("sched: RT throttling activated\n"); } } else { /* -- cgit v1.2.3-70-g09d2 From c224815dac9c739b79050d3cc67443ff500bc478 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 4 Jun 2014 16:11:41 -0700 Subject: printk: Add printk_deferred_once Two of the three printk_deferred uses are really printk_once style uses, so add a printk_deferred_once macro to simplify those call sites. 
Signed-off-by: John Stultz Reviewed-by: Steven Rostedt Reviewed-by: Jan Kara Cc: Peter Zijlstra Cc: Jiri Bohac Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/printk.h | 11 +++++++++++ kernel/sched/deadline.c | 7 +------ kernel/sched/rt.c | 8 +------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/include/linux/printk.h b/include/linux/printk.h index 7847301e283..f086d6c99db 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -266,9 +266,20 @@ extern asmlinkage void dump_stack(void) __cold; printk(fmt, ##__VA_ARGS__); \ } \ }) +#define printk_deferred_once(fmt, ...) \ +({ \ + static bool __print_once __read_mostly; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk_deferred(fmt, ##__VA_ARGS__); \ + } \ +}) #else #define printk_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) +#define printk_deferred_once(fmt, ...) \ + no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_once(fmt, ...) \ diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d17e1c48a79..e1574fca03b 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -348,12 +348,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, * entity. */ if (dl_time_before(dl_se->deadline, rq_clock(rq))) { - static bool lag_once = false; - - if (!lag_once) { - lag_once = true; - printk_deferred("sched: DL replenish lagged to much\n"); - } + printk_deferred_once("sched: DL replenish lagged to much\n"); dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline; dl_se->runtime = pi_se->dl_runtime; } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5d7667b37c2..b3512f1afce 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -890,14 +890,8 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) * but accrue some time due to boosting. 
*/ if (likely(rt_b->rt_runtime)) { - static bool once = false; - rt_rq->rt_throttled = 1; - - if (!once) { - once = true; - printk_deferred("sched: RT throttling activated\n"); - } + printk_deferred_once("sched: RT throttling activated\n"); } else { /* * In case we did anyway, make it go away, -- cgit v1.2.3-70-g09d2 From 6d9bcb621b0b0a20604cbdb298c4487e44dd0da2 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 4 Jun 2014 16:11:43 -0700 Subject: timekeeping: use printk_deferred when holding timekeeping seqlock Jiri Bohac pointed out that there are rare but potential deadlock possibilities when calling printk while holding the timekeeping seqlock. This is due to printk() triggering console sem wakeup, which can cause scheduling code to trigger hrtimers which may try to read the time. Specifically, as Jiri pointed out, that path is: printk vprintk_emit console_unlock up(&console_sem) __up wake_up_process try_to_wake_up ttwu_do_activate ttwu_activate activate_task enqueue_task enqueue_task_fair hrtick_update hrtick_start_fair hrtick_start_fair get_time ktime_get --> endless loop on read_seqcount_retry(&timekeeper_seq, ...) This patch tries to avoid this issue by using printk_deferred (previously named printk_sched) which should defer printing via a irq_work_queue. 
Signed-off-by: John Stultz Reported-by: Jiri Bohac Reviewed-by: Steven Rostedt Cc: Jan Kara Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/ntp.c | 15 +++++++++------ kernel/time/timekeeping.c | 7 ++++--- 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 419a52cecd2..5b0ac4de382 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -786,8 +786,9 @@ static long hardpps_update_freq(struct pps_normtime freq_norm) time_status |= STA_PPSERROR; pps_errcnt++; pps_dec_freq_interval(); - pr_err("hardpps: PPSERROR: interval too long - %ld s\n", - freq_norm.sec); + printk_deferred(KERN_ERR + "hardpps: PPSERROR: interval too long - %ld s\n", + freq_norm.sec); return 0; } @@ -800,7 +801,8 @@ static long hardpps_update_freq(struct pps_normtime freq_norm) delta = shift_right(ftemp - pps_freq, NTP_SCALE_SHIFT); pps_freq = ftemp; if (delta > PPS_MAXWANDER || delta < -PPS_MAXWANDER) { - pr_warning("hardpps: PPSWANDER: change=%ld\n", delta); + printk_deferred(KERN_WARNING + "hardpps: PPSWANDER: change=%ld\n", delta); time_status |= STA_PPSWANDER; pps_stbcnt++; pps_dec_freq_interval(); @@ -844,8 +846,9 @@ static void hardpps_update_phase(long error) * the time offset is updated. 
*/ if (jitter > (pps_jitter << PPS_POPCORN)) { - pr_warning("hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", - jitter, (pps_jitter << PPS_POPCORN)); + printk_deferred(KERN_WARNING + "hardpps: PPSJITTER: jitter=%ld, limit=%ld\n", + jitter, (pps_jitter << PPS_POPCORN)); time_status |= STA_PPSJITTER; pps_jitcnt++; } else if (time_status & STA_PPSTIME) { @@ -902,7 +905,7 @@ void __hardpps(const struct timespec *phase_ts, const struct timespec *raw_ts) time_status |= STA_PPSJITTER; /* restart the frequency calibration interval */ pps_fbase = *raw_ts; - pr_err("hardpps: PPSJITTER: bad pulse\n"); + printk_deferred(KERN_ERR "hardpps: PPSJITTER: bad pulse\n"); return; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f7df8ea2170..32d8d6aaedb 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -852,8 +852,9 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, struct timespec *delta) { if (!timespec_valid_strict(delta)) { - printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid " - "sleep delta value!\n"); + printk_deferred(KERN_WARNING + "__timekeeping_inject_sleeptime: Invalid " + "sleep delta value!\n"); return; } tk_xtime_add(tk, delta); @@ -1157,7 +1158,7 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) if (unlikely(tk->clock->maxadj && (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) { - printk_once(KERN_WARNING + printk_deferred_once(KERN_WARNING "Adjusting %s more than 11%% (%ld vs %ld)\n", tk->clock->name, (long)tk->mult + adj, (long)tk->clock->mult + tk->clock->maxadj); -- cgit v1.2.3-70-g09d2 From 84b5ec8a9df86f3dcaaaf912715db35e4852d1da Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 4 Jun 2014 16:11:45 -0700 Subject: printk: report dropping of messages from logbuf If the log ring buffer becomes full, we silently overwrite old messages with new data. 
console_unlock will detect this case and fast-forward the console_* pointers to skip over the corrupted data, but nothing will be reported to the user. This patch hijacks the first valid log message after detecting that we dropped messages and prefixes it with a note detailing how many messages were dropped. For long (~1000 char) messages, this will result in some truncation of the real message, but given that we're dropping things anyway, that doesn't seem to be the end of the world. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra Cc: Kay Sievers Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 35d9db25190..923c5d4e420 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2157,10 +2157,15 @@ again: } if (console_seq < log_first_seq) { + len = sprintf(text, "** %u printk messages dropped ** ", + (unsigned)(log_first_seq - console_seq)); + /* messages are gone, move to first one */ console_seq = log_first_seq; console_idx = log_first_idx; console_prev = 0; + } else { + len = 0; } skip: if (console_seq == log_next_seq) @@ -2185,8 +2190,8 @@ skip: } level = msg->level; - len = msg_print_text(msg, console_prev, false, - text, sizeof(text)); + len += msg_print_text(msg, console_prev, false, + text + len, sizeof(text) - len); console_idx = log_next(console_idx); console_seq++; console_prev = msg->flags; -- cgit v1.2.3-70-g09d2 From a8fe19ebfbfd90ec17c02284717238b02efb9580 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 4 Jun 2014 16:11:46 -0700 Subject: kernel/printk: use symbolic defines for console loglevels ... instead of naked numbers. Stuff in sysrq.c used to set it to 8 which is supposed to mean above default level so set it to DEBUG instead as we're terminating/killing all tasks and we want to be verbose there. 
Also, correct the check in x86_64_start_kernel which should be >= as we're clearly issuing the string there for all debug levels, not only the magical 10. Signed-off-by: Borislav Petkov Acked-by: Kees Cook Acked-by: Randy Dunlap Cc: Joe Perches Cc: Valdis Kletnieks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/head64.c | 2 +- arch/x86/platform/uv/uv_nmi.c | 2 +- drivers/nubus/nubus.c | 18 +++++++++--------- drivers/tty/sysrq.c | 8 ++++---- include/linux/printk.h | 15 +++++++++++++-- init/main.c | 4 ++-- kernel/debug/kdb/kdb_bt.c | 2 +- kernel/debug/kdb/kdb_io.c | 2 +- kernel/debug/kdb/kdb_main.c | 2 +- kernel/printk/printk.c | 13 +++---------- 10 files changed, 36 insertions(+), 32 deletions(-) (limited to 'kernel') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 068054f4bf2..eda1a865641 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -172,7 +172,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) */ load_ucode_bsp(); - if (console_loglevel == 10) + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) early_printk("Kernel alive\n"); clear_page(init_level4_pgt); diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index be27da60dc8..c89c93320c1 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -85,7 +85,7 @@ static cpumask_var_t uv_nmi_cpu_mask; * Default is all stack dumps go to the console and buffer. * Lower level to send to log buffer only. 
*/ -static int uv_nmi_loglevel = 7; +static int uv_nmi_loglevel = CONSOLE_LOGLEVEL_DEFAULT; module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644); /* diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c index 43926cd25ae..5066a7ef7b6 100644 --- a/drivers/nubus/nubus.c +++ b/drivers/nubus/nubus.c @@ -473,7 +473,7 @@ static struct nubus_dev* __init if (slot == 0 && (unsigned long)dir.base % 2) dir.base += 1; - if (console_loglevel >= 10) + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) printk(KERN_DEBUG "nubus_get_functional_resource: parent is 0x%p, dir is 0x%p\n", parent->base, dir.base); @@ -568,7 +568,7 @@ static int __init nubus_get_vidnames(struct nubus_board* board, printk(KERN_INFO " video modes supported:\n"); nubus_get_subdir(parent, &dir); - if (console_loglevel >= 10) + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) printk(KERN_DEBUG "nubus_get_vidnames: parent is 0x%p, dir is 0x%p\n", parent->base, dir.base); @@ -629,7 +629,7 @@ static int __init nubus_get_vendorinfo(struct nubus_board* board, printk(KERN_INFO " vendor info:\n"); nubus_get_subdir(parent, &dir); - if (console_loglevel >= 10) + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) printk(KERN_DEBUG "nubus_get_vendorinfo: parent is 0x%p, dir is 0x%p\n", parent->base, dir.base); @@ -654,7 +654,7 @@ static int __init nubus_get_board_resource(struct nubus_board* board, int slot, struct nubus_dirent ent; nubus_get_subdir(parent, &dir); - if (console_loglevel >= 10) + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) printk(KERN_DEBUG "nubus_get_board_resource: parent is 0x%p, dir is 0x%p\n", parent->base, dir.base); @@ -753,19 +753,19 @@ static void __init nubus_find_rom_dir(struct nubus_board* board) if (nubus_readdir(&dir, &ent) == -1) goto badrom; - if (console_loglevel >= 10) + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) printk(KERN_INFO "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data); /* This one takes us to where we want to go. 
*/ if (nubus_readdir(&dir, &ent) == -1) goto badrom; - if (console_loglevel >= 10) + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) printk(KERN_DEBUG "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data); nubus_get_subdir(&ent, &dir); /* Resource ID 01, also an "Unknown Macintosh" */ if (nubus_readdir(&dir, &ent) == -1) goto badrom; - if (console_loglevel >= 10) + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) printk(KERN_DEBUG "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data); /* FIXME: the first one is *not* always the right one. We @@ -780,7 +780,7 @@ static void __init nubus_find_rom_dir(struct nubus_board* board) path to that address... */ if (nubus_readdir(&dir, &ent) == -1) goto badrom; - if (console_loglevel >= 10) + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) printk(KERN_DEBUG "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data); /* Bwahahahaha... */ @@ -816,7 +816,7 @@ static struct nubus_board* __init nubus_add_board(int slot, int bytelanes) board->fblock = rp; /* Dump the format block for debugging purposes */ - if (console_loglevel >= 10) { + if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) { int i; printk(KERN_DEBUG "Slot %X, format block at 0x%p\n", slot, rp); diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index ce396ecdf41..b767a64e49d 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -88,7 +88,7 @@ static void sysrq_handle_loglevel(int key) int i; i = key - '0'; - console_loglevel = 7; + console_loglevel = CONSOLE_LOGLEVEL_DEFAULT; printk("Loglevel set to %d\n", i); console_loglevel = i; } @@ -343,7 +343,7 @@ static void send_sig_all(int sig) static void sysrq_handle_term(int key) { send_sig_all(SIGTERM); - console_loglevel = 8; + console_loglevel = CONSOLE_LOGLEVEL_DEBUG; } static struct sysrq_key_op sysrq_term_op = { .handler = sysrq_handle_term, @@ -387,7 +387,7 @@ static struct sysrq_key_op sysrq_thaw_op = { static void sysrq_handle_kill(int key) { send_sig_all(SIGKILL); - console_loglevel = 8; + 
console_loglevel = CONSOLE_LOGLEVEL_DEBUG; } static struct sysrq_key_op sysrq_kill_op = { .handler = sysrq_handle_kill, @@ -520,7 +520,7 @@ void __handle_sysrq(int key, bool check_mask) * routing in the consumers of /proc/kmsg. */ orig_log_level = console_loglevel; - console_loglevel = 7; + console_loglevel = CONSOLE_LOGLEVEL_DEFAULT; printk(KERN_INFO "SysRq : "); op_p = __sysrq_get_key_op(key); diff --git a/include/linux/printk.h b/include/linux/printk.h index 37f3a6589c1..319ff7e53ef 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -30,6 +30,17 @@ static inline const char *printk_skip_level(const char *buffer) return buffer; } +/* printk's without a loglevel use this.. */ +#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL + +/* We show everything that is MORE important than this.. */ +#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ +#define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ +#define CONSOLE_LOGLEVEL_QUIET 4 /* Shhh ..., when booted with "quiet" */ +#define CONSOLE_LOGLEVEL_DEFAULT 7 /* anything MORE serious than KERN_DEBUG */ +#define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ +#define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ + extern int console_printk[]; #define console_loglevel (console_printk[0]) @@ -39,13 +50,13 @@ extern int console_printk[]; static inline void console_silent(void) { - console_loglevel = 0; + console_loglevel = CONSOLE_LOGLEVEL_SILENT; } static inline void console_verbose(void) { if (console_loglevel) - console_loglevel = 15; + console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; } struct va_format { diff --git a/init/main.c b/init/main.c index e08c0b2065a..04fab8d74c8 100644 --- a/init/main.c +++ b/init/main.c @@ -203,13 +203,13 @@ EXPORT_SYMBOL(loops_per_jiffy); static int __init debug_kernel(char *str) { - console_loglevel = 10; + console_loglevel = CONSOLE_LOGLEVEL_DEBUG; return 0; } static int __init quiet_kernel(char *str) { - 
console_loglevel = 4; + console_loglevel = CONSOLE_LOGLEVEL_QUIET; return 0; } diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index b03e0e814e4..fe15fff5df5 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -21,7 +21,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr) { int old_lvl = console_loglevel; - console_loglevel = 15; + console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; kdb_trap_printk++; kdb_set_current_task(p); if (addr) { diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 14ff4849262..7c70812caea 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -710,7 +710,7 @@ kdb_printit: } if (logging) { saved_loglevel = console_loglevel; - console_loglevel = 0; + console_loglevel = CONSOLE_LOGLEVEL_SILENT; printk(KERN_INFO "%s", kdb_buffer); } diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 0b097c8a1e5..2f7c760305c 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1091,7 +1091,7 @@ static int kdb_reboot(int argc, const char **argv) static void kdb_dumpregs(struct pt_regs *regs) { int old_lvl = console_loglevel; - console_loglevel = 15; + console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; kdb_trap_printk++; show_regs(regs); kdb_trap_printk--; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 923c5d4e420..ea2d5f6962e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -54,18 +54,11 @@ #include "console_cmdline.h" #include "braille.h" -/* printk's without a loglevel use this.. */ -#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL - -/* We show everything that is MORE important than this.. 
*/ -#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ -#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */ - int console_printk[4] = { - DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */ + CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */ - MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */ - DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */ + CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ + CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ }; /* Deferred messaged from sched code are marked by this special level */ -- cgit v1.2.3-70-g09d2 From 6516a466193fe7f72644d65467fb9905139228c3 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:12:02 -0700 Subject: kernel/compat.c: use sizeof() instead of sizeof Fix 4 checkpatch warnings WARNING: sizeof *tv should be sizeof(*tv) Signed-off-by: Fabian Frederick Cc: "H. Peter Anvin" Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/compat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/compat.c b/kernel/compat.c index e40b0430b56..633394f442f 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -157,7 +157,7 @@ static int __compat_put_timespec(const struct timespec *ts, struct compat_timesp int compat_get_timeval(struct timeval *tv, const void __user *utv) { if (COMPAT_USE_64BIT_TIME) - return copy_from_user(tv, utv, sizeof *tv) ? -EFAULT : 0; + return copy_from_user(tv, utv, sizeof(*tv)) ? -EFAULT : 0; else return __compat_get_timeval(tv, utv); } @@ -166,7 +166,7 @@ EXPORT_SYMBOL_GPL(compat_get_timeval); int compat_put_timeval(const struct timeval *tv, void __user *utv) { if (COMPAT_USE_64BIT_TIME) - return copy_to_user(utv, tv, sizeof *tv) ? -EFAULT : 0; + return copy_to_user(utv, tv, sizeof(*tv)) ? 
-EFAULT : 0; else return __compat_put_timeval(tv, utv); } @@ -175,7 +175,7 @@ EXPORT_SYMBOL_GPL(compat_put_timeval); int compat_get_timespec(struct timespec *ts, const void __user *uts) { if (COMPAT_USE_64BIT_TIME) - return copy_from_user(ts, uts, sizeof *ts) ? -EFAULT : 0; + return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0; else return __compat_get_timespec(ts, uts); } @@ -184,7 +184,7 @@ EXPORT_SYMBOL_GPL(compat_get_timespec); int compat_put_timespec(const struct timespec *ts, void __user *uts) { if (COMPAT_USE_64BIT_TIME) - return copy_to_user(uts, ts, sizeof *ts) ? -EFAULT : 0; + return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0; else return __compat_put_timespec(ts, uts); } -- cgit v1.2.3-70-g09d2