aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/scheduler/sched-arch.txt10
-rw-r--r--arch/Kconfig9
-rw-r--r--arch/ia64/Kconfig12
-rw-r--r--arch/ia64/include/asm/switch_to.h8
-rw-r--r--arch/ia64/kernel/time.c66
-rw-r--r--arch/powerpc/include/asm/time.h6
-rw-r--r--arch/powerpc/kernel/process.c3
-rw-r--r--arch/powerpc/kernel/time.c55
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype16
-rw-r--r--arch/s390/Kconfig5
-rw-r--r--arch/s390/include/asm/cputime.h3
-rw-r--r--arch/s390/include/asm/switch_to.h4
-rw-r--r--arch/s390/kernel/vtime.c8
-rw-r--r--arch/tile/include/asm/topology.h1
-rw-r--r--arch/x86/Kconfig12
-rw-r--r--include/linux/hardirq.h8
-rw-r--r--include/linux/kernel_stat.h8
-rw-r--r--include/linux/kvm_host.h4
-rw-r--r--include/linux/sched.h10
-rw-r--r--include/linux/topology.h2
-rw-r--r--init/Kconfig157
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/sched/Makefile2
-rw-r--r--kernel/sched/core.c675
-rw-r--r--kernel/sched/cputime.c530
-rw-r--r--kernel/sched/fair.c81
-rw-r--r--kernel/sched/features.h10
-rw-r--r--kernel/sched/rt.c5
-rw-r--r--kernel/sched/sched.h69
-rw-r--r--kernel/softirq.c6
-rw-r--r--kernel/sysctl.c6
-rw-r--r--kernel/time/tick-sched.c3
32 files changed, 892 insertions, 906 deletions
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
index 28aa1075e29..b1b8587b86f 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.txt
@@ -17,16 +17,6 @@ you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
Unlocked context switches introduce only a very minor performance
penalty to the core scheduler implementation in the CONFIG_SMP case.
-2. Interrupt status
-By default, the switch_to arch function is called with interrupts
-disabled. Interrupts may be enabled over the call if it is likely to
-introduce a significant interrupt latency by adding the line
-`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
-unlocked context switches. This define also implies
-`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an
-example.
-
-
CPU idle
========
Your cpu_idle routines need to obey the following rules:
diff --git a/arch/Kconfig b/arch/Kconfig
index 1a7b468abf4..a62965d057f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -304,4 +304,13 @@ config HAVE_RCU_USER_QS
are already protected inside rcu_irq_enter/rcu_irq_exit() but
preemption or signal handling on irq exit still need to be protected.
+config HAVE_VIRT_CPU_ACCOUNTING
+ bool
+
+config HAVE_IRQ_TIME_ACCOUNTING
+ bool
+ help
+ Archs need to ensure they use a high enough resolution clock to
+ support irq time accounting and then call enable_sched_clock_irqtime().
+
source "kernel/gcov/Kconfig"
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 310cf5781fa..3c720ef6c32 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -25,6 +25,7 @@ config IA64
select HAVE_GENERIC_HARDIRQS
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
+ select HAVE_VIRT_CPU_ACCOUNTING
select ARCH_DISCARD_MEMBLOCK
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
@@ -340,17 +341,6 @@ config FORCE_MAX_ZONEORDER
default "17" if HUGETLB_PAGE
default "11"
-config VIRT_CPU_ACCOUNTING
- bool "Deterministic task and CPU time accounting"
- default n
- help
- Select this option to enable more accurate task and CPU time
- accounting. This is done by reading a CPU counter on each
- kernel entry and exit and on transitions within the kernel
- between system, softirq and hardirq state, so there is a
- small performance impact.
- If in doubt, say N here.
-
config SMP
bool "Symmetric multi-processing support"
select USE_GENERIC_SMP_HELPERS
diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h
index cb2412fcd17..d38c7ea5eea 100644
--- a/arch/ia64/include/asm/switch_to.h
+++ b/arch/ia64/include/asm/switch_to.h
@@ -30,13 +30,6 @@ extern struct task_struct *ia64_switch_to (void *next_task);
extern void ia64_save_extra (struct task_struct *task);
extern void ia64_load_extra (struct task_struct *task);
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
-# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
-#else
-# define IA64_ACCOUNT_ON_SWITCH(p,n)
-#endif
-
#ifdef CONFIG_PERFMON
DECLARE_PER_CPU(unsigned long, pfm_syst_info);
# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
@@ -49,7 +42,6 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct
|| PERFMON_IS_SYSWIDE())
#define __switch_to(prev,next,last) do { \
- IA64_ACCOUNT_ON_SWITCH(prev, next); \
if (IA64_HAS_EXTRA_STATE(prev)) \
ia64_save_extra(prev); \
if (IA64_HAS_EXTRA_STATE(next)) \
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index ecc904b33c5..80ff9acc5ed 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -83,32 +83,36 @@ static struct clocksource *itc_clocksource;
extern cputime_t cycle_to_cputime(u64 cyc);
+static void vtime_account_user(struct task_struct *tsk)
+{
+ cputime_t delta_utime;
+ struct thread_info *ti = task_thread_info(tsk);
+
+ if (ti->ac_utime) {
+ delta_utime = cycle_to_cputime(ti->ac_utime);
+ account_user_time(tsk, delta_utime, delta_utime);
+ ti->ac_utime = 0;
+ }
+}
+
/*
* Called from the context switch with interrupts disabled, to charge all
* accumulated times to the current process, and to prepare accounting on
* the next process.
*/
-void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
+void vtime_task_switch(struct task_struct *prev)
{
struct thread_info *pi = task_thread_info(prev);
- struct thread_info *ni = task_thread_info(next);
- cputime_t delta_stime, delta_utime;
- __u64 now;
+ struct thread_info *ni = task_thread_info(current);
- now = ia64_get_itc();
-
- delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
if (idle_task(smp_processor_id()) != prev)
- account_system_time(prev, 0, delta_stime, delta_stime);
+ vtime_account_system(prev);
else
- account_idle_time(delta_stime);
+ vtime_account_idle(prev);
- if (pi->ac_utime) {
- delta_utime = cycle_to_cputime(pi->ac_utime);
- account_user_time(prev, delta_utime, delta_utime);
- }
+ vtime_account_user(prev);
- pi->ac_stamp = ni->ac_stamp = now;
+ pi->ac_stamp = ni->ac_stamp;
ni->ac_stime = ni->ac_utime = 0;
}
@@ -116,29 +120,32 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
* Account time for a transition between system, hard irq or soft irq state.
* Note that this function is called with interrupts enabled.
*/
-void account_system_vtime(struct task_struct *tsk)
+static cputime_t vtime_delta(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
- unsigned long flags;
cputime_t delta_stime;
__u64 now;
- local_irq_save(flags);
-
now = ia64_get_itc();
delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
- if (irq_count() || idle_task(smp_processor_id()) != tsk)
- account_system_time(tsk, 0, delta_stime, delta_stime);
- else
- account_idle_time(delta_stime);
ti->ac_stime = 0;
-
ti->ac_stamp = now;
- local_irq_restore(flags);
+ return delta_stime;
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+ cputime_t delta = vtime_delta(tsk);
+
+ account_system_time(tsk, 0, delta, delta);
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+ account_idle_time(vtime_delta(tsk));
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
/*
* Called from the timer interrupt handler to charge accumulated user time
@@ -146,14 +153,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime);
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
- struct thread_info *ti = task_thread_info(p);
- cputime_t delta_utime;
-
- if (ti->ac_utime) {
- delta_utime = cycle_to_cputime(ti->ac_utime);
- account_user_time(p, delta_utime, delta_utime);
- ti->ac_utime = 0;
- }
+ vtime_account_user(p);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 3b4b4a8da92..c1f267694ac 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -197,12 +197,6 @@ struct cpu_usage {
DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING)
-#define account_process_vtime(tsk) account_process_tick(tsk, 0)
-#else
-#define account_process_vtime(tsk) do { } while (0)
-#endif
-
extern void secondary_cpu_time_init(void);
DECLARE_PER_CPU(u64, decrementers_next_tb);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1a1f2ddfb58..e9cb51f5f80 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -514,9 +514,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
local_irq_save(flags);
- account_system_vtime(current);
- account_process_vtime(current);
-
/*
* We can't take a PMU exception inside _switch() since there is a
* window where the kernel stack SLB and the kernel stack are out
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e49e93191b6..eaa9d0e6abc 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -291,13 +291,12 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
* Account time for a transition between system, hard irq
* or soft irq state.
*/
-void account_system_vtime(struct task_struct *tsk)
+static u64 vtime_delta(struct task_struct *tsk,
+ u64 *sys_scaled, u64 *stolen)
{
- u64 now, nowscaled, delta, deltascaled;
- unsigned long flags;
- u64 stolen, udelta, sys_scaled, user_scaled;
+ u64 now, nowscaled, deltascaled;
+ u64 udelta, delta, user_scaled;
- local_irq_save(flags);
now = mftb();
nowscaled = read_spurr(now);
get_paca()->system_time += now - get_paca()->starttime;
@@ -305,7 +304,7 @@ void account_system_vtime(struct task_struct *tsk)
deltascaled = nowscaled - get_paca()->startspurr;
get_paca()->startspurr = nowscaled;
- stolen = calculate_stolen_time(now);
+ *stolen = calculate_stolen_time(now);
delta = get_paca()->system_time;
get_paca()->system_time = 0;
@@ -322,35 +321,45 @@ void account_system_vtime(struct task_struct *tsk)
* the user ticks get saved up in paca->user_time_scaled to be
* used by account_process_tick.
*/
- sys_scaled = delta;
+ *sys_scaled = delta;
user_scaled = udelta;
if (deltascaled != delta + udelta) {
if (udelta) {
- sys_scaled = deltascaled * delta / (delta + udelta);
- user_scaled = deltascaled - sys_scaled;
+ *sys_scaled = deltascaled * delta / (delta + udelta);
+ user_scaled = deltascaled - *sys_scaled;
} else {
- sys_scaled = deltascaled;
+ *sys_scaled = deltascaled;
}
}
get_paca()->user_time_scaled += user_scaled;
- if (in_interrupt() || idle_task(smp_processor_id()) != tsk) {
- account_system_time(tsk, 0, delta, sys_scaled);
- if (stolen)
- account_steal_time(stolen);
- } else {
- account_idle_time(delta + stolen);
- }
- local_irq_restore(flags);
+ return delta;
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+ u64 delta, sys_scaled, stolen;
+
+ delta = vtime_delta(tsk, &sys_scaled, &stolen);
+ account_system_time(tsk, 0, delta, sys_scaled);
+ if (stolen)
+ account_steal_time(stolen);
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+ u64 delta, sys_scaled, stolen;
+
+ delta = vtime_delta(tsk, &sys_scaled, &stolen);
+ account_idle_time(delta + stolen);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
/*
* Transfer the user and system times accumulated in the paca
* by the exception entry and exit code to the generic process
* user and system time records.
* Must be called with interrupts disabled.
- * Assumes that account_system_vtime() has been called recently
+ * Assumes that vtime_account() has been called recently
* (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
@@ -366,6 +375,12 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
account_user_time(tsk, utime, utimescaled);
}
+void vtime_task_switch(struct task_struct *prev)
+{
+ vtime_account(prev);
+ account_process_tick(prev, 0);
+}
+
#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
#define calc_cputime_factors()
#endif
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 30fd01de6be..72afd2888ca 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
config PPC64
bool "64-bit kernel"
default n
+ select HAVE_VIRT_CPU_ACCOUNTING
help
This option selects whether a 32-bit or a 64-bit kernel
will be built.
@@ -337,21 +338,6 @@ config PPC_MM_SLICES
default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
default n
-config VIRT_CPU_ACCOUNTING
- bool "Deterministic task and CPU time accounting"
- depends on PPC64
- default y
- help
- Select this option to enable more accurate task and CPU time
- accounting. This is done by reading a CPU counter on each
- kernel entry and exit and on transitions within the kernel
- between system, softirq and hardirq state, so there is a
- small performance impact. This also enables accounting of
- stolen time on logically-partitioned systems running on
- IBM POWER5-based machines.
-
- If in doubt, say Y here.
-
config PPC_HAVE_PMU_SUPPORT
bool
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107610e01a2..f5ab543396d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -49,9 +49,6 @@ config GENERIC_LOCKBREAK
config PGSTE
def_bool y if KVM
-config VIRT_CPU_ACCOUNTING
- def_bool y
-
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y
@@ -89,6 +86,8 @@ config S390
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_CMPXCHG_LOCAL
+ select HAVE_VIRT_CPU_ACCOUNTING
+ select VIRT_CPU_ACCOUNTING
select ARCH_DISCARD_MEMBLOCK
select BUILDTIME_EXTABLE_SORT
select ARCH_INLINE_SPIN_TRYLOCK
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 8709bdef233..023d5ae2448 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -12,6 +12,9 @@
#include <linux/spinlock.h>
#include <asm/div64.h>
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+
/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
typedef unsigned long long __nocast cputime_t;
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index f223068b782..314cc9426fc 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -89,12 +89,8 @@ static inline void restore_access_regs(unsigned int *acrs)
prev = __switch_to(prev,next); \
} while (0)
-extern void account_vtime(struct task_struct *, struct task_struct *);
-extern void account_tick_vtime(struct task_struct *);
-
#define finish_arch_switch(prev) do { \
set_fs(current->thread.mm_segment); \
- account_vtime(prev, current); \
} while (0)
#endif /* __ASM_SWITCH_TO_H */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 4fc97b40a6e..cb5093c26d1 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -99,7 +99,7 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
return virt_timer_forward(user + system);
}
-void account_vtime(struct task_struct *prev, struct task_struct *next)
+void vtime_task_switch(struct task_struct *prev)
{
struct thread_info *ti;
@@ -107,7 +107,7 @@ void account_vtime(struct task_struct *prev, struct task_struct *next)
ti = task_thread_info(prev);
ti->user_timer = S390_lowcore.user_timer;
ti->system_timer = S390_lowcore.system_timer;
- ti = task_thread_info(next);
+ ti = task_thread_info(current);
S390_lowcore.user_timer = ti->user_timer;
S390_lowcore.system_timer = ti->system_timer;
}
@@ -122,7 +122,7 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
* Update process times based on virtual cpu times stored by entry.S
* to the lowcore fields user_timer, system_timer & steal_clock.
*/
-void account_system_vtime(struct task_struct *tsk)
+void vtime_account(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
u64 timer, system;
@@ -138,7 +138,7 @@ void account_system_vtime(struct task_struct *tsk)
virt_timer_forward(system);
}
-EXPORT_SYMBOL_GPL(account_system_vtime);
+EXPORT_SYMBOL_GPL(vtime_account);
void __kprobes vtime_stop_cpu(void)
{
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
index 7a7ce390534..d5e86c9f74f 100644
--- a/arch/tile/include/asm/topology.h
+++ b/arch/tile/include/asm/topology.h
@@ -69,7 +69,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 0*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ff1f56a018..488ba8da8fe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -101,6 +101,7 @@ config X86
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select HAVE_RCU_USER_QS if X86_64
+ select HAVE_IRQ_TIME_ACCOUNTING
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS || UPROBES)
@@ -800,17 +801,6 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 305f23cd7cf..cab3da3d094 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -132,11 +132,11 @@ extern void synchronize_irq(unsigned int irq);
struct task_struct;
#if !defined(CONFIG_VIRT_CPU_ACCOUNTING) && !defined(CONFIG_IRQ_TIME_ACCOUNTING)
-static inline void account_system_vtime(struct task_struct *tsk)
+static inline void vtime_account(struct task_struct *tsk)
{
}
#else
-extern void account_system_vtime(struct task_struct *tsk);
+extern void vtime_account(struct task_struct *tsk);
#endif
#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
@@ -162,7 +162,7 @@ extern void rcu_nmi_exit(void);
*/
#define __irq_enter() \
do { \
- account_system_vtime(current); \
+ vtime_account(current); \
add_preempt_count(HARDIRQ_OFFSET); \
trace_hardirq_enter(); \
} while (0)
@@ -178,7 +178,7 @@ extern void irq_enter(void);
#define __irq_exit() \
do { \
trace_hardirq_exit(); \
- account_system_vtime(current); \
+ vtime_account(current); \
sub_preempt_count(HARDIRQ_OFFSET); \
} while (0)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 2fbd9053c2d..36d12f0884c 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -130,4 +130,12 @@ extern void account_process_tick(struct task_struct *, int user);
extern void account_steal_ticks(unsigned long ticks);
extern void account_idle_ticks(unsigned long ticks);
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+extern void vtime_task_switch(struct task_struct *prev);
+extern void vtime_account_system(struct task_struct *tsk);
+extern void vtime_account_idle(struct task_struct *tsk);
+#else
+static inline void vtime_task_switch(struct task_struct *prev) { }
+#endif
+
#endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b70b48b0109..8a59e0abe5f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -685,7 +685,7 @@ static inline int kvm_deassign_device(struct kvm *kvm,
static inline void kvm_guest_enter(void)
{
BUG_ON(preemptible());
- account_system_vtime(current);
+ vtime_account(current);
current->flags |= PF_VCPU;
/* KVM does not hold any references to rcu protected data when it
* switches CPU into a guest mode. In fact switching to a guest mode
@@ -699,7 +699,7 @@ static inline void kvm_guest_enter(void)
static inline void kvm_guest_exit(void)
{
- account_system_vtime(current);
+ vtime_account(current);
current->flags &= ~PF_VCPU;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 83035269e59..765dffbb085 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -273,11 +273,11 @@ extern void init_idle_bootup_task(struct task_struct *idle);
extern int runqueue_is_locked(int cpu);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
-extern void select_nohz_load_balancer(int stop_tick);
+extern void nohz_balance_enter_idle(int cpu);
extern void set_cpu_sd_state_idle(void);
extern int get_nohz_timer_target(void);
#else
-static inline void select_nohz_load_balancer(int stop_tick) { }
+static inline void nohz_balance_enter_idle(int cpu) { }
static inline void set_cpu_sd_state_idle(void) { }
#endif
@@ -681,11 +681,6 @@ struct signal_struct {
* (notably. ptrace) */
};
-/* Context switch must be unlocked if interrupts are to be enabled */
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-# define __ARCH_WANT_UNLOCKED_CTXSW
-#endif
-
/*
* Bits in flags field of signal_struct.
*/
@@ -863,7 +858,6 @@ enum cpu_idle_type {
#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
-#define SD_PREFER_LOCAL 0x0040 /* Prefer to keep tasks local to this domain */
#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index fec12d66721..d3cf0d6e771 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -129,7 +129,6 @@ int arch_update_cpu_topology(void);
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
| 0*SD_SHARE_CPUPOWER \
| 1*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
@@ -160,7 +159,6 @@ int arch_update_cpu_topology(void);
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_SHARE_PKG_RESOURCES \
| 0*SD_SERIALIZE \
diff --git a/init/Kconfig b/init/Kconfig
index c26b8a1d2b5..3466a6e017b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -267,6 +267,106 @@ config POSIX_MQUEUE_SYSCTL
depends on SYSCTL
default y
+config FHANDLE
+ bool "open by fhandle syscalls"
+ select EXPORTFS
+ help
+ If you say Y here, a user level program will be able to map
+ file names to handle and then later use the handle for
+ different file system operations. This is useful in implementing
+ userspace file servers, which now track files using handles instead
+ of names. The handle would remain the same even if file names
+ get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
+ syscalls.
+
+config AUDIT
+ bool "Auditing support"
+ depends on NET
+ help
+ Enable auditing infrastructure that can be used with another
+ kernel subsystem, such as SELinux (which requires this for
+ logging of avc messages output). Does not do system-call
+ auditing without CONFIG_AUDITSYSCALL.
+
+config AUDITSYSCALL
+ bool "Enable system-call auditing support"
+ depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT))
+ default y if SECURITY_SELINUX
+ help
+ Enable low-overhead system-call auditing infrastructure that
+ can be used independently or with another kernel subsystem,
+ such as SELinux.
+
+config AUDIT_WATCH
+ def_bool y
+ depends on AUDITSYSCALL
+ select FSNOTIFY
+
+config AUDIT_TREE
+ def_bool y
+ depends on AUDITSYSCALL
+ select FSNOTIFY
+
+config AUDIT_LOGINUID_IMMUTABLE
+ bool "Make audit loginuid immutable"
+ depends on AUDIT
+ help
+ The config option toggles if a task setting its loginuid requires
+ CAP_SYS_AUDITCONTROL or if that task should require no special permissions
+ but should instead only allow setting its loginuid if it was never
+ previously set. On systems which use systemd or a similar central
+ process to restart login services this should be set to true. On older
+ systems in which an admin would typically have to directly stop and
+ start processes this should be set to false. Setting this to true allows
+ one to drop potentially dangerous capabilites from the login tasks,
+ but may not be backwards compatible with older init systems.
+
+source "kernel/irq/Kconfig"
+source "kernel/time/Kconfig"
+
+menu "CPU/Task time and stats accounting"
+
+choice
+ prompt "Cputime accounting"
+ default TICK_CPU_ACCOUNTING if !PPC64
+ default VIRT_CPU_ACCOUNTING if PPC64
+
+# Kind of a stub config for the pure tick based cputime accounting
+config TICK_CPU_ACCOUNTING
+ bool "Simple tick based cputime accounting"
+ depends on !S390
+ help
+ This is the basic tick based cputime accounting that maintains
+ statistics about user, system and idle time spent on per jiffies
+ granularity.
+
+ If unsure, say Y.
+
+config VIRT_CPU_ACCOUNTING
+ bool "Deterministic task and CPU time accounting"
+ depends on HAVE_VIRT_CPU_ACCOUNTING
+ help
+ Select this option to enable more accurate task and CPU time
+ accounting. This is done by reading a CPU counter on each
+ kernel entry and exit and on transitions within the kernel
+ between system, softirq and hardirq state, so there is a
+ small performance impact. In the case of s390 or IBM POWER > 5,
+ this also enables accounting of stolen time on logically-partitioned
+ systems.
+
+config IRQ_TIME_ACCOUNTING
+ bool "Fine granularity task level IRQ time accounting"
+ depends on HAVE_IRQ_TIME_ACCOUNTING
+ help
+ Select this option to enable fine granularity task irq time
+ accounting. This is done by reading a timestamp on each
+ transitions between softirq and hardirq state, so there can be a
+ small performance impact.
+
+ If in doubt, say N here.
+
+endchoice
+
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
help
@@ -292,18 +392,6 @@ config BSD_PROCESS_ACCT_V3
for processing it. A preliminary version of these tools is available
at <http://www.gnu.org/software/acct/>.
-config FHANDLE
- bool "open by fhandle syscalls"
- select EXPORTFS
- help
- If you say Y here, a user level program will be able to map
- file names to handle and then later use the handle for
- different file system operations. This is useful in implementing
- userspace file servers, which now track files using handles instead
- of names. The handle would remain the same even if file names
- get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
- syscalls.
-
config TASKSTATS
bool "Export task/process statistics through netlink (EXPERIMENTAL)"
depends on NET
@@ -346,50 +434,7 @@ config TASK_IO_ACCOUNTING
Say N if unsure.
-config AUDIT
- bool "Auditing support"
- depends on NET
- help
- Enable auditing infrastructure that can be used with another
- kernel subsystem, such as SELinux (which requires this for
- logging of avc messages output). Does not do system-call
- auditing without CONFIG_AUDITSYSCALL.
-
-config AUDITSYSCALL
- bool "Enable system-call auditing support"
- depends on AUDIT && (X86 || PPC || S390 || IA64 || UML || SPARC64 || SUPERH || (ARM && AEABI && !OABI_COMPAT))
- default y if SECURITY_SELINUX
<