aboutsummaryrefslogtreecommitdiff
path: root/kernel/timer.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/timer.c')
-rw-r--r--kernel/timer.c251
1 files changed, 70 insertions, 181 deletions
diff --git a/kernel/timer.c b/kernel/timer.c
index 367d0085848..3bb01a323b2 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1,7 +1,7 @@
/*
* linux/kernel/timer.c
*
- * Kernel internal timers, basic process system calls
+ * Kernel internal timers
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
@@ -39,7 +39,9 @@
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
#include <linux/sched.h>
+#include <linux/sched/sysctl.h>
#include <linux/slab.h>
+#include <linux/compat.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -50,7 +52,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>
-u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
+__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
EXPORT_SYMBOL(jiffies_64);
@@ -79,6 +81,7 @@ struct tvec_base {
unsigned long timer_jiffies;
unsigned long next_timer;
unsigned long active_timers;
+ unsigned long all_timers;
struct tvec_root tv1;
struct tvec tv2;
struct tvec tv3;
@@ -147,9 +150,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu,
/* now that we have rounded, subtract the extra skew again */
j -= cpu * 3;
- if (j <= jiffies) /* rounding ate our timeout entirely; */
- return original;
- return j;
+ /*
+ * Make sure j is still in the future. Otherwise return the
+ * unmodified value.
+ */
+ return time_is_after_jiffies(j) ? j : original;
}
/**
@@ -333,6 +338,20 @@ void set_timer_slack(struct timer_list *timer, int slack_hz)
}
EXPORT_SYMBOL_GPL(set_timer_slack);
+/*
+ * If the list is empty, catch up ->timer_jiffies to the current time.
+ * The caller must hold the tvec_base lock. Returns true if the list
+ * was empty and therefore ->timer_jiffies was updated.
+ */
+static bool catchup_timer_jiffies(struct tvec_base *base)
+{
+ if (!base->all_timers) {
+ base->timer_jiffies = jiffies;
+ return true;
+ }
+ return false;
+}
+
static void
__internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
@@ -379,15 +398,17 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
+ (void)catchup_timer_jiffies(base);
__internal_add_timer(base, timer);
/*
* Update base->active_timers and base->next_timer
*/
if (!tbase_get_deferrable(timer->base)) {
- if (time_before(timer->expires, base->next_timer))
+ if (!base->active_timers++ ||
+ time_before(timer->expires, base->next_timer))
base->next_timer = timer->expires;
- base->active_timers++;
}
+ base->all_timers++;
}
#ifdef CONFIG_TIMER_STATS
@@ -667,6 +688,8 @@ detach_expired_timer(struct timer_list *timer, struct tvec_base *base)
detach_timer(timer, true);
if (!tbase_get_deferrable(timer->base))
base->active_timers--;
+ base->all_timers--;
+ (void)catchup_timer_jiffies(base);
}
static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
@@ -681,6 +704,8 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base,
if (timer->expires == base->next_timer)
base->next_timer = base->timer_jiffies;
}
+ base->all_timers--;
+ (void)catchup_timer_jiffies(base);
return 1;
}
@@ -735,12 +760,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
debug_activate(timer, expires);
- cpu = smp_processor_id();
-
-#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
- if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
- cpu = get_nohz_timer_target();
-#endif
+ cpu = get_nohz_timer_target(pinned);
new_base = per_cpu(tvec_bases, cpu);
if (base != new_base) {
@@ -818,7 +838,7 @@ unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
bit = find_last_bit(&mask, BITS_PER_LONG);
- mask = (1 << bit) - 1;
+ mask = (1UL << bit) - 1;
expires_limit = expires_limit & ~(mask);
@@ -929,14 +949,21 @@ void add_timer_on(struct timer_list *timer, int cpu)
debug_activate(timer, timer->expires);
internal_add_timer(base, timer);
/*
- * Check whether the other CPU is idle and needs to be
- * triggered to reevaluate the timer wheel when nohz is
- * active. We are protected against the other CPU fiddling
+ * Check whether the other CPU is in dynticks mode and needs
+ * to be triggered to reevaluate the timer wheel.
+ * We are protected against the other CPU fiddling
* with the timer by holding the timer base lock. This also
- * makes sure that a CPU on the way to idle can not evaluate
- * the timer wheel.
+ * makes sure that a CPU on the way to stop its tick can not
+ * evaluate the timer wheel.
+ *
+ * Spare the IPI for deferrable timers on idle targets though.
+ * The next busy ticks will take care of it. Except full dynticks
+ * require special care against races with idle_cpu(), lets deal
+ * with that later.
*/
- wake_up_idle_cpu(cpu);
+ if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(cpu))
+ wake_up_nohz_cpu(cpu);
+
spin_unlock_irqrestore(&base->lock, flags);
}
EXPORT_SYMBOL_GPL(add_timer_on);
@@ -1088,7 +1115,7 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
unsigned long data)
{
- int preempt_count = preempt_count();
+ int count = preempt_count();
#ifdef CONFIG_LOCKDEP
/*
@@ -1115,16 +1142,16 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
lock_map_release(&lockdep_map);
- if (preempt_count != preempt_count()) {
+ if (count != preempt_count()) {
WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
- fn, preempt_count, preempt_count());
+ fn, count, preempt_count());
/*
* Restore the preempt count. That gives us a decent
* chance to survive and extract information. If the
* callback kept a lock held, bad luck, but not worse
* than the BUG() we had.
*/
- preempt_count() = preempt_count;
+ preempt_count_set(count);
}
}
@@ -1142,6 +1169,10 @@ static inline void __run_timers(struct tvec_base *base)
struct timer_list *timer;
spin_lock_irq(&base->lock);
+ if (catchup_timer_jiffies(base)) {
+ spin_unlock_irq(&base->lock);
+ return;
+ }
while (time_after_eq(jiffies, base->timer_jiffies)) {
struct list_head work_list;
struct list_head *head = &work_list;
@@ -1156,7 +1187,7 @@ static inline void __run_timers(struct tvec_base *base)
!cascade(base, &base->tv4, INDEX(2)))
cascade(base, &base->tv5, INDEX(3));
++base->timer_jiffies;
- list_replace_init(base->tv1.vec + index, &work_list);
+ list_replace_init(base->tv1.vec + index, head);
while (!list_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
@@ -1187,7 +1218,7 @@ static inline void __run_timers(struct tvec_base *base)
spin_unlock_irq(&base->lock);
}
-#ifdef CONFIG_NO_HZ
+#ifdef CONFIG_NO_HZ_COMMON
/*
* Find out when the next timer event is due to happen. This
* is used on S/390 to stop all activity when a CPU is idle.
@@ -1351,7 +1382,6 @@ void update_process_times(int user_tick)
account_process_tick(p, user_tick);
run_local_timers();
rcu_check_callbacks(cpu, user_tick);
- printk_tick();
#ifdef CONFIG_IRQ_WORK
if (in_irq())
irq_work_run();
@@ -1395,61 +1425,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
#endif
-/**
- * sys_getpid - return the thread group id of the current process
- *
- * Note, despite the name, this returns the tgid not the pid. The tgid and
- * the pid are identical unless CLONE_THREAD was specified on clone() in
- * which case the tgid is the same in all threads of the same group.
- *
- * This is SMP safe as current->tgid does not change.
- */
-SYSCALL_DEFINE0(getpid)
-{
- return task_tgid_vnr(current);
-}
-
-/*
- * Accessing ->real_parent is not SMP-safe, it could
- * change from under us. However, we can use a stale
- * value of ->real_parent under rcu_read_lock(), see
- * release_task()->call_rcu(delayed_put_task_struct).
- */
-SYSCALL_DEFINE0(getppid)
-{
- int pid;
-
- rcu_read_lock();
- pid = task_tgid_vnr(rcu_dereference(current->real_parent));
- rcu_read_unlock();
-
- return pid;
-}
-
-SYSCALL_DEFINE0(getuid)
-{
- /* Only we change this so SMP safe */
- return from_kuid_munged(current_user_ns(), current_uid());
-}
-
-SYSCALL_DEFINE0(geteuid)
-{
- /* Only we change this so SMP safe */
- return from_kuid_munged(current_user_ns(), current_euid());
-}
-
-SYSCALL_DEFINE0(getgid)
-{
- /* Only we change this so SMP safe */
- return from_kgid_munged(current_user_ns(), current_gid());
-}
-
-SYSCALL_DEFINE0(getegid)
-{
- /* Only we change this so SMP safe */
- return from_kgid_munged(current_user_ns(), current_egid());
-}
-
static void process_timeout(unsigned long __data)
{
wake_up_process((struct task_struct *)__data);
@@ -1557,96 +1532,11 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
-/* Thread ID - the internal kernel "pid" */
-SYSCALL_DEFINE0(gettid)
-{
- return task_pid_vnr(current);
-}
-
-/**
- * do_sysinfo - fill in sysinfo struct
- * @info: pointer to buffer to fill
- */
-int do_sysinfo(struct sysinfo *info)
-{
- unsigned long mem_total, sav_total;
- unsigned int mem_unit, bitcount;
- struct timespec tp;
-
- memset(info, 0, sizeof(struct sysinfo));
-
- ktime_get_ts(&tp);
- monotonic_to_bootbased(&tp);
- info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
-
- get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
-
- info->procs = nr_threads;
-
- si_meminfo(info);
- si_swapinfo(info);
-
- /*
- * If the sum of all the available memory (i.e. ram + swap)
- * is less than can be stored in a 32 bit unsigned long then
- * we can be binary compatible with 2.2.x kernels. If not,
- * well, in that case 2.2.x was broken anyways...
- *
- * -Erik Andersen <andersee@debian.org>
- */
-
- mem_total = info->totalram + info->totalswap;
- if (mem_total < info->totalram || mem_total < info->totalswap)
- goto out;
- bitcount = 0;
- mem_unit = info->mem_unit;
- while (mem_unit > 1) {
- bitcount++;
- mem_unit >>= 1;
- sav_total = mem_total;
- mem_total <<= 1;
- if (mem_total < sav_total)
- goto out;
- }
-
- /*
- * If mem_total did not overflow, multiply all memory values by
- * info->mem_unit and set it to 1. This leaves things compatible
- * with 2.2.x, and also retains compatibility with earlier 2.4.x
- * kernels...
- */
-
- info->mem_unit = 1;
- info->totalram <<= bitcount;
- info->freeram <<= bitcount;
- info->sharedram <<= bitcount;
- info->bufferram <<= bitcount;
- info->totalswap <<= bitcount;
- info->freeswap <<= bitcount;
- info->totalhigh <<= bitcount;
- info->freehigh <<= bitcount;
-
-out:
- return 0;
-}
-
-SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info)
-{
- struct sysinfo val;
-
- do_sysinfo(&val);
-
- if (copy_to_user(info, &val, sizeof(struct sysinfo)))
- return -EFAULT;
-
- return 0;
-}
-
-static int __cpuinit init_timers_cpu(int cpu)
+static int init_timers_cpu(int cpu)
{
int j;
struct tvec_base *base;
- static char __cpuinitdata tvec_base_done[NR_CPUS];
+ static char tvec_base_done[NR_CPUS];
if (!tvec_base_done[cpu]) {
static char boot_done;
@@ -1655,15 +1545,13 @@ static int __cpuinit init_timers_cpu(int cpu)
/*
* The APs use this path later in boot
*/
- base = kmalloc_node(sizeof(*base),
- GFP_KERNEL | __GFP_ZERO,
- cpu_to_node(cpu));
+ base = kzalloc_node(sizeof(*base), GFP_KERNEL,
+ cpu_to_node(cpu));
if (!base)
return -ENOMEM;
- /* Make sure that tvec_base is 2 byte aligned */
- if (tbase_get_deferrable(base)) {
- WARN_ON(1);
+ /* Make sure tvec_base has TIMER_FLAG_MASK bits free */
+ if (WARN_ON(base != tbase_get_base(base))) {
kfree(base);
return -ENOMEM;
}
@@ -1678,12 +1566,12 @@ static int __cpuinit init_timers_cpu(int cpu)
boot_done = 1;
base = &boot_tvec_bases;
}
+ spin_lock_init(&base->lock);
tvec_base_done[cpu] = 1;
} else {
base = per_cpu(tvec_bases, cpu);
}
- spin_lock_init(&base->lock);
for (j = 0; j < TVN_SIZE; j++) {
INIT_LIST_HEAD(base->tv5.vec + j);
@@ -1697,6 +1585,7 @@ static int __cpuinit init_timers_cpu(int cpu)
base->timer_jiffies = jiffies;
base->next_timer = base->timer_jiffies;
base->active_timers = 0;
+ base->all_timers = 0;
return 0;
}
@@ -1714,7 +1603,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
}
}
-static void __cpuinit migrate_timers(int cpu)
+static void migrate_timers(int cpu)
{
struct tvec_base *old_base;
struct tvec_base *new_base;
@@ -1747,7 +1636,7 @@ static void __cpuinit migrate_timers(int cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static int __cpuinit timer_cpu_notify(struct notifier_block *self,
+static int timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -1772,7 +1661,7 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata timers_nb = {
+static struct notifier_block timers_nb = {
.notifier_call = timer_cpu_notify,
};
@@ -1786,9 +1675,9 @@ void __init init_timers(void)
err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
- init_timer_stats();
-
BUG_ON(err != NOTIFY_OK);
+
+ init_timer_stats();
register_cpu_notifier(&timers_nb);
open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
}