aboutsummaryrefslogtreecommitdiff
path: root/kernel/posix-cpu-timers.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/posix-cpu-timers.c')
-rw-r--r--kernel/posix-cpu-timers.c1282
1 files changed, 523 insertions, 759 deletions
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 438ff452351..3b8946416a5 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -9,21 +9,23 @@
#include <asm/uaccess.h>
#include <linux/kernel_stat.h>
#include <trace/events/timer.h>
+#include <linux/random.h>
+#include <linux/tick.h>
+#include <linux/workqueue.h>
/*
- * Called after updating RLIMIT_CPU to set timer expiration if necessary.
+ * Called after updating RLIMIT_CPU to run cpu timer and update
+ * tsk->signal->cputime_expires expiration cache if necessary. Needs
+ * siglock protection since other code may update expiration cache as
+ * well.
*/
-void update_rlimit_cpu(unsigned long rlim_new)
+void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
cputime_t cputime = secs_to_cputime(rlim_new);
- struct signal_struct *const sig = current->signal;
- if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) ||
- cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) {
- spin_lock_irq(&current->sighand->siglock);
- set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
- spin_unlock_irq(&current->sighand->siglock);
- }
+ spin_lock_irq(&task->sighand->siglock);
+ set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL);
+ spin_unlock_irq(&task->sighand->siglock);
}
static int check_clock(const clockid_t which_clock)
@@ -38,83 +40,39 @@ static int check_clock(const clockid_t which_clock)
if (pid == 0)
return 0;
- read_lock(&tasklist_lock);
+ rcu_read_lock();
p = find_task_by_vpid(pid);
if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
- same_thread_group(p, current) : thread_group_leader(p))) {
+ same_thread_group(p, current) : has_group_leader_pid(p))) {
error = -EINVAL;
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return error;
}
-static inline union cpu_time_count
+static inline unsigned long long
timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
{
- union cpu_time_count ret;
- ret.sched = 0; /* high half always zero when .cpu used */
+ unsigned long long ret;
+
+ ret = 0; /* high half always zero when .cpu used */
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
- ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
+ ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
} else {
- ret.cpu = timespec_to_cputime(tp);
+ ret = cputime_to_expires(timespec_to_cputime(tp));
}
return ret;
}
static void sample_to_timespec(const clockid_t which_clock,
- union cpu_time_count cpu,
+ unsigned long long expires,
struct timespec *tp)
{
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
- *tp = ns_to_timespec(cpu.sched);
+ *tp = ns_to_timespec(expires);
else
- cputime_to_timespec(cpu.cpu, tp);
-}
-
-static inline int cpu_time_before(const clockid_t which_clock,
- union cpu_time_count now,
- union cpu_time_count then)
-{
- if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
- return now.sched < then.sched;
- } else {
- return cputime_lt(now.cpu, then.cpu);
- }
-}
-static inline void cpu_time_add(const clockid_t which_clock,
- union cpu_time_count *acc,
- union cpu_time_count val)
-{
- if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
- acc->sched += val.sched;
- } else {
- acc->cpu = cputime_add(acc->cpu, val.cpu);
- }
-}
-static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
- union cpu_time_count a,
- union cpu_time_count b)
-{
- if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
- a.sched -= b.sched;
- } else {
- a.cpu = cputime_sub(a.cpu, b.cpu);
- }
- return a;
-}
-
-/*
- * Divide and limit the result to res >= 1
- *
- * This is necessary to prevent signal delivery starvation, when the result of
- * the division would be rounded down to 0.
- */
-static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
-{
- cputime_t res = cputime_div(time, div);
-
- return max_t(cputime_t, res, 1);
+ cputime_to_timespec((__force cputime_t)expires, tp);
}
/*
@@ -122,62 +80,68 @@ static inline cputime_t cputime_div_non_zero(cputime_t time, unsigned long div)
* given the current clock sample.
*/
static void bump_cpu_timer(struct k_itimer *timer,
- union cpu_time_count now)
+ unsigned long long now)
{
int i;
+ unsigned long long delta, incr;
- if (timer->it.cpu.incr.sched == 0)
+ if (timer->it.cpu.incr == 0)
return;
- if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
- unsigned long long delta, incr;
+ if (now < timer->it.cpu.expires)
+ return;
- if (now.sched < timer->it.cpu.expires.sched)
- return;
- incr = timer->it.cpu.incr.sched;
- delta = now.sched + incr - timer->it.cpu.expires.sched;
- /* Don't use (incr*2 < delta), incr*2 might overflow. */
- for (i = 0; incr < delta - incr; i++)
- incr = incr << 1;
- for (; i >= 0; incr >>= 1, i--) {
- if (delta < incr)
- continue;
- timer->it.cpu.expires.sched += incr;
- timer->it_overrun += 1 << i;
- delta -= incr;
- }
- } else {
- cputime_t delta, incr;
+ incr = timer->it.cpu.incr;
+ delta = now + incr - timer->it.cpu.expires;
- if (cputime_lt(now.cpu, timer->it.cpu.expires.cpu))
- return;
- incr = timer->it.cpu.incr.cpu;
- delta = cputime_sub(cputime_add(now.cpu, incr),
- timer->it.cpu.expires.cpu);
- /* Don't use (incr*2 < delta), incr*2 might overflow. */
- for (i = 0; cputime_lt(incr, cputime_sub(delta, incr)); i++)
- incr = cputime_add(incr, incr);
- for (; i >= 0; incr = cputime_halve(incr), i--) {
- if (cputime_lt(delta, incr))
- continue;
- timer->it.cpu.expires.cpu =
- cputime_add(timer->it.cpu.expires.cpu, incr);
- timer->it_overrun += 1 << i;
- delta = cputime_sub(delta, incr);
- }
+ /* Don't use (incr*2 < delta), incr*2 might overflow. */
+ for (i = 0; incr < delta - incr; i++)
+ incr = incr << 1;
+
+ for (; i >= 0; incr >>= 1, i--) {
+ if (delta < incr)
+ continue;
+
+ timer->it.cpu.expires += incr;
+ timer->it_overrun += 1 << i;
+ delta -= incr;
}
}
-static inline cputime_t prof_ticks(struct task_struct *p)
+/**
+ * task_cputime_zero - Check a task_cputime struct for all zero fields.
+ *
+ * @cputime: The struct to compare.
+ *
+ * Checks @cputime to see if all fields are zero. Returns true if all fields
+ * are zero, false if any field is nonzero.
+ */
+static inline int task_cputime_zero(const struct task_cputime *cputime)
+{
+ if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
+ return 1;
+ return 0;
+}
+
+static inline unsigned long long prof_ticks(struct task_struct *p)
{
- return cputime_add(p->utime, p->stime);
+ cputime_t utime, stime;
+
+ task_cputime(p, &utime, &stime);
+
+ return cputime_to_expires(utime + stime);
}
-static inline cputime_t virt_ticks(struct task_struct *p)
+static inline unsigned long long virt_ticks(struct task_struct *p)
{
- return p->utime;
+ cputime_t utime;
+
+ task_cputime(p, &utime, NULL);
+
+ return cputime_to_expires(utime);
}
-int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
+static int
+posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
{
int error = check_clock(which_clock);
if (!error) {
@@ -195,7 +159,8 @@ int posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp)
return error;
}
-int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
+static int
+posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
{
/*
* You can never reset a CPU clock, but we check for other errors
@@ -213,61 +178,30 @@ int posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
* Sample a per-thread clock for the given task.
*/
static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
- union cpu_time_count *cpu)
+ unsigned long long *sample)
{
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
- cpu->cpu = prof_ticks(p);
+ *sample = prof_ticks(p);
break;
case CPUCLOCK_VIRT:
- cpu->cpu = virt_ticks(p);
+ *sample = virt_ticks(p);
break;
case CPUCLOCK_SCHED:
- cpu->sched = task_sched_runtime(p);
+ *sample = task_sched_runtime(p);
break;
}
return 0;
}
-void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
-{
- struct sighand_struct *sighand;
- struct signal_struct *sig;
- struct task_struct *t;
-
- *times = INIT_CPUTIME;
-
- rcu_read_lock();
- sighand = rcu_dereference(tsk->sighand);
- if (!sighand)
- goto out;
-
- sig = tsk->signal;
-
- t = tsk;
- do {
- times->utime = cputime_add(times->utime, t->utime);
- times->stime = cputime_add(times->stime, t->stime);
- times->sum_exec_runtime += t->se.sum_exec_runtime;
-
- t = next_thread(t);
- } while (t != tsk);
-
- times->utime = cputime_add(times->utime, sig->utime);
- times->stime = cputime_add(times->stime, sig->stime);
- times->sum_exec_runtime += sig->sum_sched_runtime;
-out:
- rcu_read_unlock();
-}
-
static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
{
- if (cputime_gt(b->utime, a->utime))
+ if (b->utime > a->utime)
a->utime = b->utime;
- if (cputime_gt(b->stime, a->stime))
+ if (b->stime > a->stime)
a->stime = b->stime;
if (b->sum_exec_runtime > a->sum_exec_runtime)
@@ -280,9 +214,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
struct task_cputime sum;
unsigned long flags;
- spin_lock_irqsave(&cputimer->lock, flags);
if (!cputimer->running) {
- cputimer->running = 1;
/*
* The POSIX timer interface allows for absolute time expiry
* values through the TIMER_ABSTIME flag, therefore we have
@@ -290,19 +222,23 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
* it.
*/
thread_group_cputime(tsk, &sum);
+ raw_spin_lock_irqsave(&cputimer->lock, flags);
+ cputimer->running = 1;
update_gt_cputime(&cputimer->cputime, &sum);
- }
+ } else
+ raw_spin_lock_irqsave(&cputimer->lock, flags);
*times = cputimer->cputime;
- spin_unlock_irqrestore(&cputimer->lock, flags);
+ raw_spin_unlock_irqrestore(&cputimer->lock, flags);
}
/*
* Sample a process (thread group) clock for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
*/
static int cpu_clock_sample_group(const clockid_t which_clock,
struct task_struct *p,
- union cpu_time_count *cpu)
+ unsigned long long *sample)
{
struct task_cputime cputime;
@@ -311,43 +247,67 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
return -EINVAL;
case CPUCLOCK_PROF:
thread_group_cputime(p, &cputime);
- cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+ *sample = cputime_to_expires(cputime.utime + cputime.stime);
break;
case CPUCLOCK_VIRT:
thread_group_cputime(p, &cputime);
- cpu->cpu = cputime.utime;
+ *sample = cputime_to_expires(cputime.utime);
break;
case CPUCLOCK_SCHED:
- cpu->sched = thread_group_sched_runtime(p);
+ thread_group_cputime(p, &cputime);
+ *sample = cputime.sum_exec_runtime;
break;
}
return 0;
}
+static int posix_cpu_clock_get_task(struct task_struct *tsk,
+ const clockid_t which_clock,
+ struct timespec *tp)
+{
+ int err = -EINVAL;
+ unsigned long long rtn;
-int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
+ if (CPUCLOCK_PERTHREAD(which_clock)) {
+ if (same_thread_group(tsk, current))
+ err = cpu_clock_sample(which_clock, tsk, &rtn);
+ } else {
+ unsigned long flags;
+ struct sighand_struct *sighand;
+
+ /*
+ * while_each_thread() is not yet entirely RCU safe,
+ * keep locking the group while sampling process
+ * clock for now.
+ */
+ sighand = lock_task_sighand(tsk, &flags);
+ if (!sighand)
+ return err;
+
+ if (tsk == current || thread_group_leader(tsk))
+ err = cpu_clock_sample_group(which_clock, tsk, &rtn);
+
+ unlock_task_sighand(tsk, &flags);
+ }
+
+ if (!err)
+ sample_to_timespec(which_clock, rtn, tp);
+
+ return err;
+}
+
+
+static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
{
const pid_t pid = CPUCLOCK_PID(which_clock);
- int error = -EINVAL;
- union cpu_time_count rtn;
+ int err = -EINVAL;
if (pid == 0) {
/*
* Special case constant value for our own clocks.
* We don't have to do any lookup to find ourselves.
*/
- if (CPUCLOCK_PERTHREAD(which_clock)) {
- /*
- * Sampling just ourselves we can do with no locking.
- */
- error = cpu_clock_sample(which_clock,
- current, &rtn);
- } else {
- read_lock(&tasklist_lock);
- error = cpu_clock_sample_group(which_clock,
- current, &rtn);
- read_unlock(&tasklist_lock);
- }
+ err = posix_cpu_clock_get_task(current, which_clock, tp);
} else {
/*
* Find the given PID, and validate that the caller
@@ -356,29 +316,12 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
struct task_struct *p;
rcu_read_lock();
p = find_task_by_vpid(pid);
- if (p) {
- if (CPUCLOCK_PERTHREAD(which_clock)) {
- if (same_thread_group(p, current)) {
- error = cpu_clock_sample(which_clock,
- p, &rtn);
- }
- } else {
- read_lock(&tasklist_lock);
- if (thread_group_leader(p) && p->signal) {
- error =
- cpu_clock_sample_group(which_clock,
- p, &rtn);
- }
- read_unlock(&tasklist_lock);
- }
- }
+ if (p)
+ err = posix_cpu_clock_get_task(p, which_clock, tp);
rcu_read_unlock();
}
- if (error)
- return error;
- sample_to_timespec(which_clock, rtn, tp);
- return 0;
+ return err;
}
@@ -387,7 +330,7 @@ int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
* This is called from sys_timer_create() and do_cpu_nanosleep() with the
* new timer already all-zeros initialized.
*/
-int posix_cpu_timer_create(struct k_itimer *new_timer)
+static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
int ret = 0;
const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
@@ -398,7 +341,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
INIT_LIST_HEAD(&new_timer->it.cpu.entry);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
if (pid == 0) {
p = current;
@@ -412,7 +355,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
p = current->group_leader;
} else {
p = find_task_by_vpid(pid);
- if (p && !thread_group_leader(p))
+ if (p && !has_group_leader_pid(p))
p = NULL;
}
}
@@ -422,7 +365,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
} else {
ret = -EINVAL;
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return ret;
}
@@ -433,79 +376,60 @@ int posix_cpu_timer_create(struct k_itimer *new_timer)
* If we return TIMER_RETRY, it's necessary to release the timer's lock
* and try again. (This happens when the timer is in the middle of firing.)
*/
-int posix_cpu_timer_del(struct k_itimer *timer)
+static int posix_cpu_timer_del(struct k_itimer *timer)
{
- struct task_struct *p = timer->it.cpu.task;
int ret = 0;
+ unsigned long flags;
+ struct sighand_struct *sighand;
+ struct task_struct *p = timer->it.cpu.task;
- if (likely(p != NULL)) {
- read_lock(&tasklist_lock);
- if (unlikely(p->signal == NULL)) {
- /*
- * We raced with the reaping of the task.
- * The deletion should have cleared us off the list.
- */
- BUG_ON(!list_empty(&timer->it.cpu.entry));
- } else {
- spin_lock(&p->sighand->siglock);
- if (timer->it.cpu.firing)
- ret = TIMER_RETRY;
- else
- list_del(&timer->it.cpu.entry);
- spin_unlock(&p->sighand->siglock);
- }
- read_unlock(&tasklist_lock);
+ WARN_ON_ONCE(p == NULL);
- if (!ret)
- put_task_struct(p);
+ /*
+ * Protect against sighand release/switch in exit/exec and process/
+ * thread timer list entry concurrent read/writes.
+ */
+ sighand = lock_task_sighand(p, &flags);
+ if (unlikely(sighand == NULL)) {
+ /*
+ * We raced with the reaping of the task.
+ * The deletion should have cleared us off the list.
+ */
+ WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
+ } else {
+ if (timer->it.cpu.firing)
+ ret = TIMER_RETRY;
+ else
+ list_del(&timer->it.cpu.entry);
+
+ unlock_task_sighand(p, &flags);
}
+ if (!ret)
+ put_task_struct(p);
+
return ret;
}
+static void cleanup_timers_list(struct list_head *head)
+{
+ struct cpu_timer_list *timer, *next;
+
+ list_for_each_entry_safe(timer, next, head, entry)
+ list_del_init(&timer->entry);
+}
+
/*
* Clean out CPU timers still ticking when a thread exited. The task
* pointer is cleared, and the expiry time is replaced with the residual
* time for later timer_gettime calls to return.
* This must be called with the siglock held.
*/
-static void cleanup_timers(struct list_head *head,
- cputime_t utime, cputime_t stime,
- unsigned long long sum_exec_runtime)
+static void cleanup_timers(struct list_head *head)
{
- struct cpu_timer_list *timer, *next;
- cputime_t ptime = cputime_add(utime, stime);
-
- list_for_each_entry_safe(timer, next, head, entry) {
- list_del_init(&timer->entry);
- if (cputime_lt(timer->expires.cpu, ptime)) {
- timer->expires.cpu = cputime_zero;
- } else {
- timer->expires.cpu = cputime_sub(timer->expires.cpu,
- ptime);
- }
- }
-
- ++head;
- list_for_each_entry_safe(timer, next, head, entry) {
- list_del_init(&timer->entry);
- if (cputime_lt(timer->expires.cpu, utime)) {
- timer->expires.cpu = cputime_zero;
- } else {
- timer->expires.cpu = cputime_sub(timer->expires.cpu,
- utime);
- }
- }
-
- ++head;
- list_for_each_entry_safe(timer, next, head, entry) {
- list_del_init(&timer->entry);
- if (timer->expires.sched < sum_exec_runtime) {
- timer->expires.sched = 0;
- } else {
- timer->expires.sched -= sum_exec_runtime;
- }
- }
+ cleanup_timers_list(head);
+ cleanup_timers_list(++head);
+ cleanup_timers_list(++head);
}
/*
@@ -515,144 +439,76 @@ static void cleanup_timers(struct list_head *head,
*/
void posix_cpu_timers_exit(struct task_struct *tsk)
{
- cleanup_timers(tsk->cpu_timers,
- tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);
+ add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
+ sizeof(unsigned long long));
+ cleanup_timers(tsk->cpu_timers);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
- struct signal_struct *const sig = tsk->signal;
-
- cleanup_timers(tsk->signal->cpu_timers,
- cputime_add(tsk->utime, sig->utime),
- cputime_add(tsk->stime, sig->stime),
- tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
-}
-
-static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
-{
- /*
- * That's all for this thread or process.
- * We leave our residual in expires to be reported.
- */
- put_task_struct(timer->it.cpu.task);
- timer->it.cpu.task = NULL;
- timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
- timer->it.cpu.expires,
- now);
+ cleanup_timers(tsk->signal->cpu_timers);
}
static inline int expires_gt(cputime_t expires, cputime_t new_exp)
{
- return cputime_eq(expires, cputime_zero) ||
- cputime_gt(expires, new_exp);
+ return expires == 0 || expires > new_exp;
}
-static inline int expires_le(cputime_t expires, cputime_t new_exp)
-{
- return !cputime_eq(expires, cputime_zero) &&
- cputime_le(expires, new_exp);
-}
/*
* Insert the timer on the appropriate list before any timers that
- * expire later. This must be called with the tasklist_lock held
- * for reading, and interrupts disabled.
+ * expire later. This must be called with the sighand lock held.
*/
-static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
+static void arm_timer(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
struct list_head *head, *listpos;
+ struct task_cputime *cputime_expires;
struct cpu_timer_list *const nt = &timer->it.cpu;
struct cpu_timer_list *next;
- unsigned long i;
- head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
- p->cpu_timers : p->signal->cpu_timers);
+ if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
+ head = p->cpu_timers;
+ cputime_expires = &p->cputime_expires;
+ } else {
+ head = p->signal->cpu_timers;
+ cputime_expires = &p->signal->cputime_expires;
+ }
head += CPUCLOCK_WHICH(timer->it_clock);
- BUG_ON(!irqs_disabled());
- spin_lock(&p->sighand->siglock);
-
listpos = head;
- if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
- list_for_each_entry(next, head, entry) {
- if (next->expires.sched > nt->expires.sched)
- break;
- listpos = &next->entry;
- }
- } else {
- list_for_each_entry(next, head, entry) {
- if (cputime_gt(next->expires.cpu, nt->expires.cpu))
- break;
- listpos = &next->entry;
- }
+ list_for_each_entry(next, head, entry) {
+ if (nt->expires < next->expires)
+ break;
+ listpos = &next->entry;
}
list_add(&nt->entry, listpos);
if (listpos == head) {
+ unsigned long long exp = nt->expires;
+
/*
- * We are the new earliest-expiring timer.
- * If we are a thread timer, there can always
- * be a process timer telling us to stop earlier.
+ * We are the new earliest-expiring POSIX 1.b timer, hence
+ * need to update expiration cache. Take into account that
+ * for process timers we share expiration cache with itimers
+ * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
*/
- if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- union cpu_time_count *exp = &nt->expires;
-
- switch (CPUCLOCK_WHICH(timer->it_clock)) {
- default:
- BUG();
- case CPUCLOCK_PROF:
- if (expires_gt(p->cputime_expires.prof_exp,
- exp->cpu))
- p->cputime_expires.prof_exp = exp->cpu;
- break;
- case CPUCLOCK_VIRT:
- if (expires_gt(p->cputime_expires.virt_exp,
- exp->cpu))
- p->cputime_expires.virt_exp = exp->cpu;
- break;
- case CPUCLOCK_SCHED:
- if (p->cputime_expires.sched_exp == 0 ||
- p->cputime_expires.sched_exp > exp->sched)
- p->cputime_expires.sched_exp =
- exp->sched;
- break;
- }
- } else {
- struct signal_struct *const sig = p->signal;
- union cpu_time_count *exp = &timer->it.cpu.expires;
-
- /*
- * For a process timer, set the cached expiration time.
- */
- switch (CPUCLOCK_WHICH(timer->it_clock)) {
- default:
- BUG();
- case CPUCLOCK_VIRT:
- if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
- exp->cpu))
- break;
- sig->cputime_expires.virt_exp = exp->cpu;
- break;
- case CPUCLOCK_PROF:
- if (expires_le(sig->it[CPUCLOCK_PROF].expires,
- exp->cpu))
- break;
- i = sig->rlim[RLIMIT_CPU].rlim_cur;
- if (i != RLIM_INFINITY &&
- i <= cputime_to_secs(exp->cpu))
- break;
- sig->cputime_expires.prof_exp = exp->cpu;
- break;
- case CPUCLOCK_SCHED:
- sig->cputime_expires.sched_exp = exp->sched;
- break;
- }
+ switch (CPUCLOCK_WHICH(timer->it_clock)) {
+ case CPUCLOCK_PROF:
+ if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
+ cputime_expires->prof_exp = expires_to_cputime(exp);
+ break;
+ case CPUCLOCK_VIRT:
+ if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
+ cputime_expires->virt_exp = expires_to_cputime(exp);
+ break;
+ case CPUCLOCK_SCHED:
+ if (cputime_expires->sched_exp == 0 ||
+ cputime_expires->sched_exp > exp)
+ cputime_expires->sched_exp = exp;
+ break;
}
}
-
- spin_unlock(&p->sighand->siglock);
}
/*
@@ -660,19 +516,24 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
*/
static void cpu_timer_fire(struct k_itimer *timer)
{
- if (unlikely(timer->sigq == NULL)) {
+ if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
+ /*
+ * User don't want any signal.
+ */
+ timer->it.cpu.expires = 0;
+ } else if (unlikely(timer->sigq == NULL)) {
/*
* This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create.
*/
wake_up_process(timer->it_process);
- timer->it.cpu.expires.sched = 0;
- } else if (timer->it.cpu.incr.sched == 0) {
+ timer->it.cpu.expires = 0;
+ } else if (timer->it.cpu.incr == 0) {
/*
* One-shot timer. Clear it as soon as it's fired.
*/
posix_timer_event(timer, 0);
- timer->it.cpu.expires.sched = 0;
+ timer->it.cpu.expires = 0;
} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
/*
* The signal did not get queued because the signal
@@ -686,11 +547,12 @@ static void cpu_timer_fire(struct k_itimer *timer)
/*
* Sample a process (thread group) timer for the given group_leader task.
- * Must be called with tasklist_lock held for reading.
+ * Must be called with task sighand lock held for safe while_each_thread()
+ * traversal.
*/
static int cpu_timer_sample_group(const clockid_t which_clock,
struct task_struct *p,
- union cpu_time_count *cpu)
+ unsigned long long *sample)
{
struct task_cputime cputime;
@@ -699,67 +561,95 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
default:
return -EINVAL;
case CPUCLOCK_PROF:
- cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+ *sample = cputime_to_expires(cputime.utime + cputime.stime);
break;
case CPUCLOCK_VIRT:
- cpu->cpu = cputime.utime;
+ *sample = cputime_to_expires(cputime.utime);
break;
case CPUCLOCK_SCHED:
- cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+ *sample = cputime.sum_exec_runtime + task_delta_exec(p);
break;
}
return 0;
}
+#ifdef CONFIG_NO_HZ_FULL
+static void nohz_kick_work_fn(struct work_struct *work)
+{
+ tick_nohz_full_kick_all();
+}
+
+static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
+
+/*
+ * We need the IPIs to be sent from sane process context.
+ * The posix cpu timers are always set with irqs disabled.
+ */
+static void posix_cpu_timer_kick_nohz(void)
+{
+ if (context_tracking_is_enabled())
+ schedule_work(&nohz_kick_work);
+}
+
+bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
+{
+ if (!task_cputime_zero(&tsk->cputime_expires))
+ return false;
+
+ if (tsk->signal->cputimer.running)
+ return false;
+
+ return true;
+}
+#else
+static inline void posix_cpu_timer_kick_nohz(void) { }
+#endif
+
/*
* Guts of sys_timer_settime for CPU timers.
* This is called with the timer locked and interrupts disabled.
* If we return TIMER_RETRY, it's necessary to release the timer's lock
* and try again. (This happens when the timer is in the middle of firing.)
*/
-int posix_cpu_timer_set(struct k_itimer *timer, int flags,
- struct itimerspec *new, struct itimerspec *old)
+static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
+ struct itimerspec *new, struct itimerspec *old)
{
+ unsigned long flags;
+ struct sighand_struct *sighand;
struct task_struct *p = timer->it.cpu.task;
- union cpu_time_count old_expires, new_expires, val;
+ unsigned long long old_expires, new_expires, old_incr, val;
int ret;
- if (unlikely(p == NULL)) {
- /*
- * Timer refers to a dead task's clock.
- */
- return -ESRCH;
- }
+ WARN_ON_ONCE(p == NULL);
new_expires = timespec_to_sample(timer->it_clock, &new->it_value);
- read_lock(&tasklist_lock);
/*
- * We need the tasklist_lock to protect against reaping that
- * clears p->signal. If p has just been reaped, we can no
+ * Protect against sighand release/switch in exit/exec and p->cpu_timers
+ * and p->signal->cpu_timers read/write in arm_timer()
+ */
+ sighand = lock_task_sighand(p, &flags);
+ /*
+ * If p has just been reaped, we can no
* longer get any information about it at all.
*/
- if (unlikely(p->signal == NULL)) {
- read_unlock(&tasklist_lock);
- put_task_struct(p);
- timer->it.cpu.task = NULL;
+ if (unlikely(sighand == NULL)) {
return -ESRCH;
}
/*
* Disarm any old timer after extracting its expiry time.
*/
- BUG_ON(!irqs_disabled());
+ WARN_ON_ONCE(!irqs_disabled());
ret = 0;
- spin_lock(&p->sighand->siglock);
+ old_incr = timer->it.cpu.incr;
old_expires = timer->it.cpu.expires;
if (unlikely(timer->it.cpu.firing)) {
timer->it.cpu.firing = -1;
ret = TIMER_RETRY;
} else
list_del_init(&timer->it.cpu.entry);
- spin_unlock(&p->sighand->siglock);
/*
* We need to sample the current value to convert the new
@@ -776,7 +666,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
}
if (old) {
- if (old_expires.sched == 0) {
+ if (old_expires == 0) {
old->it_value.tv_sec = 0;
old->it_value.tv_nsec = 0;
} else {
@@ -791,11 +681,8 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
* new setting.
*/
bump_cpu_timer(timer, val);
- if (cpu_time_before(timer->it_clock, val,
- timer->it.cpu.expires)) {
- old_expires = cpu_time_sub(
- timer->it_clock,
- timer->it.cpu.expires, val);
+ if (val < timer->it.cpu.expires) {
+ old_expires = timer->it.cpu.expires - val;
sample_to_timespec(timer->it_clock,
old_expires,
&old->it_value);
@@ -813,12 +700,12 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
* disable this firing since we are already reporting
* it as an overrun (thanks to bump_cpu_timer above).
*/
- read_unlock(&tasklist_lock);
+ unlock_task_sighand(p, &flags);
goto out;
}
- if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
- cpu_time_add(timer->it_clock, &new_expires, val);
+ if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
+ new_expires += val;
}
/*
@@ -827,14 +714,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
* arm the timer (we'll just fake it for timer_gettime).
*/
timer->it.cpu.expires = new_expires;
- if (new_expires.sched != 0 &&
- (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
- cpu_time_before(timer->it_clock, val, new_expires)) {
- arm_timer(timer, val);
+ if (new_expires != 0 && val < new_expires) {
+ arm_timer(timer);
}
- read_unlock(&tasklist_lock);
-
+ unlock_task_sighand(p, &flags);
/*
* Install the new reload setting, and
* set up the signal and overrun bookkeeping.
@@ -852,9 +736,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
timer->it_overrun_last = 0;
timer->it_overrun = -1;
- if (new_expires.sched != 0 &&
- (timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
- !cpu_time_before(timer->it_clock, val, new_expires)) {
+ if (new_expires != 0 && !(val < new_expires)) {
/*
* The designated time already passed, so we notify
* immediately, even if the thread never runs to
@@ -867,16 +749,19 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
out:
if (old) {
sample_to_timespec(timer->it_clock,
- timer->it.cpu.incr, &old->it_interval);
+ old_incr, &old->it_interval);
}
+ if (!ret)
+ posix_cpu_timer_kick_nohz();
return ret;
}
-void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
+static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
{
- union cpu_time_count now;
+ unsigned long long now;
struct task_struct *p = timer->it.cpu.task;
- int clear_dead;
+
+ WARN_ON_ONCE(p == NULL);
/*
* Easy part: convert the reload time.
@@ -884,82 +769,44 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
sample_to_timespec(timer->it_clock,
timer->it.cpu.incr, &itp->it_interval);
- if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */
+ if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */
itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
return;
}
- if (unlikely(p == NULL)) {
- /*
- * This task already died and the timer will never fire.
- * In this case, expires is actually the dead value.
- */
- dead:
- sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
- &itp->it_value);
- return;
- }
-
/*
* Sample the clock to take the difference with the expiry time.
*/
if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
cpu_clock_sample(timer->it_clock, p, &now);
- clear_dead = p->exit_state;
} else {
- read_lock(&tasklist_lock);
- if (unlikely(p->signal == NULL)) {
+ struct sighand_struct *sighand;
+ unsigned long flags;
+
+ /*
+ * Protect against sighand release/switch in exit/exec and
+ * also make timer sampling safe if it ends up calling
+ * thread_group_cputime().
+ */
+ sighand = lock_task_sighand(p, &flags);
+ if (unlikely(sighand == NULL)) {
/*
* The process has been reaped.
* We can't even collect a sample any more.
* Call the timer disarmed, nothing else to do.
*/
- put_task_struct(p);
- timer->it.cpu.task = NULL;
- timer->it.cpu.expires.sched = 0;
- read_unlock(&tasklist_lock);
- goto dead;
+ timer->it.cpu.expires = 0;
+ sample_to_timespec(timer->it_clock, timer->it.cpu.expires,
+ &itp->it_value);
} else {
cpu_timer_sample_group(timer->it_clock, p, &now);
- clear_dead = (unlikely(p->exit_state) &&
- thread_group_empty(p));
+ unlock_task_sighand(p, &flags);
}
- read_unlock(&tasklist_lock);
- }
-
- if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
- if (timer->it.cpu.incr.sched == 0 &&
- cpu_time_before(timer->it_clock,
- timer->it.cpu.expires, now)) {
- /*
- * Do-nothing timer expired and has no reload,
- * so it's as if it was never set.
- */
- timer->it.cpu.expires.sched = 0;
- itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
- return;
- }
- /*
- * Account for any expirations and reloads that should
- * have happened.
- */
- bump_cpu_timer(timer, now);
- }
-
- if (unlikely(clear_dead)) {
- /*
- * We've noticed that the thread is dead, but
- * not yet reaped. Take this opportunity to
- * drop our task ref.
- */
- clear_dead_task(timer, now);
- goto dead;
}
- if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
+ if (now < timer->it.cpu.expires) {
sample_to_timespec(timer->it_clock,
- cpu_time_sub(timer->it_clock,
- timer->it.cpu.expires, now),
+ timer->it.cpu.expires - now,
&itp->it_value);
} else {
/*
@@ -971,6 +818,28 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
}
}
+static unsigned long long
+check_timers_list(struct list_head *timers,
+ struct list_head *firing,
+ unsigned long long curr)
+{
+ int maxfire = 20;
+
+ while (!list_empty(timers)) {
+ struct cpu_timer_list *t;
+
+ t = list_first_entry(timers, struct cpu_timer_list, entry);
+
+ if (!--maxfire || curr < t->expires)
+ return t->expires;
+
+ t->firing = 1;
+ list_move_tail(&t->entry, firing);
+ }
+
+ return 0;
+}
+
/*
* Check for any per-thread CPU timers that have fired and move them off
* the tsk->cpu_timers[N] list onto the firing list. Here we update the
@@ -979,60 +848,28 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
static void check_thread_timers(struct task_struct *tsk,
struct list_head *firing)
{
- int maxfire;
struct list_head *timers = tsk->cpu_timers;
struct signal_struct *const sig = tsk->signal;
+ struct task_cputime *tsk_expires = &tsk->cputime_expires;
+ unsigned long long expires;
+ unsigned long soft;
- maxfire = 20;
- tsk->cputime_expires.prof_exp = cputime_zero;
- while (!list_empty(timers)) {
- struct cpu_timer_list *t = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || cputime_lt(prof_ticks(tsk), t->expires.cpu)) {
- tsk->cputime_expires.prof_exp = t->expires.cpu;
- break;
- }
- t->firing = 1;
- list_move_tail(&t->entry, firing);
- }
+ expires = check_timers_list(timers, firing, prof_ticks(tsk));
+ tsk_expires->prof_exp = expires_to_cputime(expires);
- ++timers;
- maxfire = 20;
- tsk->cputime_expires.virt_exp = cputime_zero;
- while (!list_empty(timers)) {
- struct cpu_timer_list *t = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || cputime_lt(virt_ticks(tsk), t->expires.cpu)) {
- tsk->cputime_expires.virt_exp = t->expires.cpu;
- break;
- }
- t->firing = 1;
- list_move_tail(&t->entry, firing);
- }
+ expires = check_timers_list(++timers, firing, virt_ticks(tsk));
+ tsk_expires->virt_exp = expires_to_cputime(expires);
- ++timers;
- maxfire = 20;
- tsk->cputime_expires.sched_exp = 0;
- while (!list_empty(timers)) {
- struct cpu_timer_list *t = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
- tsk->cputime_expires.sched_exp = t->expires.sched;
- break;
- }
- t->firing = 1;
- list_move_tail(&t->entry, firing);
- }
+ tsk_expires->sched_exp = check_timers_list(++timers, firing,
+ tsk->se.sum_exec_runtime);
/*
* Check for the special case thread timers.
*/
- if (sig->rlim[RLIMIT_RTTIME].rlim_cur != RLIM_INFINITY) {
- unsigned long hard = sig->rlim[RLIMIT_RTTIME].rlim_max;
- unsigned long *soft = &sig->rlim[RLIMIT_RTTIME].rlim_cur;
+ soft = ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur);
+ if (soft != RLIM_INFINITY) {
+ unsigned long hard =
+ ACCESS_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max);
if (hard != RLIM_INFINITY &&
tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
@@ -1043,14 +880,13 @@ static void check_thread_timers(struct task_struct *tsk,
__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
return;
}
- if (tsk->rt.timeout > DIV_ROUND_UP(*soft, USEC_PER_SEC/HZ)) {
+ if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
/*
* At the soft limit, send a SIGXCPU every second.
*/
- if (sig->rlim[RLIMIT_RTTIME].rlim_cur
- < sig->rlim[RLIMIT_RTTIME].rlim_max) {
- sig->rlim[RLIMIT_RTTIME].rlim_cur +=
- USEC_PER_SEC;
+ if (soft < hard) {
+ soft += USEC_PER_SEC;
+ sig->rlim[RLIMIT_RTTIME].rlim_cur = soft;
}
printk(KERN_INFO
"RT Watchdog Timeout: %s[%d]\n",
@@ -1060,38 +896,35 @@ static void check_thread_timers(struct task_struct *tsk,
}
}
-static void stop_process_timers(struct task_struct *tsk)
+static void stop_process_timers(struct signal_struct *sig)
{
- struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ struct thread_group_cputimer *cputimer = &sig->cputimer;
unsigned long flags;
- if (!cputimer->running)
- return;
-
- spin_lock_irqsave(&cputimer->lock, flags);
+ raw_spin_lock_irqsave(&cputimer->lock, flags);
cputimer->running = 0;
- spin_unlock_irqrestore(&cputimer->lock, flags);
+ raw_spin_unlock_irqrestore(&cputimer->lock, flags);
}
static u32 onecputick;
static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
- cputime_t *expires, cputime_t cur_time, int signo)
+ unsigned long long *expires,
+ unsigned long long cur_time, int signo)
{
- if (cputime_eq(it->expires, cputime_zero))
+ if (!it->expires)
return;
- if (cputime_ge(cur_time, it->expires)) {
- if (!cputime_eq(it->incr, cputime_zero)) {
- it->expires = cputime_add(it->expires, it->incr);
+ if (cur_time >= it->expires) {
+ if (it->incr) {
+ it->expires += it->incr;
it->error += it->incr_error;
if (it->error >= onecputick) {
- it->expires = cputime_sub(it->expires,
- cputime_one_jiffy);
+ it->expires -= cputime_one_jiffy;
it->error -= onecputick;
}
} else {
- it->expires = cputime_zero;
+ it->expires = 0;
}
trace_itimer_expire(signo == SIGPROF ?
@@ -1100,9 +933,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
}
- if (!cputime_eq(it->expires, cputime_zero) &&
- (cputime_eq(*expires, cputime_zero) ||
- cputime_lt(it->expires, *expires))) {
+ if (it->expires && (!*expires || it->expires < *expires)) {
*expires = it->expires;
}
}
@@ -1115,76 +946,24 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
static void check_process_timers(struct task_struct *tsk,
struct list_head *firing)
{
- int maxfire;
struct signal_struct *const sig = tsk->signal;
- cputime_t utime, ptime, virt_expires, prof_expires;
+ unsigned long long utime, ptime, virt_expires, prof_expires;
unsigned long long sum_sched_runtime, sched_expires;
struct list_head *timers = sig->cpu_timers;
struct task_cputime cputime;
-
- /*
- * Don't sample the current process CPU clocks if there are no timers.
- */
- if (list_empty(&timers[CPUCLOCK_PROF]) &&
- cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
- sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
- list_empty(&timers[CPUCLOCK_VIRT]) &&
- cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
- list_empty(&timers[CPUCLOCK_SCHED])) {
- stop_process_timers(tsk);
- return;
- }
+ unsigned long soft;
/*
* Collect the current process totals.
*/
thread_group_cputimer(tsk, &cputime);
- utime = cputime.utime;
- ptime = cputime_add(utime, cputime.stime);
+ utime = cputime_to_expires(cputime.utime);
+ ptime = utime + cputime_to_expires(cputime.stime);
sum_sched_runtime = cputime.sum_exec_runtime;
- maxfire = 20;
- prof_expires = cputime_zero;
- while (!list_empty(timers)) {
- struct cpu_timer_list *tl = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || cputime_lt(ptime, tl->expires.cpu)) {
- prof_expires = tl->expires.cpu;
- break;
- }
- tl->firing = 1;
- list_move_tail(&tl->entry, firing);
- }
- ++timers;
- maxfire = 20;
- virt_expires = cputime_zero;
- while (!list_empty(timers)) {
- struct cpu_timer_list *tl = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || cputime_lt(utime, tl->expires.cpu)) {
- virt_expires = tl->expires.cpu;
- break;
- }
- tl->firing = 1;
- list_move_tail(&tl->entry, firing);
- }
-
- ++timers;
- maxfire = 20;
- sched_expires = 0;
- while (!list_empty(timers)) {
- struct cpu_timer_list *tl = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
- sched_expires = tl->expires.sched;
- break;
- }
- tl->firing = 1;
- list_move_tail(&tl->entry, firing);
- }
+ prof_expires = check_timers_list(timers, firing, ptime);
+ virt_expires = check_timers_list(++timers, firing, utime);
+ sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
/*
* Check for the special case process timers.
@@ -1193,11 +972,13 @@ static void check_process_timers(struct task_struct *tsk,
SIGPROF);
check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
SIGVTALRM);
-
- if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+ soft = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+ if (soft != RLIM_INFINITY) {
unsigned long psecs = cputime_to_secs(ptime);
+ unsigned long hard =
+ ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);
cputime_t x;
- if (psecs >= sig->rlim[RLIMIT_CPU].rlim_max) {
+ if (psecs >= hard) {
/*
* At the hard limit, we just die.
* No need to calculate anything else now.
@@ -1205,35 +986,27 @@ static void check_process_timers(struct task_struct *tsk,
__group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
return;
}
- if (psecs >= sig->rlim[RLIMIT_CPU].rlim_cur) {
+ if (psecs >= soft) {
/*
* At the soft limit, send a SIGXCPU every second.
*/
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
- if (sig->rlim[RLIMIT_CPU].rlim_cur
- < sig->rlim[RLIMIT_CPU].rlim_max) {
- sig->rlim[RLIMIT_CPU].rlim_cur++;
+ if (soft < hard) {
+ soft++;
+ sig->rlim[RLIMIT_CPU].rlim_cur = soft;
}
}
- x = secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
- if (cputime_eq(prof_expires, cputime_zero) ||
- cputime_lt(x, prof_expires)) {
+ x = secs_to_cputime(soft);
+ if (!prof_expires || x < prof_expires) {
prof_expires = x;
}
}
- if (!cputime_eq(prof_expires, cputime_zero) &&
- (cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) ||
- cputime_gt(sig->cputime_expires.prof_exp, prof_expires)))
- sig->cputime_expires.prof_exp = prof_expires;
- if (!cputime_eq(virt_expires, cputime_zero) &&
- (cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
- cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
- sig->cputime_expires.virt_exp = virt_expires;
- if (sched_expires != 0 &&
- (sig->cputime_expires.sched_exp == 0 ||
- sig->cputime_expires.sched_exp > sched_expires))
- sig->cputime_expires.sched_exp = sched_expires;
+ sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
+ sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
+ sig->cputime_expires.sched_exp = sched_expires;
+ if (task_cputime_zero(&sig->cputime_expires))
+ stop_process_timers(sig);
}
/*
@@ -1242,14 +1015,12 @@ static void check_process_timers(struct task_struct *tsk,
*/
void posix_cpu_timer_schedule(struct k_itimer *timer)
{
+ struct sighand_struct *sighand;
+ unsigned long flags;
struct task_struct *p = timer->it.cpu.task;
- union cpu_time_count now;
+ unsigned long long now;
- if (unlikely(p == NULL))
- /*
- * The task was cleaned up already, no future firings.
- */
- goto out;
+ WARN_ON_ONCE(p == NULL);
/*
* Fetch the current sample and update the timer's expiry time.
@@ -1257,44 +1028,45 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
cpu_clock_sample(timer->it_clock, p, &now);
bump_cpu_timer(timer, now);
- if (unlikely(p->exit_state)) {
- clear_dead_task(timer, now);
+ if (unlikely(p->exit_state))
+ goto out;
+
+ /* Protect timer list r/w in arm_timer() */
+ sighand = lock_task_sighand(p, &flags);
+ if (!sighand)
goto out;
- }
- read_lock(&tasklist_lock); /* arm_timer needs it. */
} else {
- read_lock(&tasklist_lock);
- if (unlikely(p->signal == NULL)) {
+ /*
+ * Protect arm_timer() and timer sampling in case of call to
+ * thread_group_cputime().
+ */
+ sighand = lock_task_sighand(p, &flags);
+ if (unlikely(sighand == NULL)) {
/*
* The process has been reaped.
* We can't even collect a sample any more.
*/
- put_task_struct(p);
- timer->it.cpu.task = p = NULL;
- timer->it.cpu.expires.sched = 0;
- goto out_unlock;
+ timer->it.cpu.expires = 0;
+ goto out;
} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
- /*
- * We've noticed that the thread is dead, but
- * not yet reaped. Take this opportunity to
- * drop our task ref.
- */
- clear_dead_task(timer, now);
- goto out_unlock;
+ unlock_task_sighand(p, &flags);
+ /* Optimizations: if the process is dying, no need to rearm */
+ goto out;
}
cpu_timer_sample_group(timer->it_clock, p, &now);
bump_cpu_timer(timer, now);
- /* Leave the tasklist_lock locked for the call below. */
+ /* Leave the sighand locked for the call below. */
}
/*
* Now re-arm for the new expiry time.
*/
- arm_timer(timer, now);
-
-out_unlock:
- read_unlock(&tasklist_lock);
+ WARN_ON_ONCE(!irqs_disabled());
+ arm_timer(timer);
+ unlock_task_sighand(p, &flags);
+ /* Kick full dynticks CPUs in case they need to tick on the new timer */
+ posix_cpu_timer_kick_nohz();
out:
timer->it_overrun_last = timer->it_overrun;
timer->it_overrun = -1;
@@ -1302,23 +1074,6 @@ out:
}
/**
- * task_cputime_zero - Check a task_cputime struct for all zero fields.
- *
- * @cputime: The struct to compare.
- *
- * Checks @cputime to see if all fields are zero. Returns true if all fields
- * are zero, false if any field is nonzero.
- */
-static inline int task_cputime_zero(const struct task_cputime *cputime)
-{
- if (cputime_eq(cputime->utime, cputime_zero) &&
- cputime_eq(cputime->stime, cputime_zero) &&
- cputime->sum_exec_runtime == 0)
- return 1;
- return 0;
-}
-
-/**
* task_cputime_expired - Compare two task_cputime entities.
*
* @sample: The task_cputime structure to be checked for expiration.
@@ -1331,12 +1086,9 @@ static inline int task_cputime_zero(const struct task_cputime *cputime)
static inline int task_cputime_expired(const struct task_cputime *sample,
const struct task_cputime *expires)
{
- if (!cputime_eq(expires->utime, cputime_zero) &&
- cputime_ge(sample->utime, expires->utime))
+ if (expires->utime && sample->utime >= expires->utime)
return 1;
- if (!cputime_eq(expires->stime, cputime_zero) &&
- cputime_ge(cputime_add(sample->utime, sample->stime),
- expires->stime))
+ if (expires->stime && sample->utime + sample->stime >= expires->stime)
return 1;
if (expires->sum_exec_runtime != 0 &&
sample->sum_exec_runtime >= expires->sum_exec_runtime)
@@ -1357,15 +1109,14 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
static inline int fastpath_timer_check(struct task_struct *tsk)
{
struct signal_struct *sig;
+ cputime_t utime, stime;
- /* tsk == current, ensure it is safe to use ->signal/sighand */
- if (unlikely(tsk->exit_state))
- return 0;
+ task_cputime(tsk, &utime, &stime);
if (!task_cputime_zero(&tsk->cputime_expires)) {
struct task_cputime task_sample = {
- .utime = tsk->utime,
- .stime = tsk->stime,
+ .utime = utime,
+ .stime = stime,
.sum_exec_runtime = tsk->se.sum_exec_runtime
};
@@ -1374,15 +1125,18 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
}
sig = tsk->signal;
- if (!task_cputime_zero(&sig->cputime_expires)) {
+ if (sig->cputimer.running) {
struct task_cputime group_sample;
- thread_group_cputimer(tsk, &group_sample);
+ raw_spin_lock(&sig->cputimer.lock);
+ group_sample = sig->cputimer.cputime;
+ raw_spin_unlock(&sig->cputimer.lock);
+
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
}
- return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
+ return 0;
}
/*
@@ -1394,8 +1148,9 @@ void run_posix_cpu_timers(struct task_struct *tsk)
{
LIST_HEAD(firing);
struct k_itimer *timer, *next;
+ unsigned long flags;
- BUG_ON(!irqs_disabled());
+ WARN_ON_ONCE(!irqs_disabled());
/*
* The fast path checks that there are no expired thread or thread
@@ -1404,14 +1159,20 @@ void run_posix_cpu_timers(struct task_struct *tsk)
if (!fastpath_timer_check(tsk))
return;
- spin_lock(&tsk->sighand->siglock);
+ if (!lock_task_sighand(tsk, &flags))
+ return;
/*
* Here we take off tsk->signal->cpu_timers[N] and
* tsk->cpu_timers[N] all the timers that are firing, and
* put them on the firing list.
*/
check_thread_timers(tsk, &firing);
- check_process_timers(tsk, &firing);
+ /*
+ * If there are any active process wide timers (POSIX 1.b, itimers,
+ * RLIMIT_CPU) cputimer must be running.
+ */
+ if (tsk->signal->cputimer.running)
+ check_process_timers(tsk, &firing);
/*
* We must release these locks before taking any timer's lock.
@@ -1421,11 +1182,11 @@ void run_posix_cpu_timers(struct task_struct *tsk)
* that gets the timer lock before we do will give it up and
* spin until we've taken care of that timer below.
*/
- spin_unlock(&tsk->sighand->siglock);
+ unlock_task_sighand(tsk, &flags);
/*
* Now that all the timers on our list have the firing flag,
- * noone will touch their list entries but us. We'll take
+ * no one will touch their list entries but us. We'll take
* each timer's lock before clearing its firing flag, so no
* timer call will interfere.
*/
@@ -1448,61 +1209,53 @@ void run_posix_cpu_timers(struct task_struct *tsk)
}
/*
- * Set one of the process-wide special case CPU timers.
+ * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
* The tsk->sighand->siglock must be held by the caller.
- * The *newval argument is relative and we update it to be absolute, *oldval
- * is absolute and we update it to be relative.
*/
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval)
{
- union cpu_time_count now;
- struct list_head *head;
+ unsigned long long now;
- BUG_ON(clock_idx == CPUCLOCK_SCHED);
+ WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
cpu_timer_sample_group(clock_idx, tsk, &now);
if (oldval) {
- if (!cputime_eq(*oldval, cputime_zero)) {
- if (cputime_le(*oldval, now.cpu)) {
+ /*
+ * We are setting itimer. The *oldval is absolute and we update
+ * it to be relative, *newval argument is relative and we update
+ * it to be absolute.
+ */
+ if (*oldval) {
+ if (*oldval <= now) {
/* Just about to fire. */
*oldval = cputime_one_jiffy;
} else {
- *oldval = cputime_sub(*oldval, now.cpu);
+ *oldval -= now;
}
}
- if (cputime_eq(*newval, cputime_zero))
- return;
- *newval = cputime_add(*newval, now.cpu);
-
- /*
- * If the RLIMIT_CPU timer will expire before the
- * ITIMER_PROF timer, we have nothing else to do.
- */
- if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
- < cputime_to_secs(*newval))
- return;
+ if (!*newval)
+ goto out;
+ *newval += now;
}
/*
- * Check whether there are any process timers already set to fire
- * before this one. If so, we don't have anything more to do.
+ * Update expiration cache if we are the earliest timer, or eventually
+ * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
*/
- head = &tsk->signal->cpu_timers[clock_idx];
- if (list_empty(head) ||
- cputime_ge(list_first_entry(head,
- struct cpu_timer_list, entry)->expires.cpu,
- *newval)) {
- switch (clock_idx) {
- case CPUCLOCK_PROF:
+ switch (clock_idx) {
+ case CPUCLOCK_PROF:
+ if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
tsk->signal->cputime_expires.prof_exp = *newval;
- break;
- case CPUCLOCK_VIRT:
+ break;
+ case CPUCLOCK_VIRT:
+ if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
tsk->signal->cputime_expires.virt_exp = *newval;
- break;
- }
+ break;
}
+out:
+ posix_cpu_timer_kick_nohz();
}
static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
@@ -1534,10 +1287,12 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
}
while (!signal_pending(current)) {
- if (timer.it.cpu.expires.sched == 0) {
+ if (timer.it.cpu.expires == 0) {
/*
- * Our timer fired and was reset.
+ * Our timer fired and was reset, below
+ * deletion can not fail.
*/
+ posix_cpu_timer_del(&timer);
spin_unlock_irq(&timer.it_lock);
return 0;
}
@@ -1555,9 +1310,26 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
* We were interrupted by a signal.
*/
sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp);
- posix_cpu_timer_set(&timer, 0, &zero_it, it);
+ error = posix_cpu_timer_set(&timer, 0, &zero_it, it);
+ if (!error) {
+ /*
+ * Timer is now unarmed, deletion can not fail.
+ */
+ posix_cpu_timer_del(&timer);
+ }
spin_unlock_irq(&timer.it_lock);
+ while (error == TIMER_RETRY) {
+ /*
+ * We need to handle case when timer was or is in the
+ * middle of firing. In other cases we already freed
+ * resources.
+ */
+ spin_lock_irq(&timer.it_lock);
+ error = posix_cpu_timer_del(&timer);
+ spin_unlock_irq(&timer.it_lock);
+ }
+
if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) {
/*
* It actually did fire already.
@@ -1571,11 +1343,13 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
return error;
}
-int posix_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp)
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
+
+static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
+ struct timespec *rqtp, struct timespec __user *rmtp)
{
struct restart_block *restart_block =
- &current_thread_info()->restart_block;
+ &current_thread_info()->restart_block;
struct itimerspec it;
int error;
@@ -1591,56 +1365,47 @@ int posix_cpu_nsleep(const clockid_t which_clock, int flags,
if (error == -ERESTART_RESTARTBLOCK) {
- if (flags & TIMER_ABSTIME)
+ if (flags & TIMER_ABSTIME)
return -ERESTARTNOHAND;
/*
- * Report back to the user the time still remaining.
- */
- if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+ * Report back to the user the time still remaining.
+ */
+ if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
return -EFAULT;
restart_block->fn = posix_cpu_nsleep_restart;
- restart_block->arg0 = which_clock;
- restart_block->arg1 = (unsigned long) rmtp;
- restart_block->arg2 = rqtp->tv_sec;
- restart_block->arg3 = rqtp->tv_nsec;
+ restart_block->nanosleep.clockid = which_clock;
+ restart_block->nanosleep.rmtp = rmtp;
+ restart_block->nanosleep.expires = timespec_to_ns(rqtp);
}
return error;
}
-long posix_cpu_nsleep_restart(struct restart_block *restart_block)
+static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
{
- clockid_t which_clock = restart_block->arg0;
- struct timespec __user *rmtp;
+ clockid_t which_clock = restart_block->nanosleep.clockid;
struct timespec t;
struct itimerspec it;
int error;
- rmtp = (struct timespec __user *) restart_block->arg1;
- t.tv_sec = restart_block->arg2;
- t.tv_nsec = restart_block->arg3;
+ t = ns_to_timespec(restart_block->nanosleep.expires);
- restart_block->fn = do_no_restart_syscall;
error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it);
if (error == -ERESTART_RESTARTBLOCK) {
+ struct timespec __user *rmtp = restart_block->nanosleep.rmtp;
/*
- * Report back to the user the time still remaining.
- */
- if (rmtp != NULL && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
+ * Report back to the user the time still remaining.
+ */
+ if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp))
return -EFAULT;
- restart_block->fn = posix_cpu_nsleep_restart;
- restart_block->arg0 = which_clock;
- restart_block->arg1 = (unsigned long) rmtp;
- restart_block->arg2 = t.tv_sec;
- restart_block->arg3 = t.tv_nsec;
+ restart_block->nanosleep.expires = timespec_to_ns(&t);
}
return error;
}
-
#define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED)
#define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED)
@@ -1684,38 +1449,37 @@ static int thread_cpu_timer_create(struct k_itimer *timer)
timer->it_clock = THREAD_CLOCK;
return posix_cpu_timer_create(timer);
}
-static int thread_cpu_nsleep(const clockid_t which_clock, int flags,
- struct timespec *rqtp, struct timespec __user *rmtp)
-{
- return -EINVAL;
-}
-static long thread_cpu_nsleep_restart(struct restart_block *restart_block)
-{
- return -EINVAL;
-}
+
+struct k_clock clock_posix_cpu = {
+ .clock_getres = posix_cpu_clock_getres,
+ .clock_set = posix_cpu_clock_set,
+ .clock_get = posix_cpu_clock_get,
+ .timer_create = posix_cpu_timer_create,
+ .nsleep = posix_cpu_nsleep,
+ .nsleep_restart = posix_cpu_nsleep_restart,
+ .timer_set = posix_cpu_timer_set,
+ .timer_del = posix_cpu_timer_del,
+ .timer_get = posix_cpu_timer_get,
+};
static __init int init_posix_cpu_timers(void)
{
struct k_clock process = {
- .clock_getres = process_cpu_clock_getres,
- .clock_get = process_cpu_clock_get,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = process_cpu_timer_create,
- .nsleep = process_cpu_nsleep,
- .nsleep_restart = process_cpu_nsleep_restart,
+ .clock_getres = process_cpu_clock_getres,
+ .clock_get = process_cpu_clock_get,
+ .timer_create = process_cpu_timer_create,
+ .nsleep = process_cpu_nsleep,
+ .nsleep_restart = process_cpu_nsleep_restart,
};
struct k_clock thread = {
- .clock_getres = thread_cpu_clock_getres,
- .clock_get = thread_cpu_clock_get,
- .clock_set = do_posix_clock_nosettime,
- .timer_create = thread_cpu_timer_create,
- .nsleep = thread_cpu_nsleep,
- .nsleep_restart = thread_cpu_nsleep_restart,
+ .clock_getres = thread_cpu_clock_getres,
+ .clock_get = thread_cpu_clock_get,
+ .timer_create = thread_cpu_timer_create,
};
struct timespec ts;
- register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
- register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
+ posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, &process);
+ posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread);
cputime_to_timespec(cputime_one_jiffy, &ts);
onecputick = ts.tv_nsec;